The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
   23  */
   24 
   25 #include <sys/types.h>
   26 #include <sys/simd.h>
   27 
   /*
    * On Linux, redefine __asm so that every asm statement written with it
    * in this file expands to "__asm__ __volatile__".
    */
   28 #ifdef __linux__
   29 #define __asm __asm__ __volatile__
   30 #endif
   31 
   /*
    * REG_CNT(regs...) evaluates to the number of arguments it was given
    * (1 through 8).  The literals 8..1 are appended after the caller's
    * arguments, so whichever literal lands in the ninth parameter slot of
    * _REG_CNT() equals the argument count.
    */
   #define _REG_CNT(a0, a1, a2, a3, a4, a5, a6, a7, count, ...) count
   #define REG_CNT(regs...) _REG_CNT(regs, 8, 7, 6, 5, 4, 3, 2, 1)
   34 
   /*
    * VRn_(...) skips n leading arguments and turns the next one, N, into the
    * asm template operand reference "%[wN]".
    */
   #define VR0_(reg, ...) "%[w"#reg"]"
   #define VR1_(s0, reg, ...) "%[w"#reg"]"
   #define VR2_(s0, s1, reg, ...) "%[w"#reg"]"
   #define VR3_(s0, s1, s2, reg, ...) "%[w"#reg"]"
   #define VR4_(s0, s1, s2, s3, reg, ...) "%[w"#reg"]"
   #define VR5_(s0, s1, s2, s3, s4, reg, ...) "%[w"#reg"]"
   #define VR6_(s0, s1, s2, s3, s4, s5, reg, ...) "%[w"#reg"]"
   #define VR7_(s0, s1, s2, s3, s4, s5, s6, reg, ...) "%[w"#reg"]"

   /*
    * VRn(regs...) names the n-th (zero-based) entry of the register list.
    *
    * VR2 through VR7 append filler numbers (36 down to 30) behind the
    * caller's list.  The switch cases written for more registers than were
    * actually passed are never executed, but the compiler still has to
    * accept their asm, so every position needs an operand name.  The
    * fillers must be numbers not used otherwise, and they must differ
    * from each other because duplicate constraints are illegal.
    */
   #define VR0(regs...) VR0_(regs)
   #define VR1(regs...) VR1_(regs)
   #define VR2(regs...) VR2_(regs, 36)
   #define VR3(regs...) VR3_(regs, 36, 35)
   #define VR4(regs...) VR4_(regs, 36, 35, 34, 33)
   #define VR5(regs...) VR5_(regs, 36, 35, 34, 33, 32)
   #define VR6(regs...) VR6_(regs, 36, 35, 34, 33, 32, 31)
   #define VR7(regs...) VR7_(regs, 36, 35, 34, 33, 32, 31, 30)

   /* Operand reference for one explicitly numbered variable. */
   #define VR(num) "%[w"#num"]"
   63 
   /*
    * RVRn_(...) skips n leading arguments and emits an asm input operand for
    * the next one, N:  [wN] "w" (wN)  -- variable wN, read-only, constraint
    * "w".
    */
   #define RVR0_(reg, ...) [w##reg] "w" (w##reg)
   #define RVR1_(s0, reg, ...) [w##reg] "w" (w##reg)
   #define RVR2_(s0, s1, reg, ...) [w##reg] "w" (w##reg)
   #define RVR3_(s0, s1, s2, reg, ...) [w##reg] "w" (w##reg)
   #define RVR4_(s0, s1, s2, s3, reg, ...) [w##reg] "w" (w##reg)
   #define RVR5_(s0, s1, s2, s3, s4, reg, ...) [w##reg] "w" (w##reg)
   #define RVR6_(s0, s1, s2, s3, s4, s5, reg, ...) [w##reg] "w" (w##reg)
   #define RVR7_(s0, s1, s2, s3, s4, s5, s6, reg, ...) [w##reg] "w" (w##reg)

   /*
    * RVRn(regs...) is the read-only operand for the n-th (zero-based) entry
    * of the register list.  Filler numbers 36 down to 30 are appended so
    * that positions past the end of a short list still name an operand.
    */
   #define RVR0(regs...) RVR0_(regs)
   #define RVR1(regs...) RVR1_(regs)
   #define RVR2(regs...) RVR2_(regs, 36)
   #define RVR3(regs...) RVR3_(regs, 36, 35)
   #define RVR4(regs...) RVR4_(regs, 36, 35, 34, 33)
   #define RVR5(regs...) RVR5_(regs, 36, 35, 34, 33, 32)
   #define RVR6(regs...) RVR6_(regs, 36, 35, 34, 33, 32, 31)
   #define RVR7(regs...) RVR7_(regs, 36, 35, 34, 33, 32, 31, 30)

   /* Read-only operand for one explicitly numbered variable. */
   #define RVR(num) [w##num] "w" (w##num)
   83 
   /*
    * WVRn_(...) skips n leading arguments and emits an asm output operand
    * for the next one, N:  [wN] "=w" (wN)  -- variable wN, write-only,
    * constraint "w".
    */
   #define WVR0_(reg, ...) [w##reg] "=w" (w##reg)
   #define WVR1_(s0, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR2_(s0, s1, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR3_(s0, s1, s2, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR4_(s0, s1, s2, s3, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR5_(s0, s1, s2, s3, s4, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR6_(s0, s1, s2, s3, s4, s5, reg, ...) [w##reg] "=w" (w##reg)
   #define WVR7_(s0, s1, s2, s3, s4, s5, s6, reg, ...) [w##reg] "=w" (w##reg)

   /*
    * WVRn(regs...) is the write-only operand for the n-th (zero-based) entry
    * of the register list.  Filler numbers 36 down to 30 are appended so
    * that positions past the end of a short list still name an operand.
    */
   #define WVR0(regs...) WVR0_(regs)
   #define WVR1(regs...) WVR1_(regs)
   #define WVR2(regs...) WVR2_(regs, 36)
   #define WVR3(regs...) WVR3_(regs, 36, 35)
   #define WVR4(regs...) WVR4_(regs, 36, 35, 34, 33)
   #define WVR5(regs...) WVR5_(regs, 36, 35, 34, 33, 32)
   #define WVR6(regs...) WVR6_(regs, 36, 35, 34, 33, 32, 31)
   #define WVR7(regs...) WVR7_(regs, 36, 35, 34, 33, 32, 31, 30)

   /* Write-only operand for one explicitly numbered variable. */
   #define WVR(num) [w##num] "=w" (w##num)
  103 
  /*
   * UVRn_(...) skips n leading arguments and emits an asm operand for the
   * next one, N:  [wN] "+&w" (wN)  -- variable wN, read-write ("+"),
   * early-clobber ("&"), constraint "w".
   */
  #define UVR0_(reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR1_(s0, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR2_(s0, s1, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR3_(s0, s1, s2, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR4_(s0, s1, s2, s3, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR5_(s0, s1, s2, s3, s4, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR6_(s0, s1, s2, s3, s4, s5, reg, ...) [w##reg] "+&w" (w##reg)
  #define UVR7_(s0, s1, s2, s3, s4, s5, s6, reg, ...) [w##reg] "+&w" (w##reg)

  /*
   * UVRn(regs...) is the read-write operand for the n-th (zero-based) entry
   * of the register list.  Filler numbers 36 down to 30 are appended so
   * that positions past the end of a short list still name an operand.
   */
  #define UVR0(regs...) UVR0_(regs)
  #define UVR1(regs...) UVR1_(regs)
  #define UVR2(regs...) UVR2_(regs, 36)
  #define UVR3(regs...) UVR3_(regs, 36, 35)
  #define UVR4(regs...) UVR4_(regs, 36, 35, 34, 33)
  #define UVR5(regs...) UVR5_(regs, 36, 35, 34, 33, 32)
  #define UVR6(regs...) UVR6_(regs, 36, 35, 34, 33, 32, 31)
  #define UVR7(regs...) UVR7_(regs, 36, 35, 34, 33, 32, 31, 30)

  /* Read-write operand for one explicitly numbered variable. */
  #define UVR(num) [w##num] "+&w" (w##num)
  123 
  /*
   * R_01(...) keeps the first two entries of a register list; R_23(...)
   * keeps the third and fourth.  R_23 pads the list with 1, 2, 3 so that it
   * still expands when fewer than four entries are supplied.
   */
  #define R_01(first, second, ...) first, second
  #define _R_23(s0, s1, third, fourth, ...) third, fourth
  #define R_23(regs...) _R_23(regs, 1, 2, 3)
  127 
  /*
   * Used as the default branch of the register-count switches in this file:
   * reaching it means a macro was invoked with an unsupported number of
   * registers.  Expands to ASSERT(0).
   */
  128 #define ZFS_ASM_BUG()   ASSERT(0)
  129 
  /*
   * OFFSET(ptr, val): address `val` bytes past `ptr`, as an unsigned char *.
   *
   * `val` is parenthesized so that an argument containing an operator that
   * binds less tightly than `+` (for example `a ? b : c`, `n << 4`, `i & 7`)
   * is added as a whole instead of being split by the expansion.
   */
  #define OFFSET(ptr, val)        ((unsigned char *)(ptr) + (val))
  131 
  /*
   * Lookup table defined elsewhere: 4*256 rows of 16 bytes.  _MULx2() in
   * this file reads rows 4*(c)+0 through 4*(c)+3 for its constant c.
   */
  132 extern const uint8_t gf_clmul_mod_lt[4*256][16];
  133 
  /* Number of bytes in one v_t element. */
  134 #define ELEM_SIZE 16
  135 
  /* One block of ELEM_SIZE bytes, aligned to ELEM_SIZE bytes. */
  136 typedef struct v {
  137         uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
  138 } v_t;
  139 
  /*
   * XOR_ACC(src, r...): XOR consecutive 16-byte blocks of src into the
   * listed registers (register i ^= block at byte offset 16*i).
   *
   * r must list 2, 4 or 8 register numbers.  Each block is first loaded
   * into a scratch register (v18-v21, declared as clobbers) and then XORed
   * into the matching read-write operand.  Any other register count ends
   * up in ZFS_ASM_BUG().
   */
  140 #define XOR_ACC(src, r...)                                              \
  141 {                                                                       \
  142         switch (REG_CNT(r)) {                                           \
  143         case 8:                                                         \
  144                 __asm(                                                  \
  145                 "ld1 { v21.4s },%[SRC0]\n"                              \
  146                 "ld1 { v20.4s },%[SRC1]\n"                              \
  147                 "ld1 { v19.4s },%[SRC2]\n"                              \
  148                 "ld1 { v18.4s },%[SRC3]\n"                              \
  149                 "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"           \
  150                 "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"           \
  151                 "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n"           \
  152                 "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n"           \
  153                 "ld1 { v21.4s },%[SRC4]\n"                              \
  154                 "ld1 { v20.4s },%[SRC5]\n"                              \
  155                 "ld1 { v19.4s },%[SRC6]\n"                              \
  156                 "ld1 { v18.4s },%[SRC7]\n"                              \
  157                 "eor " VR4(r) ".16b," VR4(r) ".16b,v21.16b\n"           \
  158                 "eor " VR5(r) ".16b," VR5(r) ".16b,v20.16b\n"           \
  159                 "eor " VR6(r) ".16b," VR6(r) ".16b,v19.16b\n"           \
  160                 "eor " VR7(r) ".16b," VR7(r) ".16b,v18.16b\n"           \
  161                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r),             \
  162                         UVR4(r), UVR5(r), UVR6(r), UVR7(r)              \
  163                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  164                 [SRC1] "Q" (*(OFFSET(src, 16))),                        \
  165                 [SRC2] "Q" (*(OFFSET(src, 32))),                        \
  166                 [SRC3] "Q" (*(OFFSET(src, 48))),                        \
  167                 [SRC4] "Q" (*(OFFSET(src, 64))),                        \
  168                 [SRC5] "Q" (*(OFFSET(src, 80))),                        \
  169                 [SRC6] "Q" (*(OFFSET(src, 96))),                        \
  170                 [SRC7] "Q" (*(OFFSET(src, 112)))                        \
  171                 :       "v18", "v19", "v20", "v21");                    \
  172                 break;                                                  \
  173         case 4:                                                         \
  174                 __asm(                                                  \
  175                 "ld1 { v21.4s },%[SRC0]\n"                              \
  176                 "ld1 { v20.4s },%[SRC1]\n"                              \
  177                 "ld1 { v19.4s },%[SRC2]\n"                              \
  178                 "ld1 { v18.4s },%[SRC3]\n"                              \
  179                 "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"           \
  180                 "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"           \
  181                 "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n"           \
  182                 "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n"           \
  183                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r)              \
  184                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  185                 [SRC1] "Q" (*(OFFSET(src, 16))),                        \
  186                 [SRC2] "Q" (*(OFFSET(src, 32))),                        \
  187                 [SRC3] "Q" (*(OFFSET(src, 48)))                         \
  188                 :       "v18", "v19", "v20", "v21");                    \
  189                 break;                                                  \
  190         case 2:                                                         \
  191                 __asm(                                                  \
  192                 "ld1 { v21.4s },%[SRC0]\n"                              \
  193                 "ld1 { v20.4s },%[SRC1]\n"                              \
  194                 "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"           \
  195                 "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"           \
  196                 :       UVR0(r), UVR1(r)                                \
  197                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  198                 [SRC1] "Q" (*(OFFSET(src, 16)))                         \
  199                 :       "v20", "v21");                                  \
  200                 break;                                                  \
  201         default:                                                        \
  202                 ZFS_ASM_BUG();                                          \
  203         }                                                               \
  204 }
  205 
  /*
   * XOR(r...): XOR the first half of the register list into the second half.
   *
   * With 8 registers, entries 4..7 ^= entries 0..3; with 4 registers,
   * entries 2..3 ^= entries 0..1.  The first half is passed as read-only
   * operands, the second half as read-write operands.  Any other register
   * count ends up in ZFS_ASM_BUG().
   */
  206 #define XOR(r...)                                                       \
  207 {                                                                       \
  208         switch (REG_CNT(r)) {                                           \
  209         case 8:                                                         \
  210                 __asm(                                                  \
  211                 "eor " VR4(r) ".16b," VR4(r) ".16b," VR0(r) ".16b\n"    \
  212                 "eor " VR5(r) ".16b," VR5(r) ".16b," VR1(r) ".16b\n"    \
  213                 "eor " VR6(r) ".16b," VR6(r) ".16b," VR2(r) ".16b\n"    \
  214                 "eor " VR7(r) ".16b," VR7(r) ".16b," VR3(r) ".16b\n"    \
  215                 :       UVR4(r), UVR5(r), UVR6(r), UVR7(r)              \
  216                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r));            \
  217                 break;                                                  \
  218         case 4:                                                         \
  219                 __asm(                                                  \
  220                 "eor " VR2(r) ".16b," VR2(r) ".16b," VR0(r) ".16b\n"    \
  221                 "eor " VR3(r) ".16b," VR3(r) ".16b," VR1(r) ".16b\n"    \
  222                 :       UVR2(r), UVR3(r)                                \
  223                 :       RVR0(r), RVR1(r));                              \
  224                 break;                                                  \
  225         default:                                                        \
  226                 ZFS_ASM_BUG();                                          \
  227         }                                                               \
  228 }
  229 
  /*
   * ZERO(r...): clear every listed register by XORing it with itself.
   *
   * r must list 2, 4 or 8 register numbers; any other count ends up in
   * ZFS_ASM_BUG().  The registers are declared as write-only outputs: the
   * eor reads them, but x ^ x is zero whatever the previous contents were.
   */
  230 #define ZERO(r...)                                                      \
  231 {                                                                       \
  232         switch (REG_CNT(r)) {                                           \
  233         case 8:                                                         \
  234                 __asm(                                                  \
  235                 "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"    \
  236                 "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"    \
  237                 "eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n"    \
  238                 "eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n"    \
  239                 "eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n"    \
  240                 "eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n"    \
  241                 "eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n"    \
  242                 "eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n"    \
  243                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r),             \
  244                         WVR4(r), WVR5(r), WVR6(r), WVR7(r));            \
  245                 break;                                                  \
  246         case 4:                                                         \
  247                 __asm(                                                  \
  248                 "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"    \
  249                 "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"    \
  250                 "eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n"    \
  251                 "eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n"    \
  252                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r));            \
  253                 break;                                                  \
  254         case 2:                                                         \
  255                 __asm(                                                  \
  256                 "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"    \
  257                 "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"    \
  258                 :       WVR0(r), WVR1(r));                              \
  259                 break;                                                  \
  260         default:                                                        \
  261                 ZFS_ASM_BUG();                                          \
  262         }                                                               \
  263 }
  264 
  /*
   * COPY(r...): copy the first half of the register list into the second
   * half.
   *
   * With 8 registers, entries 4..7 = entries 0..3; with 4 registers,
   * entries 2..3 = entries 0..1.  Sources are read-only operands and
   * destinations are write-only operands.  Any other register count ends
   * up in ZFS_ASM_BUG().
   */
  265 #define COPY(r...)                                                      \
  266 {                                                                       \
  267         switch (REG_CNT(r)) {                                           \
  268         case 8:                                                         \
  269                 __asm(                                                  \
  270                 "mov " VR4(r) ".16b," VR0(r) ".16b\n"                   \
  271                 "mov " VR5(r) ".16b," VR1(r) ".16b\n"                   \
  272                 "mov " VR6(r) ".16b," VR2(r) ".16b\n"                   \
  273                 "mov " VR7(r) ".16b," VR3(r) ".16b\n"                   \
  274                 :       WVR4(r), WVR5(r), WVR6(r), WVR7(r)              \
  275                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r));            \
  276                 break;                                                  \
  277         case 4:                                                         \
  278                 __asm(                                                  \
  279                 "mov " VR2(r) ".16b," VR0(r) ".16b\n"                   \
  280                 "mov " VR3(r) ".16b," VR1(r) ".16b\n"                   \
  281                 :       WVR2(r), WVR3(r)                                \
  282                 :       RVR0(r), RVR1(r));                              \
  283                 break;                                                  \
  284         default:                                                        \
  285                 ZFS_ASM_BUG();                                          \
  286         }                                                               \
  287 }
  288 
  /*
   * LOAD(src, r...): load consecutive 16-byte blocks of src into the listed
   * registers (register i = block at byte offset 16*i).
   *
   * r must list 2, 4 or 8 register numbers; any other count ends up in
   * ZFS_ASM_BUG().  The registers are write-only outputs and each block is
   * a "Q" memory input.
   */
  289 #define LOAD(src, r...)                                                 \
  290 {                                                                       \
  291         switch (REG_CNT(r)) {                                           \
  292         case 8:                                                         \
  293                 __asm(                                                  \
  294                 "ld1 { " VR0(r) ".4s },%[SRC0]\n"                       \
  295                 "ld1 { " VR1(r) ".4s },%[SRC1]\n"                       \
  296                 "ld1 { " VR2(r) ".4s },%[SRC2]\n"                       \
  297                 "ld1 { " VR3(r) ".4s },%[SRC3]\n"                       \
  298                 "ld1 { " VR4(r) ".4s },%[SRC4]\n"                       \
  299                 "ld1 { " VR5(r) ".4s },%[SRC5]\n"                       \
  300                 "ld1 { " VR6(r) ".4s },%[SRC6]\n"                       \
  301                 "ld1 { " VR7(r) ".4s },%[SRC7]\n"                       \
  302                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r),             \
  303                         WVR4(r), WVR5(r), WVR6(r), WVR7(r)              \
  304                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  305                 [SRC1] "Q" (*(OFFSET(src, 16))),                        \
  306                 [SRC2] "Q" (*(OFFSET(src, 32))),                        \
  307                 [SRC3] "Q" (*(OFFSET(src, 48))),                        \
  308                 [SRC4] "Q" (*(OFFSET(src, 64))),                        \
  309                 [SRC5] "Q" (*(OFFSET(src, 80))),                        \
  310                 [SRC6] "Q" (*(OFFSET(src, 96))),                        \
  311                 [SRC7] "Q" (*(OFFSET(src, 112))));                      \
  312                 break;                                                  \
  313         case 4:                                                         \
  314                 __asm(                                                  \
  315                 "ld1 { " VR0(r) ".4s },%[SRC0]\n"                       \
  316                 "ld1 { " VR1(r) ".4s },%[SRC1]\n"                       \
  317                 "ld1 { " VR2(r) ".4s },%[SRC2]\n"                       \
  318                 "ld1 { " VR3(r) ".4s },%[SRC3]\n"                       \
  319                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r)              \
  320                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  321                 [SRC1] "Q" (*(OFFSET(src, 16))),                        \
  322                 [SRC2] "Q" (*(OFFSET(src, 32))),                        \
  323                 [SRC3] "Q" (*(OFFSET(src, 48))));                       \
  324                 break;                                                  \
  325         case 2:                                                         \
  326                 __asm(                                                  \
  327                 "ld1 { " VR0(r) ".4s },%[SRC0]\n"                       \
  328                 "ld1 { " VR1(r) ".4s },%[SRC1]\n"                       \
  329                 :       WVR0(r), WVR1(r)                                \
  330                 :       [SRC0] "Q" (*(OFFSET(src, 0))),                 \
  331                 [SRC1] "Q" (*(OFFSET(src, 16))));                       \
  332                 break;                                                  \
  333         default:                                                        \
  334                 ZFS_ASM_BUG();                                          \
  335         }                                                               \
  336 }
  337 
  /*
   * STORE(dst, r...): store the listed registers to consecutive 16-byte
   * blocks of dst (block at byte offset 16*i = register i).
   *
   * r must list 2, 4 or 8 register numbers; any other count ends up in
   * ZFS_ASM_BUG().  The registers are read-only inputs and each block is a
   * "=Q" memory output.
   */
  338 #define STORE(dst, r...)                                                \
  339 {                                                                       \
  340         switch (REG_CNT(r)) {                                           \
  341         case 8:                                                         \
  342                 __asm(                                                  \
  343                 "st1 { " VR0(r) ".4s },%[DST0]\n"                       \
  344                 "st1 { " VR1(r) ".4s },%[DST1]\n"                       \
  345                 "st1 { " VR2(r) ".4s },%[DST2]\n"                       \
  346                 "st1 { " VR3(r) ".4s },%[DST3]\n"                       \
  347                 "st1 { " VR4(r) ".4s },%[DST4]\n"                       \
  348                 "st1 { " VR5(r) ".4s },%[DST5]\n"                       \
  349                 "st1 { " VR6(r) ".4s },%[DST6]\n"                       \
  350                 "st1 { " VR7(r) ".4s },%[DST7]\n"                       \
  351                 :       [DST0] "=Q" (*(OFFSET(dst, 0))),                \
  352                 [DST1] "=Q" (*(OFFSET(dst, 16))),                       \
  353                 [DST2] "=Q" (*(OFFSET(dst, 32))),                       \
  354                 [DST3] "=Q" (*(OFFSET(dst, 48))),                       \
  355                 [DST4] "=Q" (*(OFFSET(dst, 64))),                       \
  356                 [DST5] "=Q" (*(OFFSET(dst, 80))),                       \
  357                 [DST6] "=Q" (*(OFFSET(dst, 96))),                       \
  358                 [DST7] "=Q" (*(OFFSET(dst, 112)))                       \
  359                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r),             \
  360                         RVR4(r), RVR5(r), RVR6(r), RVR7(r));            \
  361                 break;                                                  \
  362         case 4:                                                         \
  363                 __asm(                                                  \
  364                 "st1 { " VR0(r) ".4s },%[DST0]\n"                       \
  365                 "st1 { " VR1(r) ".4s },%[DST1]\n"                       \
  366                 "st1 { " VR2(r) ".4s },%[DST2]\n"                       \
  367                 "st1 { " VR3(r) ".4s },%[DST3]\n"                       \
  368                 :       [DST0] "=Q" (*(OFFSET(dst, 0))),                \
  369                 [DST1] "=Q" (*(OFFSET(dst, 16))),                       \
  370                 [DST2] "=Q" (*(OFFSET(dst, 32))),                       \
  371                 [DST3] "=Q" (*(OFFSET(dst, 48)))                        \
  372                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r));            \
  373                 break;                                                  \
  374         case 2:                                                         \
  375                 __asm(                                                  \
  376                 "st1 { " VR0(r) ".4s },%[DST0]\n"                       \
  377                 "st1 { " VR1(r) ".4s },%[DST1]\n"                       \
  378                 :       [DST0] "=Q" (*(OFFSET(dst, 0))),                \
  379                 [DST1] "=Q" (*(OFFSET(dst, 16)))                        \
  380                 :       RVR0(r), RVR1(r));                              \
  381                 break;                                                  \
  382         default:                                                        \
  383                 ZFS_ASM_BUG();                                          \
  384         }                                                               \
  385 }
  386 
  387 /*
  388  * Unfortunately cannot use the macro, because GCC
  389  * will try to use the macro name and not value
  390  * later on...
  391  * Kept as a reference to what a numbered variable is
  392  */
  393 #define _00     "v17"   /* cf. operand w17: zeroed by MUL2_SETUP() */
  394 #define _1d     "v16"   /* cf. operand w16: 0x1d in every byte after MUL2_SETUP() */
  395 #define _temp0  "v19"   /* scratch register written by MUL2() */
  396 #define _temp1  "v18"   /* scratch register written by MUL2() */
  397 
  /*
   * MUL2_SETUP(): prepare the two constants that MUL2() takes as inputs.
   * Operand w17 is cleared (XORed with itself) and every byte of operand
   * w16 is set to 0x1d.
   */
  398 #define MUL2_SETUP()                                                    \
  399 {                                                                       \
  400         __asm(                                                          \
  401         "eor " VR(17) ".16b," VR(17) ".16b," VR(17) ".16b\n"            \
  402         "movi " VR(16) ".16b,#0x1d\n"                                   \
  403         :       WVR(16), WVR(17));                                      \
  404 }
  405 
  /*
   * MUL2(r...): double every byte of the listed registers in place.
   *
   * Per byte x, with w17 and w16 as left by MUL2_SETUP() (zero and 0x1d):
   *   mask = (0 > (signed)x) ? 0xff : 0     cmgt against w17
   *   mask &= 0x1d                          and with w16
   *   x = (x << 1) ^ mask                   shl, then eor
   * i.e. shift left by one and XOR in 0x1d when the top bit was set.
   * NOTE(review): this looks like multiplication by 2 in GF(2^8) -- confirm
   * against the callers.
   *
   * r must list 4 or 2 register numbers; any other count ends up in
   * ZFS_ASM_BUG().  Scratch registers v18-v21 (v18-v19 in the 2-register
   * case) are declared as clobbers.
   */
  406 #define MUL2(r...)                                                      \
  407 {                                                                       \
  408         switch (REG_CNT(r)) {                                           \
  409         case 4:                                                         \
  410                 __asm(                                                  \
  411                 "cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n"          \
  412                 "cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n"          \
  413                 "cmgt v21.16b," VR(17) ".16b," VR2(r) ".16b\n"          \
  414                 "cmgt v20.16b," VR(17) ".16b," VR3(r) ".16b\n"          \
  415                 "and v19.16b,v19.16b," VR(16) ".16b\n"                  \
  416                 "and v18.16b,v18.16b," VR(16) ".16b\n"                  \
  417                 "and v21.16b,v21.16b," VR(16) ".16b\n"                  \
  418                 "and v20.16b,v20.16b," VR(16) ".16b\n"                  \
  419                 "shl " VR0(r) ".16b," VR0(r) ".16b,#1\n"                \
  420                 "shl " VR1(r) ".16b," VR1(r) ".16b,#1\n"                \
  421                 "shl " VR2(r) ".16b," VR2(r) ".16b,#1\n"                \
  422                 "shl " VR3(r) ".16b," VR3(r) ".16b,#1\n"                \
  423                 "eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n"           \
  424                 "eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n"           \
  425                 "eor " VR2(r) ".16b,v21.16b," VR2(r) ".16b\n"           \
  426                 "eor " VR3(r) ".16b,v20.16b," VR3(r) ".16b\n"           \
  427                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r)              \
  428                 :       RVR(17), RVR(16)                                \
  429                 :       "v18", "v19", "v20", "v21");                    \
  430                 break;                                                  \
  431         case 2:                                                         \
  432                 __asm(                                                  \
  433                 "cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n"          \
  434                 "cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n"          \
  435                 "and v19.16b,v19.16b," VR(16) ".16b\n"                  \
  436                 "and v18.16b,v18.16b," VR(16) ".16b\n"                  \
  437                 "shl " VR0(r) ".16b," VR0(r) ".16b,#1\n"                \
  438                 "shl " VR1(r) ".16b," VR1(r) ".16b,#1\n"                \
  439                 "eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n"           \
  440                 "eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n"           \
  441                 :       UVR0(r), UVR1(r)                                \
  442                 :       RVR(17), RVR(16)                                \
  443                 :       "v18", "v19");                                  \
  444                 break;                                                  \
  445         default:                                                        \
  446                 ZFS_ASM_BUG();                                          \
  447         }                                                               \
  448 }
  449 
  /*
   * MUL4(r...): apply MUL2() to the same register list twice in a row.
   * Accepts the register counts MUL2() accepts.
   */
  450 #define MUL4(r...)                                                      \
  451 {                                                                       \
  452         MUL2(r);                                                        \
  453         MUL2(r);                                                        \
  454 }
  455 
  456 /*
  457  * Unfortunately cannot use the macro, because GCC
  458  * will try to use the macro name and not value
  459  * later on...
  460  * Kept as a reference to what a register is
  461  * (here we're using actual registers for the
  462  * clobbered ones)
  463  */
  464 #define _0f             "v15"   /* _MULx2() loads 0x0f into every byte */
  465 #define _a_save         "v14"   /* _MULx2(): first register AND v15 */
  466 #define _b_save         "v13"   /* _MULx2(): second register AND v15 */
  467 #define _lt_mod_a       "v12"
  468 #define _lt_clmul_a     "v11"
  469 #define _lt_mod_b       "v10"
  470 #define _lt_clmul_b     "v15"   /* same register as _0f: v15 is reused */
  471 
  472 #define _MULx2(c, r...)                                                 \
  473 {       /* only a two-register list is handled; else ZFS_ASM_BUG() */   \
  474         switch (REG_CNT(r)) {                                           \
  475         case 2:                                                         \
  476                 __asm(                                                  \
  477                 /* v15 = 0x0f in every byte; v10, v11 = lt0, lt1 */     \
  478                 "movi v15.16b,#0x0f\n"                                  \
  479                 "ld1 { v10.4s },%[lt0]\n"                               \
  480                 "ld1 { v11.4s },%[lt1]\n"                               \
  481                 /* v14/v13 keep low nibbles; VR0/VR1 become high ones */ \
  482                 "and v14.16b," VR0(r) ".16b,v15.16b\n"                  \
  483                 "and v13.16b," VR1(r) ".16b,v15.16b\n"                  \
  484                 "ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n"               \
  485                 "ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n"               \
  486                 /* table lookups (lt0, lt1) indexed by high nibbles */  \
  487                 "tbl v12.16b,{v10.16b}," VR0(r) ".16b\n"                \
  488                 "tbl v10.16b,{v10.16b}," VR1(r) ".16b\n"                \
  489                 "tbl v15.16b,{v11.16b}," VR0(r) ".16b\n"                \
  490                 "tbl v11.16b,{v11.16b}," VR1(r) ".16b\n"                \
  491                 /* VR0/VR1 = xor of their two high-nibble lookups */    \
  492                 "eor " VR0(r) ".16b,v15.16b,v12.16b\n"                  \
  493                 "eor " VR1(r) ".16b,v11.16b,v10.16b\n"                  \
  494                 /* v10, v15 = lt2, lt3 (v15 no longer holds the mask) */ \
  495                 "ld1 { v10.4s },%[lt2]\n"                               \
  496                 "ld1 { v15.4s },%[lt3]\n"                               \
  497                 /* table lookups (lt2, lt3) indexed by saved low nibbles */ \
  498                 "tbl v12.16b,{v10.16b},v14.16b\n"                       \
  499                 "tbl v10.16b,{v10.16b},v13.16b\n"                       \
  500                 "tbl v11.16b,{v15.16b},v14.16b\n"                       \
  501                 "tbl v15.16b,{v15.16b},v13.16b\n"                       \
  502                 /* xor both low-nibble lookups into VR0/VR1 */          \
  503                 "eor " VR0(r) ".16b," VR0(r) ".16b,v12.16b\n"           \
  504                 "eor " VR1(r) ".16b," VR1(r) ".16b,v10.16b\n"           \
  505                 "eor " VR0(r) ".16b," VR0(r) ".16b,v11.16b\n"           \
  506                 "eor " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n"           \
  507                 :       UVR0(r), UVR1(r)                                \
  508                 :       [lt0] "Q" ((gf_clmul_mod_lt[4*(c)+0][0])),      \
  509                 [lt1] "Q" ((gf_clmul_mod_lt[4*(c)+1][0])),              \
  510                 [lt2] "Q" ((gf_clmul_mod_lt[4*(c)+2][0])),              \
  511                 [lt3] "Q" ((gf_clmul_mod_lt[4*(c)+3][0]))               \
  512                 :       "v10", "v11", "v12", "v13", "v14", "v15");      \
  513                 break;                                                  \
  514         default:                                                        \
  515                 ZFS_ASM_BUG();                                          \
  516         }                                                               \
  517 }
  518 
  519 #define MUL(c, r...)    /* runs _MULx2(c, ...) on r, two registers at a time */ \
  520 {                                                                       \
  521         switch (REG_CNT(r)) {                                           \
  522         case 4: /* R_23 pair first, then the R_01 pair */               \
  523                 _MULx2(c, R_23(r));                                     \
  524                 _MULx2(c, R_01(r));                                     \
  525                 break;                                                  \
  526         case 2: /* a single pair */                                     \
  527                 _MULx2(c, R_01(r));                                     \
  528                 break;                                                  \
  529         default: /* any other register count is unsupported */          \
  530                 ZFS_ASM_BUG();                                          \
  531         }                                                               \
  532 }
  533 
  534 #define raidz_math_begin()      kfpu_begin()    /* alias for kfpu_begin() */
  535 #define raidz_math_end()        kfpu_end()      /* alias for kfpu_end() */
  536 
  537 /* Overkill: declares w0..w38; in the kernel build w31..w38 all name v31. */
  538 #if defined(_KERNEL)    /* kernel: each wN is bound to a named vector register */
  539 #define GEN_X_DEFINE_0_3()      \
  540 register unsigned char w0 asm("v0") __attribute__((vector_size(16)));   \
  541 register unsigned char w1 asm("v1") __attribute__((vector_size(16)));   \
  542 register unsigned char w2 asm("v2") __attribute__((vector_size(16)));   \
  543 register unsigned char w3 asm("v3") __attribute__((vector_size(16)));
  544 #define GEN_X_DEFINE_4_5()      \
  545 register unsigned char w4 asm("v4") __attribute__((vector_size(16)));   \
  546 register unsigned char w5 asm("v5") __attribute__((vector_size(16)));
  547 #define GEN_X_DEFINE_6_7()      \
  548 register unsigned char w6 asm("v6") __attribute__((vector_size(16)));   \
  549 register unsigned char w7 asm("v7") __attribute__((vector_size(16)));
  550 #define GEN_X_DEFINE_8_9()      \
  551 register unsigned char w8 asm("v8") __attribute__((vector_size(16)));   \
  552 register unsigned char w9 asm("v9") __attribute__((vector_size(16)));
  553 #define GEN_X_DEFINE_10_11()    \
  554 register unsigned char w10 asm("v10") __attribute__((vector_size(16))); \
  555 register unsigned char w11 asm("v11") __attribute__((vector_size(16)));
  556 #define GEN_X_DEFINE_12_15()    \
  557 register unsigned char w12 asm("v12") __attribute__((vector_size(16))); \
  558 register unsigned char w13 asm("v13") __attribute__((vector_size(16))); \
  559 register unsigned char w14 asm("v14") __attribute__((vector_size(16))); \
  560 register unsigned char w15 asm("v15") __attribute__((vector_size(16)));
  561 #define GEN_X_DEFINE_16()       \
  562 register unsigned char w16 asm("v16") __attribute__((vector_size(16)));
  563 #define GEN_X_DEFINE_17()       \
  564 register unsigned char w17 asm("v17") __attribute__((vector_size(16)));
  565 #define GEN_X_DEFINE_18_21()    \
  566 register unsigned char w18 asm("v18") __attribute__((vector_size(16))); \
  567 register unsigned char w19 asm("v19") __attribute__((vector_size(16))); \
  568 register unsigned char w20 asm("v20") __attribute__((vector_size(16))); \
  569 register unsigned char w21 asm("v21") __attribute__((vector_size(16)));
  570 #define GEN_X_DEFINE_22_23()    \
  571 register unsigned char w22 asm("v22") __attribute__((vector_size(16))); \
  572 register unsigned char w23 asm("v23") __attribute__((vector_size(16)));
  573 #define GEN_X_DEFINE_24_27()    \
  574 register unsigned char w24 asm("v24") __attribute__((vector_size(16))); \
  575 register unsigned char w25 asm("v25") __attribute__((vector_size(16))); \
  576 register unsigned char w26 asm("v26") __attribute__((vector_size(16))); \
  577 register unsigned char w27 asm("v27") __attribute__((vector_size(16)));
  578 #define GEN_X_DEFINE_28_30()    \
  579 register unsigned char w28 asm("v28") __attribute__((vector_size(16))); \
  580 register unsigned char w29 asm("v29") __attribute__((vector_size(16))); \
  581 register unsigned char w30 asm("v30") __attribute__((vector_size(16)));
  582 #define GEN_X_DEFINE_31()       \
  583 register unsigned char w31 asm("v31") __attribute__((vector_size(16)));
  584 #define GEN_X_DEFINE_32()       /* w32..w38 below are also bound to v31 */ \
  585 register unsigned char w32 asm("v31") __attribute__((vector_size(16)));
  586 #define GEN_X_DEFINE_33_36()    \
  587 register unsigned char w33 asm("v31") __attribute__((vector_size(16))); \
  588 register unsigned char w34 asm("v31") __attribute__((vector_size(16))); \
  589 register unsigned char w35 asm("v31") __attribute__((vector_size(16))); \
  590 register unsigned char w36 asm("v31") __attribute__((vector_size(16)));
  591 #define GEN_X_DEFINE_37_38()    \
  592 register unsigned char w37 asm("v31") __attribute__((vector_size(16))); \
  593 register unsigned char w38 asm("v31") __attribute__((vector_size(16)));
  594 #define GEN_X_DEFINE_ALL()      /* expands every GEN_X_DEFINE_* group above */ \
  595         GEN_X_DEFINE_0_3()      \
  596         GEN_X_DEFINE_4_5()      \
  597         GEN_X_DEFINE_6_7()      \
  598         GEN_X_DEFINE_8_9()      \
  599         GEN_X_DEFINE_10_11()    \
  600         GEN_X_DEFINE_12_15()    \
  601         GEN_X_DEFINE_16()       \
  602         GEN_X_DEFINE_17()       \
  603         GEN_X_DEFINE_18_21()    \
  604         GEN_X_DEFINE_22_23()    \
  605         GEN_X_DEFINE_24_27()    \
  606         GEN_X_DEFINE_28_30()    \
  607         GEN_X_DEFINE_31()       \
  608         GEN_X_DEFINE_32()       \
  609         GEN_X_DEFINE_33_36()    \
  610         GEN_X_DEFINE_37_38()
  611 #else   /* !_KERNEL: same variable names, no explicit register binding */
  612 #define GEN_X_DEFINE_0_3()      \
  613         unsigned char w0 __attribute__((vector_size(16)));      \
  614         unsigned char w1 __attribute__((vector_size(16)));      \
  615         unsigned char w2 __attribute__((vector_size(16)));      \
  616         unsigned char w3 __attribute__((vector_size(16)));
  617 #define GEN_X_DEFINE_4_5()      \
  618         unsigned char w4 __attribute__((vector_size(16)));      \
  619         unsigned char w5 __attribute__((vector_size(16)));
  620 #define GEN_X_DEFINE_6_7()      \
  621         unsigned char w6 __attribute__((vector_size(16)));      \
  622         unsigned char w7 __attribute__((vector_size(16)));
  623 #define GEN_X_DEFINE_8_9()      \
  624         unsigned char w8 __attribute__((vector_size(16)));      \
  625         unsigned char w9 __attribute__((vector_size(16)));
  626 #define GEN_X_DEFINE_10_11()    \
  627         unsigned char w10 __attribute__((vector_size(16)));     \
  628         unsigned char w11 __attribute__((vector_size(16)));
  629 #define GEN_X_DEFINE_12_15()    \
  630         unsigned char w12 __attribute__((vector_size(16)));     \
  631         unsigned char w13 __attribute__((vector_size(16)));     \
  632         unsigned char w14 __attribute__((vector_size(16)));     \
  633         unsigned char w15 __attribute__((vector_size(16)));
  634 #define GEN_X_DEFINE_16()       \
  635         unsigned char w16 __attribute__((vector_size(16)));
  636 #define GEN_X_DEFINE_17()       \
  637         unsigned char w17 __attribute__((vector_size(16)));
  638 #define GEN_X_DEFINE_18_21()    \
  639         unsigned char w18 __attribute__((vector_size(16)));     \
  640         unsigned char w19 __attribute__((vector_size(16)));     \
  641         unsigned char w20 __attribute__((vector_size(16)));     \
  642         unsigned char w21 __attribute__((vector_size(16)));
  643 #define GEN_X_DEFINE_22_23()    \
  644         unsigned char w22 __attribute__((vector_size(16)));     \
  645         unsigned char w23 __attribute__((vector_size(16)));
  646 #define GEN_X_DEFINE_24_27()    \
  647         unsigned char w24 __attribute__((vector_size(16)));     \
  648         unsigned char w25 __attribute__((vector_size(16)));     \
  649         unsigned char w26 __attribute__((vector_size(16)));     \
  650         unsigned char w27 __attribute__((vector_size(16)));
  651 #define GEN_X_DEFINE_28_30()    \
  652         unsigned char w28 __attribute__((vector_size(16)));     \
  653         unsigned char w29 __attribute__((vector_size(16)));     \
  654         unsigned char w30 __attribute__((vector_size(16)));
  655 #define GEN_X_DEFINE_31()       \
  656         unsigned char w31 __attribute__((vector_size(16)));
  657 #define GEN_X_DEFINE_32()       \
  658         unsigned char w32 __attribute__((vector_size(16)));
  659 #define GEN_X_DEFINE_33_36()    \
  660         unsigned char w33 __attribute__((vector_size(16)));     \
  661         unsigned char w34 __attribute__((vector_size(16)));     \
  662         unsigned char w35 __attribute__((vector_size(16)));     \
  663         unsigned char w36 __attribute__((vector_size(16)));
  664 #define GEN_X_DEFINE_37_38()    \
  665         unsigned char w37 __attribute__((vector_size(16)));     \
  666         unsigned char w38 __attribute__((vector_size(16)));
  667 #define GEN_X_DEFINE_ALL()      /* expands every GEN_X_DEFINE_* group above */ \
  668         GEN_X_DEFINE_0_3()      \
  669         GEN_X_DEFINE_4_5()      \
  670         GEN_X_DEFINE_6_7()      \
  671         GEN_X_DEFINE_8_9()      \
  672         GEN_X_DEFINE_10_11()    \
  673         GEN_X_DEFINE_12_15()    \
  674         GEN_X_DEFINE_16()       \
  675         GEN_X_DEFINE_17()       \
  676         GEN_X_DEFINE_18_21()    \
  677         GEN_X_DEFINE_22_23()    \
  678         GEN_X_DEFINE_24_27()    \
  679         GEN_X_DEFINE_28_30()    \
  680         GEN_X_DEFINE_31()       \
  681         GEN_X_DEFINE_32()       \
  682         GEN_X_DEFINE_33_36()    \
  683         GEN_X_DEFINE_37_38()
  684 #endif  /* _KERNEL */

Cache object: f4e3d729fb532132201320e8cb2748d7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.