The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/vdev_raidz_math_powerpc_altivec_common.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
   23  *           <romain.dolbeau@european-processor-initiative.eu>
   24  */
   25 
   26 #include <sys/types.h>
   27 #include <sys/simd.h>
   28 
   /*
    * REG_CNT(r...) expands to the number of arguments in r (1 to 8).
    * The arguments in r push the descending list 8..1 to the right, so
    * the ninth parameter of _REG_CNT (N) lands on the matching count.
    */
   29 #define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
   30 #define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
   31 
   /*
    * VRn_ picks argument number n (counting from 0) out of its argument
    * list and stringifies it into the named asm operand reference
    * "%[w<REG>]". All leading and trailing arguments are discarded.
    */
   32 #define VR0_(REG, ...) "%[w"#REG"]"
   33 #define VR1_(_1, REG, ...) "%[w"#REG"]"
   34 #define VR2_(_1, _2, REG, ...) "%[w"#REG"]"
   35 #define VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
   36 #define VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
   37 #define VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
   38 #define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
   39 #define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
   40 
   41 /*
   42  * Here we need registers not used otherwise.
   43  * They will be used in unused ASM for the case
   44  * with more registers than required... but GCC
   45  * will still need to make sure the constraints
   46  * are correct, and duplicate constraints are illegal
   47  * ... and we use the "register" number as a name
   48  */
   49 
   /*
    * VRn(r...) yields the asm operand reference "%[w<name>]" for the
    * n-th entry of r. VR2..VR7 append filler names (36, 35, ...) after r
    * so that the selector still finds an argument in position n when r
    * holds fewer than n+1 entries. VR0 and VR1 append nothing.
    * VR(X) yields the operand reference for the single name X.
    */
   50 #define VR0(r...) VR0_(r)
   51 #define VR1(r...) VR1_(r)
   52 #define VR2(r...) VR2_(r, 36)
   53 #define VR3(r...) VR3_(r, 36, 35)
   54 #define VR4(r...) VR4_(r, 36, 35, 34, 33)
   55 #define VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
   56 #define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
   57 #define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)

   59 #define VR(X) "%[w"#X"]"
   60 
   /*
    * RVRn(r...) builds the asm *input* operand for the n-th entry of r:
    * an operand named w<REG>, constraint "v", bound to the C variable
    * w<REG>. The selection and filler scheme is the same as for VRn.
    * RVR(X) builds the same input operand for the single name X.
    */
   61 #define RVR0_(REG, ...) [w##REG] "v" (w##REG)
   62 #define RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
   63 #define RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
   64 #define RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
   65 #define RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
   66 #define RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
   67 #define RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
   68 #define RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)

   70 #define RVR0(r...) RVR0_(r)
   71 #define RVR1(r...) RVR1_(r)
   72 #define RVR2(r...) RVR2_(r, 36)
   73 #define RVR3(r...) RVR3_(r, 36, 35)
   74 #define RVR4(r...) RVR4_(r, 36, 35, 34, 33)
   75 #define RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
   76 #define RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
   77 #define RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

   79 #define RVR(X) [w##X] "v" (w##X)
   80 
   /*
    * WVRn(r...) builds the asm *output* operand for the n-th entry of r:
    * an operand named w<REG>, constraint "=v" (written, previous value
    * not needed), bound to the C variable w<REG>. The selection and
    * filler scheme is the same as for VRn.
    * WVR(X) builds the same output operand for the single name X.
    */
   81 #define WVR0_(REG, ...) [w##REG] "=v" (w##REG)
   82 #define WVR1_(_1, REG, ...) [w##REG] "=v" (w##REG)
   83 #define WVR2_(_1, _2, REG, ...) [w##REG] "=v" (w##REG)
   84 #define WVR3_(_1, _2, _3, REG, ...) [w##REG] "=v" (w##REG)
   85 #define WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=v" (w##REG)
   86 #define WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=v" (w##REG)
   87 #define WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=v" (w##REG)
   88 #define WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=v" (w##REG)

   90 #define WVR0(r...) WVR0_(r)
   91 #define WVR1(r...) WVR1_(r)
   92 #define WVR2(r...) WVR2_(r, 36)
   93 #define WVR3(r...) WVR3_(r, 36, 35)
   94 #define WVR4(r...) WVR4_(r, 36, 35, 34, 33)
   95 #define WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
   96 #define WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
   97 #define WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

   99 #define WVR(X) [w##X] "=v" (w##X)
  100 
  /*
   * UVRn(r...) builds the asm *read-write* operand for the n-th entry
   * of r: an operand named w<REG>, constraint "+&v" (both read and
   * written, marked early-clobber), bound to the C variable w<REG>.
   * The selection and filler scheme is the same as for VRn.
   * UVR(X) builds the same read-write operand for the single name X.
   */
  101 #define UVR0_(REG, ...) [w##REG] "+&v" (w##REG)
  102 #define UVR1_(_1, REG, ...) [w##REG] "+&v" (w##REG)
  103 #define UVR2_(_1, _2, REG, ...) [w##REG] "+&v" (w##REG)
  104 #define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&v" (w##REG)
  105 #define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&v" (w##REG)
  106 #define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&v" (w##REG)
  107 #define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&v" (w##REG)
  108 #define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&v" (w##REG)

  110 #define UVR0(r...) UVR0_(r)
  111 #define UVR1(r...) UVR1_(r)
  112 #define UVR2(r...) UVR2_(r, 36)
  113 #define UVR3(r...) UVR3_(r, 36, 35)
  114 #define UVR4(r...) UVR4_(r, 36, 35, 34, 33)
  115 #define UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
  116 #define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
  117 #define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

  119 #define UVR(X) [w##X] "+&v" (w##X)
  120 
  /*
   * R_01(...) expands to the first two entries of a register list and
   * R_23(...) to the third and fourth. R_23 appends the fillers 1, 2, 3
   * so that _R_23 always receives enough arguments even when the list
   * is shorter than four entries.
   */
  121 #define R_01(REG1, REG2, ...) REG1, REG2
  122 #define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
  123 #define R_23(REG...) _R_23(REG, 1, 2, 3)
  124 
  /* Used as the default: arm of every register-count switch below. */
  125 #define ZFS_ASM_BUG()   ASSERT(0)
  126 
  /*
   * OFFSET(ptr, val): address val bytes past ptr, as unsigned char *.
   * Both arguments are parenthesised so that an expression passed as
   * val (e.g. a shift or a conditional) cannot re-associate with the
   * addition.
   */
  127 #define OFFSET(ptr, val)        (((unsigned char *)(ptr)) + (val))
  128 
  /*
   * Lookup table defined elsewhere: 4*256 rows of 16 bytes. _MULx2()
   * below loads rows 4*c+0 .. 4*c+3 for a constant c.
   */
  129 extern const uint8_t gf_clmul_mod_lt[4*256][16];

  /* Size in bytes of one vector element; also the alignment of v_t. */
  131 #define ELEM_SIZE 16

  /* One ELEM_SIZE-byte vector, aligned to ELEM_SIZE bytes. */
  133 typedef struct v {
  134         uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
  135 } v_t;
  136 
  /*
   * XOR_ACC(src, r...): XOR data loaded from src into the registers r.
   * Register i of r is XORed with the vector loaded (lvx) from
   * src + 16*i. Register counts of 8, 4 and 2 are handled; any other
   * count reaches ZFS_ASM_BUG().
   * The loaded data goes through the hard-coded scratch registers
   * v18-v21 (v20-v21 only for two registers), which are listed as
   * clobbers. The 8-register case reuses the same four scratch
   * registers for its second half.
   * src is expanded once per register, so it should be free of side
   * effects.
   */
  137 #define XOR_ACC(src, r...)                                      \
  138 {                                                               \
  139         switch (REG_CNT(r)) {                                   \
  140         case 8:                                                 \
  141                 __asm__ __volatile__(                           \
  142                 "lvx 21,0,%[SRC0]\n"                            \
  143                 "lvx 20,0,%[SRC1]\n"                            \
  144                 "lvx 19,0,%[SRC2]\n"                            \
  145                 "lvx 18,0,%[SRC3]\n"                            \
  146                 "vxor " VR0(r) "," VR0(r) ",21\n"               \
  147                 "vxor " VR1(r) "," VR1(r) ",20\n"               \
  148                 "vxor " VR2(r) "," VR2(r) ",19\n"               \
  149                 "vxor " VR3(r) "," VR3(r) ",18\n"               \
  150                 "lvx 21,0,%[SRC4]\n"                            \
  151                 "lvx 20,0,%[SRC5]\n"                            \
  152                 "lvx 19,0,%[SRC6]\n"                            \
  153                 "lvx 18,0,%[SRC7]\n"                            \
  154                 "vxor " VR4(r) "," VR4(r) ",21\n"               \
  155                 "vxor " VR5(r) "," VR5(r) ",20\n"               \
  156                 "vxor " VR6(r) "," VR6(r) ",19\n"               \
  157                 "vxor " VR7(r) "," VR7(r) ",18\n"               \
  158                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r),     \
  159                         UVR4(r), UVR5(r), UVR6(r), UVR7(r)      \
  160                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  161                 [SRC1] "r" ((OFFSET(src, 16))),                 \
  162                 [SRC2] "r" ((OFFSET(src, 32))),                 \
  163                 [SRC3] "r" ((OFFSET(src, 48))),                 \
  164                 [SRC4] "r" ((OFFSET(src, 64))),                 \
  165                 [SRC5] "r" ((OFFSET(src, 80))),                 \
  166                 [SRC6] "r" ((OFFSET(src, 96))),                 \
  167                 [SRC7] "r" ((OFFSET(src, 112)))                 \
  168                 :       "v18", "v19", "v20", "v21");            \
  169                 break;                                          \
  170         case 4:                                                 \
  171                 __asm__ __volatile__(                           \
  172                 "lvx 21,0,%[SRC0]\n"                            \
  173                 "lvx 20,0,%[SRC1]\n"                            \
  174                 "lvx 19,0,%[SRC2]\n"                            \
  175                 "lvx 18,0,%[SRC3]\n"                            \
  176                 "vxor " VR0(r) "," VR0(r) ",21\n"               \
  177                 "vxor " VR1(r) "," VR1(r) ",20\n"               \
  178                 "vxor " VR2(r) "," VR2(r) ",19\n"               \
  179                 "vxor " VR3(r) "," VR3(r) ",18\n"               \
  180                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r)      \
  181                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  182                 [SRC1] "r" ((OFFSET(src, 16))),                 \
  183                 [SRC2] "r" ((OFFSET(src, 32))),                 \
  184                 [SRC3] "r" ((OFFSET(src, 48)))                  \
  185                 :       "v18", "v19", "v20", "v21");            \
  186                 break;                                          \
  187         case 2:                                                 \
  188                 __asm__ __volatile__(                           \
  189                 "lvx 21,0,%[SRC0]\n"                            \
  190                 "lvx 20,0,%[SRC1]\n"                            \
  191                 "vxor " VR0(r) "," VR0(r) ",21\n"               \
  192                 "vxor " VR1(r) "," VR1(r) ",20\n"               \
  193                 :       UVR0(r), UVR1(r)                        \
  194                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  195                 [SRC1] "r" ((OFFSET(src, 16)))                  \
  196                 :       "v20", "v21");                          \
  197                 break;                                          \
  198         default:                                                \
  199                 ZFS_ASM_BUG();                                  \
  200         }                                                       \
  201 }
  202 
  /*
   * XOR(r...): XOR the first half of the register list into the second
   * half. With 8 registers, r4..r7 ^= r0..r3; with 4 registers,
   * r2..r3 ^= r0..r1. The first half is only read. Any other register
   * count reaches ZFS_ASM_BUG().
   */
  203 #define XOR(r...)                                               \
  204 {                                                               \
  205         switch (REG_CNT(r)) {                                   \
  206         case 8:                                                 \
  207                 __asm__ __volatile__(                           \
  208                 "vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"       \
  209                 "vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"       \
  210                 "vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"       \
  211                 "vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"       \
  212                 :       UVR4(r), UVR5(r), UVR6(r), UVR7(r)      \
  213                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r));    \
  214                 break;                                          \
  215         case 4:                                                 \
  216                 __asm__ __volatile__(                           \
  217                 "vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"       \
  218                 "vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"       \
  219                 :       UVR2(r), UVR3(r)                        \
  220                 :       RVR0(r), RVR1(r));                      \
  221                 break;                                          \
  222         default:                                                \
  223                 ZFS_ASM_BUG();                                  \
  224         }                                                       \
  225 }
  226 
  /*
   * ZERO(r...): clear every register in r by XORing it with itself.
   * Register counts of 8, 4 and 2 are handled; any other count reaches
   * ZFS_ASM_BUG(). The registers are declared as outputs only.
   */
  227 #define ZERO(r...)                                              \
  228 {                                                               \
  229         switch (REG_CNT(r)) {                                   \
  230         case 8:                                                 \
  231                 __asm__ __volatile__(                           \
  232                 "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"       \
  233                 "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"       \
  234                 "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"       \
  235                 "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"       \
  236                 "vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"       \
  237                 "vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"       \
  238                 "vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"       \
  239                 "vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"       \
  240                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r),     \
  241                         WVR4(r), WVR5(r), WVR6(r), WVR7(r));    \
  242                 break;                                          \
  243         case 4:                                                 \
  244                 __asm__ __volatile__(                           \
  245                 "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"       \
  246                 "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"       \
  247                 "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"       \
  248                 "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"       \
  249                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r));    \
  250                 break;                                          \
  251         case 2:                                                 \
  252                 __asm__ __volatile__(                           \
  253                 "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"       \
  254                 "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"       \
  255                 :       WVR0(r), WVR1(r));                      \
  256                 break;                                          \
  257         default:                                                \
  258                 ZFS_ASM_BUG();                                  \
  259         }                                                       \
  260 }
  261 
  /*
   * COPY(r...): copy the first half of the register list into the
   * second half, using "vor dst,src,src" as the move. With 8 registers,
   * r4..r7 = r0..r3; with 4 registers, r2..r3 = r0..r1. Any other
   * register count reaches ZFS_ASM_BUG().
   */
  262 #define COPY(r...)                                              \
  263 {                                                               \
  264         switch (REG_CNT(r)) {                                   \
  265         case 8:                                                 \
  266                 __asm__ __volatile__(                           \
  267                 "vor " VR4(r) "," VR0(r) "," VR0(r) "\n"        \
  268                 "vor " VR5(r) "," VR1(r) "," VR1(r) "\n"        \
  269                 "vor " VR6(r) "," VR2(r) "," VR2(r) "\n"        \
  270                 "vor " VR7(r) "," VR3(r) "," VR3(r) "\n"        \
  271                 :       WVR4(r), WVR5(r), WVR6(r), WVR7(r)      \
  272                 :       RVR0(r), RVR1(r), RVR2(r), RVR3(r));    \
  273                 break;                                          \
  274         case 4:                                                 \
  275                 __asm__ __volatile__(                           \
  276                 "vor " VR2(r) "," VR0(r) "," VR0(r) "\n"        \
  277                 "vor " VR3(r) "," VR1(r) "," VR1(r) "\n"        \
  278                 :       WVR2(r), WVR3(r)                        \
  279                 :       RVR0(r), RVR1(r));                      \
  280                 break;                                          \
  281         default:                                                \
  282                 ZFS_ASM_BUG();                                  \
  283         }                                                       \
  284 }
  285 
  /*
   * LOAD(src, r...): load register i of r (lvx) from src + 16*i.
   * Register counts of 8, 4 and 2 are handled; any other count reaches
   * ZFS_ASM_BUG(). src is expanded once per register, so it should be
   * free of side effects.
   */
  286 #define LOAD(src, r...)                                         \
  287 {                                                               \
  288         switch (REG_CNT(r)) {                                   \
  289         case 8:                                                 \
  290                 __asm__ __volatile__(                           \
  291                 "lvx " VR0(r) " ,0,%[SRC0]\n"                   \
  292                 "lvx " VR1(r) " ,0,%[SRC1]\n"                   \
  293                 "lvx " VR2(r) " ,0,%[SRC2]\n"                   \
  294                 "lvx " VR3(r) " ,0,%[SRC3]\n"                   \
  295                 "lvx " VR4(r) " ,0,%[SRC4]\n"                   \
  296                 "lvx " VR5(r) " ,0,%[SRC5]\n"                   \
  297                 "lvx " VR6(r) " ,0,%[SRC6]\n"                   \
  298                 "lvx " VR7(r) " ,0,%[SRC7]\n"                   \
  299                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r),     \
  300                         WVR4(r), WVR5(r), WVR6(r), WVR7(r)      \
  301                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  302                 [SRC1] "r" ((OFFSET(src, 16))),                 \
  303                 [SRC2] "r" ((OFFSET(src, 32))),                 \
  304                 [SRC3] "r" ((OFFSET(src, 48))),                 \
  305                 [SRC4] "r" ((OFFSET(src, 64))),                 \
  306                 [SRC5] "r" ((OFFSET(src, 80))),                 \
  307                 [SRC6] "r" ((OFFSET(src, 96))),                 \
  308                 [SRC7] "r" ((OFFSET(src, 112))));               \
  309                 break;                                          \
  310         case 4:                                                 \
  311                 __asm__ __volatile__(                           \
  312                 "lvx " VR0(r) " ,0,%[SRC0]\n"                   \
  313                 "lvx " VR1(r) " ,0,%[SRC1]\n"                   \
  314                 "lvx " VR2(r) " ,0,%[SRC2]\n"                   \
  315                 "lvx " VR3(r) " ,0,%[SRC3]\n"                   \
  316                 :       WVR0(r), WVR1(r), WVR2(r), WVR3(r)      \
  317                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  318                 [SRC1] "r" ((OFFSET(src, 16))),                 \
  319                 [SRC2] "r" ((OFFSET(src, 32))),                 \
  320                 [SRC3] "r" ((OFFSET(src, 48))));                \
  321                 break;                                          \
  322         case 2:                                                 \
  323                 __asm__ __volatile__(                           \
  324                 "lvx " VR0(r) " ,0,%[SRC0]\n"                   \
  325                 "lvx " VR1(r) " ,0,%[SRC1]\n"                   \
  326                 :       WVR0(r), WVR1(r)                        \
  327                 :       [SRC0] "r" ((OFFSET(src, 0))),          \
  328                 [SRC1] "r" ((OFFSET(src, 16))));                \
  329                 break;                                          \
  330         default:                                                \
  331                 ZFS_ASM_BUG();                                  \
  332         }                                                       \
  333 }
  334 
  /*
   * STORE(dst, r...): store register i of r (stvx) to dst + 16*i.
   * Register counts of 8, 4 and 2 are handled; any other count reaches
   * ZFS_ASM_BUG(). The asm has no output operands and declares a
   * "memory" clobber to cover the writes through dst. dst is expanded
   * once per register, so it should be free of side effects.
   */
  335 #define STORE(dst, r...)                                        \
  336 {                                                               \
  337         switch (REG_CNT(r)) {                                   \
  338         case 8:                                                 \
  339                 __asm__ __volatile__(                           \
  340                 "stvx " VR0(r) " ,0,%[DST0]\n"                  \
  341                 "stvx " VR1(r) " ,0,%[DST1]\n"                  \
  342                 "stvx " VR2(r) " ,0,%[DST2]\n"                  \
  343                 "stvx " VR3(r) " ,0,%[DST3]\n"                  \
  344                 "stvx " VR4(r) " ,0,%[DST4]\n"                  \
  345                 "stvx " VR5(r) " ,0,%[DST5]\n"                  \
  346                 "stvx " VR6(r) " ,0,%[DST6]\n"                  \
  347                 "stvx " VR7(r) " ,0,%[DST7]\n"                  \
  348                 : :     [DST0] "r" ((OFFSET(dst, 0))),          \
  349                 [DST1] "r" ((OFFSET(dst, 16))),                 \
  350                 [DST2] "r" ((OFFSET(dst, 32))),                 \
  351                 [DST3] "r" ((OFFSET(dst, 48))),                 \
  352                 [DST4] "r" ((OFFSET(dst, 64))),                 \
  353                 [DST5] "r" ((OFFSET(dst, 80))),                 \
  354                 [DST6] "r" ((OFFSET(dst, 96))),                 \
  355                 [DST7] "r" ((OFFSET(dst, 112))),                \
  356                 RVR0(r), RVR1(r), RVR2(r), RVR3(r),             \
  357                 RVR4(r), RVR5(r), RVR6(r), RVR7(r)              \
  358                 :       "memory");                              \
  359                 break;                                          \
  360         case 4:                                                 \
  361                 __asm__ __volatile__(                           \
  362                 "stvx " VR0(r) " ,0,%[DST0]\n"                  \
  363                 "stvx " VR1(r) " ,0,%[DST1]\n"                  \
  364                 "stvx " VR2(r) " ,0,%[DST2]\n"                  \
  365                 "stvx " VR3(r) " ,0,%[DST3]\n"                  \
  366                 : :     [DST0] "r" ((OFFSET(dst, 0))),          \
  367                 [DST1] "r" ((OFFSET(dst, 16))),                 \
  368                 [DST2] "r" ((OFFSET(dst, 32))),                 \
  369                 [DST3] "r" ((OFFSET(dst, 48))),                 \
  370                 RVR0(r), RVR1(r), RVR2(r), RVR3(r)              \
  371                 : "memory");                                    \
  372                 break;                                          \
  373         case 2:                                                 \
  374                 __asm__ __volatile__(                           \
  375                 "stvx " VR0(r) " ,0,%[DST0]\n"                  \
  376                 "stvx " VR1(r) " ,0,%[DST1]\n"                  \
  377                 : :     [DST0] "r" ((OFFSET(dst, 0))),          \
  378                 [DST1] "r" ((OFFSET(dst, 16))),                 \
  379                 RVR0(r), RVR1(r) : "memory");                   \
  380                 break;                                          \
  381         default:                                                \
  382                 ZFS_ASM_BUG();                                  \
  383         }                                                       \
  384 }
  385 
  386 /*
  387  * Unfortunately cannot use the macro, because GCC
  388  * will try to use the macro name and not value
  389  * later on...
  390  * Kept as a reference to what a numbered variable is
  391  */
  /*
   * Reference names only; the asm below spells the numbers out.
   * 17 and 16 are the w17/w16 operands set up by MUL2_SETUP() (all
   * bytes 0x00 and 0x1d respectively); 19 and 18 are scratch registers
   * used by MUL2().
   */
  392 #define _00     "17"
  393 #define _1d     "16"
  394 #define _temp0  "19"
  395 #define _temp1  "18"
  396 
  /*
   * MUL2_SETUP(): initialise the two constants that MUL2() reads.
   * w16 is filled with 14 in every byte and w17 with 15; they are added
   * per byte into w16 (14 + 15 = 29 = 0x1d), and w17 is then cleared to
   * zero by XORing it with itself.
   */
  397 #define MUL2_SETUP()                                            \
  398 {                                                               \
  399         __asm__ __volatile__(                                   \
  400                 "vspltisb " VR(16) ",14\n"                      \
  401                 "vspltisb " VR(17) ",15\n"                      \
  402                 "vaddubm " VR(16) "," VR(17) "," VR(16) "\n"    \
  403                 "vxor " VR(17) "," VR(17) "," VR(17) "\n"       \
  404                 :       WVR(16), WVR(17));                      \
  405 }
  406 
  /*
   * MUL2(r...): for every register in r (4 or 2 are handled), per byte:
   *   mask = (w17 > r, signed byte compare) AND w16;
   *   r    = r + r (byte-wise, modulo 256);
   *   r    = r XOR mask.
   * w17 and w16 are inputs only; MUL2_SETUP() sets them to 0x00 and
   * 0x1d per byte, so the mask is 0x1d exactly where the byte's top bit
   * was set. Presumably this is doubling in the RAID-Z Galois field --
   * confirm against the scalar implementation.
   * The masks live in the hard-coded scratch registers v18-v21
   * (v18-v19 for two registers), listed as clobbers. Any other
   * register count reaches ZFS_ASM_BUG().
   */
  407 #define MUL2(r...)                                              \
  408 {                                                               \
  409         switch (REG_CNT(r)) {                                   \
  410         case 4:                                                 \
  411                 __asm__ __volatile__(                           \
  412                 "vcmpgtsb 19," VR(17) "," VR0(r) "\n"           \
  413                 "vcmpgtsb 18," VR(17) "," VR1(r) "\n"           \
  414                 "vcmpgtsb 21," VR(17) "," VR2(r) "\n"           \
  415                 "vcmpgtsb 20," VR(17) "," VR3(r) "\n"           \
  416                 "vand 19,19," VR(16) "\n"                       \
  417                 "vand 18,18," VR(16) "\n"                       \
  418                 "vand 21,21," VR(16) "\n"                       \
  419                 "vand 20,20," VR(16) "\n"                       \
  420                 "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"    \
  421                 "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"    \
  422                 "vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"    \
  423                 "vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"    \
  424                 "vxor " VR0(r) ",19," VR0(r) "\n"               \
  425                 "vxor " VR1(r) ",18," VR1(r) "\n"               \
  426                 "vxor " VR2(r) ",21," VR2(r) "\n"               \
  427                 "vxor " VR3(r) ",20," VR3(r) "\n"               \
  428                 :       UVR0(r), UVR1(r), UVR2(r), UVR3(r)      \
  429                 :       RVR(17), RVR(16)                        \
  430                 :       "v18", "v19", "v20", "v21");            \
  431                 break;                                          \
  432         case 2:                                                 \
  433                 __asm__ __volatile__(                           \
  434                 "vcmpgtsb 19," VR(17) "," VR0(r) "\n"           \
  435                 "vcmpgtsb 18," VR(17) "," VR1(r) "\n"           \
  436                 "vand 19,19," VR(16) "\n"                       \
  437                 "vand 18,18," VR(16) "\n"                       \
  438                 "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"    \
  439                 "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"    \
  440                 "vxor " VR0(r) ",19," VR0(r) "\n"               \
  441                 "vxor " VR1(r) ",18," VR1(r) "\n"               \
  442                 :       UVR0(r), UVR1(r)                        \
  443                 :       RVR(17), RVR(16)                        \
  444                 :       "v18", "v19");                          \
  445                 break;                                          \
  446         default:                                                \
  447                 ZFS_ASM_BUG();                                  \
  448         }                                                       \
  449 }
  450 
  /* MUL4(r...): apply MUL2() to the registers in r twice in a row. */
  451 #define MUL4(r...)                                              \
  452 {                                                               \
  453         MUL2(r);                                                \
  454         MUL2(r);                                                \
  455 }
  456 
  457 /*
  458  * Unfortunately cannot use the macro, because GCC
  459  * will try to use the macro name and not value
  460  * later on...
  461  * Kept as a reference to what a register is
  462  * (here we're using actual registers for the
  463  * clobbered ones)
  464  */
  /*
   * Reference names for the hard-coded scratch registers v10-v15 used
   * by _MULx2(); the asm spells the numbers out. Note that _0f and
   * _lt_clmul_b both name register 15: it is reused within the
   * sequence.
   */
  465 #define _0f             "15"
  466 #define _a_save         "14"
  467 #define _b_save         "13"
  468 #define _lt_mod_a       "12"
  469 #define _lt_clmul_a     "11"
  470 #define _lt_mod_b       "10"
  471 #define _lt_clmul_b     "15"
  472 
  /*
   * _MULx2(c, r...): transform the two registers in r using the four
   * 16-byte rows gf_clmul_mod_lt[4*c+0 .. 4*c+3] (presumably
   * multiplication by the constant c in the RAID-Z Galois field --
   * confirm against the code that generates the table).
   *
   * Upper part: each byte is ANDed with 15 and the result is kept in
   * v14/v13; the byte is then shifted right by 4 (vsrab) and used as
   * the vperm selector into rows 0 and 1, and the two selections are
   * XORed together into the register.
   * Lower part: the values kept in v14/v13 select from rows 2 and 3,
   * and both selections are XORed into the register.
   *
   * Only a register count of 2 is handled; any other count reaches
   * ZFS_ASM_BUG(). v10-v15 are hard-coded scratch registers, listed as
   * clobbers, and several are reused during the sequence, so the
   * instruction order matters. c is expanded four times, so it should
   * be free of side effects.
   */
  473 #define _MULx2(c, r...)                                         \
  474 {                                                               \
  475         switch (REG_CNT(r)) {                                   \
  476         case 2:                                                 \
  477                 __asm__ __volatile__(                           \
  478                 /* lts for upper part */                        \
  479                 "vspltisb 15,15\n"                              \
  480                 "lvx 10,0,%[lt0]\n"                             \
  481                 "lvx 11,0,%[lt1]\n"                             \
  482                 /* upper part */                                \
  483                 "vand 14," VR0(r) ",15\n"                       \
  484                 "vand 13," VR1(r) ",15\n"                       \
  485                 "vspltisb 15,4\n"                               \
  486                 "vsrab " VR0(r) "," VR0(r) ",15\n"              \
  487                 "vsrab " VR1(r) "," VR1(r) ",15\n"              \
  488                                                                 \
  489                 "vperm 12,10,10," VR0(r) "\n"                   \
  490                 "vperm 10,10,10," VR1(r) "\n"                   \
  491                 "vperm 15,11,11," VR0(r) "\n"                   \
  492                 "vperm 11,11,11," VR1(r) "\n"                   \
  493                                                                 \
  494                 "vxor " VR0(r) ",15,12\n"                       \
  495                 "vxor " VR1(r) ",11,10\n"                       \
  496                 /* lts for lower part */                        \
  497                 "lvx 10,0,%[lt2]\n"                             \
  498                 "lvx 15,0,%[lt3]\n"                             \
  499                 /* lower part */                                \
  500                 "vperm 12,10,10,14\n"                           \
  501                 "vperm 10,10,10,13\n"                           \
  502                 "vperm 11,15,15,14\n"                           \
  503                 "vperm 15,15,15,13\n"                           \
  504                                                                 \
  505                 "vxor " VR0(r) "," VR0(r) ",12\n"               \
  506                 "vxor " VR1(r) "," VR1(r) ",10\n"               \
  507                 "vxor " VR0(r) "," VR0(r) ",11\n"               \
  508                 "vxor " VR1(r) "," VR1(r) ",15\n"               \
  509                 : UVR0(r), UVR1(r)                              \
  510                 : [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),   \
  511                 [lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),     \
  512                 [lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),     \
  513                 [lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))      \
  514                 : "v10", "v11", "v12", "v13", "v14", "v15");    \
  515                 break;                                          \
  516         default:                                                \
  517                 ZFS_ASM_BUG();                                  \
  518         }                                                       \
  519 }
  520 
  521 #define MUL(c, r...) /* run _MULx2 with constant c over the register list r */ \
  522 {                                                               \
  523         switch (REG_CNT(r)) { /* dispatch on the value of REG_CNT(r) */ \
  524         case 4: /* two _MULx2 passes: R_23(r) first, then R_01(r) */ \
  525                 _MULx2(c, R_23(r)); /* presumably registers 2 and 3 of r - confirm against R_23 */ \
  526                 _MULx2(c, R_01(r)); /* presumably registers 0 and 1 of r - confirm against R_01 */ \
  527                 break;                                          \
  528         case 2: /* a single _MULx2 pass over R_01(r) */ \
  529                 _MULx2(c, R_01(r));                             \
  530                 break;                                          \
  531         default: /* any other REG_CNT(r) value */ \
  532                 ZFS_ASM_BUG(); /* defined elsewhere; presumably a build-time failure - confirm */ \
  533         }                                                       \
  534 }
  535 
  536 #define raidz_math_begin()      kfpu_begin() /* alias for kfpu_begin(); presumably called before the vector code runs - confirm */
  537 #define raidz_math_end()        kfpu_end() /* alias for kfpu_end(); presumably the matching call after the vector code - confirm */
  538 
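  539 /* Overkill... (the register-pinned declarations below are compiled out by "#if 0"; the plain declarations in the #else branch are the ones used) */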
  540 #if 0 // defined(_KERNEL)
  541 #define GEN_X_DEFINE_0_3()      /* declares 16-byte unsigned char vectors w0..w3, each bound by a register asm label to the register whose number matches the variable ("0".."3") */ \
  542 register unsigned char w0 asm("0") __attribute__((vector_size(16)));    /* label was empty (""); siblings all follow wN -> "N" */ \
  543 register unsigned char w1 asm("1") __attribute__((vector_size(16)));    \
  544 register unsigned char w2 asm("2") __attribute__((vector_size(16)));    \
  545 register unsigned char w3 asm("3") __attribute__((vector_size(16)));
  546 #define GEN_X_DEFINE_4_5()      /* register-pinned variant (inside the "#if 0" branch): 16-byte unsigned char vectors w4, w5 bound to registers "4", "5" */ \
  547 register unsigned char w4 asm("4") __attribute__((vector_size(16)));    \
  548 register unsigned char w5 asm("5") __attribute__((vector_size(16)));
  549 #define GEN_X_DEFINE_6_7()      /* w6, w7 bound to registers "6", "7" */ \
  550 register unsigned char w6 asm("6") __attribute__((vector_size(16)));    \
  551 register unsigned char w7 asm("7") __attribute__((vector_size(16)));
  552 #define GEN_X_DEFINE_8_9()      /* w8, w9 bound to registers "8", "9" */ \
  553 register unsigned char w8 asm("8") __attribute__((vector_size(16)));    \
  554 register unsigned char w9 asm("9") __attribute__((vector_size(16)));
  555 #define GEN_X_DEFINE_10_11()    /* w10, w11 bound to registers "10", "11" */ \
  556 register unsigned char w10 asm("10") __attribute__((vector_size(16)));  \
  557 register unsigned char w11 asm("11") __attribute__((vector_size(16)));
  558 #define GEN_X_DEFINE_12_15()    /* w12..w15 bound to registers "12".."15" */ \
  559 register unsigned char w12 asm("12") __attribute__((vector_size(16)));  \
  560 register unsigned char w13 asm("13") __attribute__((vector_size(16)));  \
  561 register unsigned char w14 asm("14") __attribute__((vector_size(16)));  \
  562 register unsigned char w15 asm("15") __attribute__((vector_size(16)));
  563 #define GEN_X_DEFINE_16()       /* w16 bound to register "16" */ \
  564 register unsigned char w16 asm("16") __attribute__((vector_size(16)));
  565 #define GEN_X_DEFINE_17()       /* w17 bound to register "17" */ \
  566 register unsigned char w17 asm("17") __attribute__((vector_size(16)));
  567 #define GEN_X_DEFINE_18_21()    /* w18..w21 bound to registers "18".."21" */ \
  568 register unsigned char w18 asm("18") __attribute__((vector_size(16)));  \
  569 register unsigned char w19 asm("19") __attribute__((vector_size(16)));  \
  570 register unsigned char w20 asm("20") __attribute__((vector_size(16)));  \
  571 register unsigned char w21 asm("21") __attribute__((vector_size(16)));
  572 #define GEN_X_DEFINE_22_23()    /* w22, w23 bound to registers "22", "23" */ \
  573 register unsigned char w22 asm("22") __attribute__((vector_size(16)));  \
  574 register unsigned char w23 asm("23") __attribute__((vector_size(16)));
  575 #define GEN_X_DEFINE_24_27()    /* w24..w27 bound to registers "24".."27" */ \
  576 register unsigned char w24 asm("24") __attribute__((vector_size(16)));  \
  577 register unsigned char w25 asm("25") __attribute__((vector_size(16)));  \
  578 register unsigned char w26 asm("26") __attribute__((vector_size(16)));  \
  579 register unsigned char w27 asm("27") __attribute__((vector_size(16)));
  580 #define GEN_X_DEFINE_28_30()    /* w28..w30 bound to registers "28".."30" */ \
  581 register unsigned char w28 asm("28") __attribute__((vector_size(16)));  \
  582 register unsigned char w29 asm("29") __attribute__((vector_size(16)));  \
  583 register unsigned char w30 asm("30") __attribute__((vector_size(16)));
  584 #define GEN_X_DEFINE_31()       /* w31 bound to register "31" */ \
  585 register unsigned char w31 asm("31") __attribute__((vector_size(16)));
  586 #define GEN_X_DEFINE_32()       /* register-pinned variant (inside the "#if 0" branch): w32 uses label "31", the same label as w31. NOTE(review): w32..w38 all reuse "31" - confirm this is intended */ \
  587 register unsigned char w32 asm("31") __attribute__((vector_size(16)));
  588 #define GEN_X_DEFINE_33_36()    /* w33..w36, every one labelled "31" */ \
  589 register unsigned char w33 asm("31") __attribute__((vector_size(16)));  \
  590 register unsigned char w34 asm("31") __attribute__((vector_size(16)));  \
  591 register unsigned char w35 asm("31") __attribute__((vector_size(16)));  \
  592 register unsigned char w36 asm("31") __attribute__((vector_size(16)));
  593 #define GEN_X_DEFINE_37_38()    /* w37, w38, both labelled "31" */ \
  594 register unsigned char w37 asm("31") __attribute__((vector_size(16)));  \
  595 register unsigned char w38 asm("31") __attribute__((vector_size(16)));
  596 #define GEN_X_DEFINE_ALL()      /* register-pinned variant (inside the "#if 0" branch): expands every GEN_X_DEFINE_* group in order, declaring w0 through w38 */ \
  597         GEN_X_DEFINE_0_3()      \
  598         GEN_X_DEFINE_4_5()      \
  599         GEN_X_DEFINE_6_7()      \
  600         GEN_X_DEFINE_8_9()      \
  601         GEN_X_DEFINE_10_11()    \
  602         GEN_X_DEFINE_12_15()    \
  603         GEN_X_DEFINE_16()       \
  604         GEN_X_DEFINE_17()       \
  605         GEN_X_DEFINE_18_21()    \
  606         GEN_X_DEFINE_22_23()    \
  607         GEN_X_DEFINE_24_27()    \
  608         GEN_X_DEFINE_28_30()    \
  609         GEN_X_DEFINE_31()       \
  610         GEN_X_DEFINE_32()       \
  611         GEN_X_DEFINE_33_36()    \
  612         GEN_X_DEFINE_37_38()
  613 #else
  614 #define GEN_X_DEFINE_0_3()      /* "#else" variant: plain (no register label) 16-byte unsigned char vector variables w0..w3 */ \
  615         unsigned char w0 __attribute__((vector_size(16)));      \
  616         unsigned char w1 __attribute__((vector_size(16)));      \
  617         unsigned char w2 __attribute__((vector_size(16)));      \
  618         unsigned char w3 __attribute__((vector_size(16)));
  619 #define GEN_X_DEFINE_4_5()      /* declares w4, w5 */ \
  620         unsigned char w4 __attribute__((vector_size(16)));      \
  621         unsigned char w5 __attribute__((vector_size(16)));
  622 #define GEN_X_DEFINE_6_7()      /* declares w6, w7 */ \
  623         unsigned char w6 __attribute__((vector_size(16)));      \
  624         unsigned char w7 __attribute__((vector_size(16)));
  625 #define GEN_X_DEFINE_8_9()      /* declares w8, w9 */ \
  626         unsigned char w8 __attribute__((vector_size(16)));      \
  627         unsigned char w9 __attribute__((vector_size(16)));
  628 #define GEN_X_DEFINE_10_11()    /* declares w10, w11 */ \
  629         unsigned char w10 __attribute__((vector_size(16)));     \
  630         unsigned char w11 __attribute__((vector_size(16)));
  631 #define GEN_X_DEFINE_12_15()    /* declares w12..w15 */ \
  632         unsigned char w12 __attribute__((vector_size(16)));     \
  633         unsigned char w13 __attribute__((vector_size(16)));     \
  634         unsigned char w14 __attribute__((vector_size(16)));     \
  635         unsigned char w15 __attribute__((vector_size(16)));
  636 #define GEN_X_DEFINE_16()       /* declares w16 */ \
  637         unsigned char w16 __attribute__((vector_size(16)));
  638 #define GEN_X_DEFINE_17()       /* declares w17 */ \
  639         unsigned char w17 __attribute__((vector_size(16)));
  640 #define GEN_X_DEFINE_18_21()    /* declares w18..w21 */ \
  641         unsigned char w18 __attribute__((vector_size(16)));     \
  642         unsigned char w19 __attribute__((vector_size(16)));     \
  643         unsigned char w20 __attribute__((vector_size(16)));     \
  644         unsigned char w21 __attribute__((vector_size(16)));
  645 #define GEN_X_DEFINE_22_23()    /* declares w22, w23 */ \
  646         unsigned char w22 __attribute__((vector_size(16)));     \
  647         unsigned char w23 __attribute__((vector_size(16)));
  648 #define GEN_X_DEFINE_24_27()    /* declares w24..w27 */ \
  649         unsigned char w24 __attribute__((vector_size(16)));     \
  650         unsigned char w25 __attribute__((vector_size(16)));     \
  651         unsigned char w26 __attribute__((vector_size(16)));     \
  652         unsigned char w27 __attribute__((vector_size(16)));
  653 #define GEN_X_DEFINE_28_30()    /* declares w28..w30 */ \
  654         unsigned char w28 __attribute__((vector_size(16)));     \
  655         unsigned char w29 __attribute__((vector_size(16)));     \
  656         unsigned char w30 __attribute__((vector_size(16)));
  657 #define GEN_X_DEFINE_31()       /* declares w31 */ \
  658         unsigned char w31 __attribute__((vector_size(16)));
  659 #define GEN_X_DEFINE_32()       /* declares w32 */ \
  660         unsigned char w32 __attribute__((vector_size(16)));
  661 #define GEN_X_DEFINE_33_36()    /* declares w33..w36 */ \
  662         unsigned char w33 __attribute__((vector_size(16)));     \
  663         unsigned char w34 __attribute__((vector_size(16)));     \
  664         unsigned char w35 __attribute__((vector_size(16)));     \
  665         unsigned char w36 __attribute__((vector_size(16)));
  666 #define GEN_X_DEFINE_37_38()    /* declares w37, w38 */ \
  667         unsigned char w37 __attribute__((vector_size(16)));     \
  668         unsigned char w38 __attribute__((vector_size(16)));
  669 #define GEN_X_DEFINE_ALL()      /* "#else" variant: expands every GEN_X_DEFINE_* group in order, declaring plain vector variables w0 through w38 */ \
  670         GEN_X_DEFINE_0_3()      \
  671         GEN_X_DEFINE_4_5()      \
  672         GEN_X_DEFINE_6_7()      \
  673         GEN_X_DEFINE_8_9()      \
  674         GEN_X_DEFINE_10_11()    \
  675         GEN_X_DEFINE_12_15()    \
  676         GEN_X_DEFINE_16()       \
  677         GEN_X_DEFINE_17()       \
  678         GEN_X_DEFINE_18_21()    \
  679         GEN_X_DEFINE_22_23()    \
  680         GEN_X_DEFINE_24_27()    \
  681         GEN_X_DEFINE_28_30()    \
  682         GEN_X_DEFINE_31()       \
  683         GEN_X_DEFINE_32()       \
  684         GEN_X_DEFINE_33_36()    \
  685         GEN_X_DEFINE_37_38()
  686 #endif

Cache object: a29c979dae321c68c366e102ac5f269f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.