| 
     1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
   24  */
   25 
   26 #include <sys/vdev_raidz_impl.h>
   27 
   28 /*
   29  * Provide native CPU scalar routines.
   30  * Support 32bit and 64bit CPUs.
   31  */
   32 #if ((~(0x0ULL)) >> 24) == 0xffULL
   33 #define ELEM_SIZE       4
   34 typedef uint32_t iv_t;
   35 #elif ((~(0x0ULL)) >> 56) == 0xffULL
   36 #define ELEM_SIZE       8
   37 typedef uint64_t iv_t;
   38 #endif
   39 
   40 /*
   41  * Vector type used in scalar implementation
   42  *
   43  * The union is expected to be of native CPU register size. Since addition
   44  * uses XOR operation, it can be performed an all byte elements at once.
   45  * Multiplication requires per byte access.
   46  */
   47 typedef union {
   48         iv_t e;
   49         uint8_t b[ELEM_SIZE];
   50 } v_t;
   51 
   52 /*
   53  * Precomputed lookup tables for multiplication by a constant
   54  *
   55  * Reconstruction path requires multiplication by constant factors. Instead of
   56  * performing two step lookup (log & exp tables), a direct lookup can be used
   57  * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
   58  *
   59  *      r = vdev_raidz_mul_lt[c][a];
   60  *
   61  * The table is indexed directly by the coefficient value 'c' (not by its
   62  * logarithm): raidz_init_scalar() fills row 'c' with gf_mul(c, a).
   63  *
   64  * PERFORMANCE NOTE:
   65  * Even though the complete lookup table uses 64kiB, only relatively small
   66  * portion of it is used at the same time. Following shows number of accessed
   67  * bytes for different cases:
   68  *      - 1 failed disk: 256B (1 mul. coefficient)
   69  *      - 2 failed disks: 512B (2 mul. coefficients)
   70  *      - 3 failed disks: 1536B (6 mul. coefficients)
   71  *
   72  * Size of actually accessed lookup table regions is only larger for
   73  * reconstruction of 3 failed disks, when compared to traditional log/exp
   74  * method. But since the result is obtained in one lookup step performance is
   75  * doubled.
   76  */
   77 static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
   78 
   79 static void
   80 raidz_init_scalar(void)
   81 {
   82         int c, i;
   83         for (c = 0; c < 256; c++)
   84                 for (i = 0; i < 256; i++)
   85                         vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
   86 
   87 }
   88 
   89 #define PREFETCHNTA(ptr, offset)        {}
   90 #define PREFETCH(ptr, offset)           {}
   91 
   92 #define XOR_ACC(src, acc)       acc.e ^= ((v_t *)src)[0].e
   93 #define XOR(src, acc)           acc.e ^= src.e
   94 #define ZERO(acc)               acc.e = 0
   95 #define COPY(src, dst)          dst = src
   96 #define LOAD(src, val)          val = ((v_t *)src)[0]
   97 #define STORE(dst, val)         ((v_t *)dst)[0] = val
   98 
   99 /*
  100  * Constants used for optimized multiplication by 2.
  101  */
  102 static const struct {
  103         iv_t mod;
  104         iv_t mask;
  105         iv_t msb;
  106 } scalar_mul2_consts = {
  107 #if ELEM_SIZE == 8
  108         .mod    = 0x1d1d1d1d1d1d1d1dULL,
  109         .mask   = 0xfefefefefefefefeULL,
  110         .msb    = 0x8080808080808080ULL,
  111 #else
  112         .mod    = 0x1d1d1d1dULL,
  113         .mask   = 0xfefefefeULL,
  114         .msb    = 0x80808080ULL,
  115 #endif
  116 };
  117 
  118 #define MUL2_SETUP() {}
  119 
  120 #define MUL2(a)                                                         \
  121 {                                                                       \
  122         iv_t _mask;                                                     \
  123                                                                         \
  124         _mask = (a).e & scalar_mul2_consts.msb;                         \
  125         _mask = (_mask << 1) - (_mask >> 7);                            \
  126         (a).e = ((a).e << 1) & scalar_mul2_consts.mask;                 \
  127         (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod);               \
  128 }
  129 
  130 #define MUL4(a)                                                         \
  131 {                                                                       \
  132         MUL2(a);                                                        \
  133         MUL2(a);                                                        \
  134 }
  135 
  136 #define MUL(c, a)                                                       \
  137 {                                                                       \
  138         const uint8_t *mul_lt = vdev_raidz_mul_lt[c];                   \
  139         switch (ELEM_SIZE) {                                            \
  140         case 8:                                                         \
  141                 a.b[7] = mul_lt[a.b[7]];                                \
  142                 a.b[6] = mul_lt[a.b[6]];                                \
  143                 a.b[5] = mul_lt[a.b[5]];                                \
  144                 a.b[4] = mul_lt[a.b[4]];                                \
  145                 zfs_fallthrough;                                        \
  146         case 4:                                                         \
  147                 a.b[3] = mul_lt[a.b[3]];                                \
  148                 a.b[2] = mul_lt[a.b[2]];                                \
  149                 a.b[1] = mul_lt[a.b[1]];                                \
  150                 a.b[0] = mul_lt[a.b[0]];                                \
  151                 break;                                                  \
  152         }                                                               \
  153 }
  154 
  155 #define raidz_math_begin()      {}
  156 #define raidz_math_end()        {}
  157 
  158 #define SYN_STRIDE              1
  159 
  160 #define ZERO_DEFINE()           v_t d0
  161 #define ZERO_STRIDE             1
  162 #define ZERO_D                  d0
  163 
  164 #define COPY_DEFINE()           v_t d0
  165 #define COPY_STRIDE             1
  166 #define COPY_D                  d0
  167 
  168 #define ADD_DEFINE()            v_t d0
  169 #define ADD_STRIDE              1
  170 #define ADD_D                   d0
  171 
  172 #define MUL_DEFINE()            v_t d0
  173 #define MUL_STRIDE              1
  174 #define MUL_D                   d0
  175 
  176 #define GEN_P_STRIDE            1
  177 #define GEN_P_DEFINE()          v_t p0
  178 #define GEN_P_P                 p0
  179 
  180 #define GEN_PQ_STRIDE           1
  181 #define GEN_PQ_DEFINE()         v_t d0, c0
  182 #define GEN_PQ_D                d0
  183 #define GEN_PQ_C                c0
  184 
  185 #define GEN_PQR_STRIDE          1
  186 #define GEN_PQR_DEFINE()        v_t d0, c0
  187 #define GEN_PQR_D               d0
  188 #define GEN_PQR_C               c0
  189 
  190 #define SYN_Q_DEFINE()          v_t d0, x0
  191 #define SYN_Q_D                 d0
  192 #define SYN_Q_X                 x0
  193 
  194 
  195 #define SYN_R_DEFINE()          v_t d0, x0
  196 #define SYN_R_D                 d0
  197 #define SYN_R_X                 x0
  198 
  199 
  200 #define SYN_PQ_DEFINE()         v_t d0, x0
  201 #define SYN_PQ_D                d0
  202 #define SYN_PQ_X                x0
  203 
  204 
  205 #define REC_PQ_STRIDE           1
  206 #define REC_PQ_DEFINE()         v_t x0, y0, t0
  207 #define REC_PQ_X                x0
  208 #define REC_PQ_Y                y0
  209 #define REC_PQ_T                t0
  210 
  211 
  212 #define SYN_PR_DEFINE()         v_t d0, x0
  213 #define SYN_PR_D                d0
  214 #define SYN_PR_X                x0
  215 
  216 #define REC_PR_STRIDE           1
  217 #define REC_PR_DEFINE()         v_t x0, y0, t0
  218 #define REC_PR_X                x0
  219 #define REC_PR_Y                y0
  220 #define REC_PR_T                t0
  221 
  222 
  223 #define SYN_QR_DEFINE()         v_t d0, x0
  224 #define SYN_QR_D                d0
  225 #define SYN_QR_X                x0
  226 
  227 
  228 #define REC_QR_STRIDE           1
  229 #define REC_QR_DEFINE()         v_t x0, y0, t0
  230 #define REC_QR_X                x0
  231 #define REC_QR_Y                y0
  232 #define REC_QR_T                t0
  233 
  234 
  235 #define SYN_PQR_DEFINE()        v_t d0, x0
  236 #define SYN_PQR_D               d0
  237 #define SYN_PQR_X               x0
  238 
  239 #define REC_PQR_STRIDE          1
  240 #define REC_PQR_DEFINE()        v_t x0, y0, z0, xs0, ys0
  241 #define REC_PQR_X               x0
  242 #define REC_PQR_Y               y0
  243 #define REC_PQR_Z               z0
  244 #define REC_PQR_XS              xs0
  245 #define REC_PQR_YS              ys0
  246 
  247 #include "vdev_raidz_math_impl.h"
  248 
  249 DEFINE_GEN_METHODS(scalar);
  250 DEFINE_REC_METHODS(scalar);
  251 
  252 boolean_t
  253 raidz_will_scalar_work(void)
  254 {
  255         return (B_TRUE); /* always */
  256 }
  257 
  258 const raidz_impl_ops_t vdev_raidz_scalar_impl = {
  259         .init = raidz_init_scalar,
  260         .fini = NULL,
  261         .gen = RAIDZ_GEN_METHODS(scalar),
  262         .rec = RAIDZ_REC_METHODS(scalar),
  263         .is_supported = &raidz_will_scalar_work,
  264         .name = "scalar"
  265 };
  266 
  267 /* Powers of 2 in the RAID-Z Galois field. */
  268 const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
  269         0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
  270         0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
  271         0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
  272         0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
  273         0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
  274         0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
  275         0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
  276         0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
  277         0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
  278         0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
  279         0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
  280         0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
  281         0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
  282         0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
  283         0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
  284         0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
  285         0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
  286         0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
  287         0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
  288         0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
  289         0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
  290         0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
  291         0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
  292         0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
  293         0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
  294         0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
  295         0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
  296         0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
  297         0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
  298         0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
  299         0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
  300         0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
  301 };
  302 
  303 /* Logs of 2 in the RAID-Z Galois field. */
  304 const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
  305         0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
  306         0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
  307         0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
  308         0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
  309         0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
  310         0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
  311         0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
  312         0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
  313         0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
  314         0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
  315         0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
  316         0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
  317         0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
  318         0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
  319         0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
  320         0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
  321         0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
  322         0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
  323         0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
  324         0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
  325         0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
  326         0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
  327         0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
  328         0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
  329         0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
  330         0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
  331         0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
  332         0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
  333         0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
  334         0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
  335         0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
  336         0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
  337 };
Cache object: ab0a8a3ba193cc41da923695ef68a9d7 
 
 |