
FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/cmd/raidz_test/raidz_test.c


    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 
   22 /*
   23  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
   24  */
   25 
   26 #include <sys/zfs_context.h>
   27 #include <sys/time.h>
   28 #include <sys/wait.h>
   29 #include <sys/zio.h>
   30 #include <umem.h>
   31 #include <sys/vdev_raidz.h>
   32 #include <sys/vdev_raidz_impl.h>
   33 #include <assert.h>
   34 #include <stdio.h>
   35 #include "raidz_test.h"
   36 
   37 static int *rand_data;
   38 raidz_test_opts_t rto_opts;
   39 
   40 static char pid_s[16];
   41 
   42 static void sig_handler(int signo)
   43 {
   44         int old_errno = errno;
   45         struct sigaction action;
   46         /*
   47          * Restore default action and re-raise signal so SIGSEGV and
   48          * SIGABRT can trigger a core dump.
   49          */
   50         action.sa_handler = SIG_DFL;
   51         sigemptyset(&action.sa_mask);
   52         action.sa_flags = 0;
   53         (void) sigaction(signo, &action, NULL);
   54 
   55         if (rto_opts.rto_gdb) {
   56                 pid_t pid = fork();
   57                 if (pid == 0) {
   58                         execlp("gdb", "gdb", "-ex", "set pagination 0",
   59                             "-p", pid_s, NULL);
   60                         _exit(-1);
   61                 } else if (pid > 0)
   62                         while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
   63                                 ;
   64         }
   65 
   66         raise(signo);
   67         errno = old_errno;
   68 }
   69 
   70 static void print_opts(raidz_test_opts_t *opts, boolean_t force)
   71 {
   72         const char *verbose;
   73         switch (opts->rto_v) {
   74                 case D_ALL:
   75                         verbose = "no";
   76                         break;
   77                 case D_INFO:
   78                         verbose = "info";
   79                         break;
   80                 case D_DEBUG:
   81                 default:
   82                         verbose = "debug";
   83                         break;
   84         }
   85 
   86         if (force || opts->rto_v >= D_INFO) {
   87                 (void) fprintf(stdout, DBLSEP "Running with options:\n"
   88                     "  (-a) zio ashift                   : %zu\n"
   89                     "  (-o) zio offset                   : 1 << %zu\n"
   90                     "  (-e) expanded map                 : %s\n"
   91                     "  (-r) reflow offset                : %llx\n"
   92                     "  (-d) number of raidz data columns : %zu\n"
   93                     "  (-s) size of DATA                 : 1 << %zu\n"
   94                     "  (-S) sweep parameters             : %s \n"
   95                     "  (-v) verbose                      : %s \n\n",
   96                     opts->rto_ashift,                           /* -a */
   97                     ilog2(opts->rto_offset),                    /* -o */
   98                     opts->rto_expand ? "yes" : "no",            /* -e */
   99                     (u_longlong_t)opts->rto_expand_offset,      /* -r */
  100                     opts->rto_dcols,                            /* -d */
  101                     ilog2(opts->rto_dsize),                     /* -s */
  102                     opts->rto_sweep ? "yes" : "no",             /* -S */
  103                     verbose);                                   /* -v */
  104         }
  105 }
  106 
  107 static void usage(boolean_t requested)
  108 {
  109         const raidz_test_opts_t *o = &rto_opts_defaults;
  110 
  111         FILE *fp = requested ? stdout : stderr;
  112 
  113         (void) fprintf(fp, "Usage:\n"
  114             "\t[-a zio ashift (default: %zu)]\n"
  115             "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
  116             "\t[-d number of raidz data columns (default: %zu)]\n"
  117             "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
  118             "\t[-S parameter sweep (default: %s)]\n"
  119             "\t[-t timeout for parameter sweep test]\n"
  120             "\t[-B benchmark all raidz implementations]\n"
  121             "\t[-e use expanded raidz map (default: %s)]\n"
  122             "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
  123             "\t[-v increase verbosity (default: %d)]\n"
  124             "\t[-h (print help)]\n"
  125             "\t[-T test the test, see if failure would be detected]\n"
  126             "\t[-D debug (attach gdb on SIGSEGV)]\n"
  127             "",
  128             o->rto_ashift,                              /* -a */
  129             ilog2(o->rto_offset),                       /* -o */
  130             o->rto_dcols,                               /* -d */
  131             ilog2(o->rto_dsize),                        /* -s */
  132             rto_opts.rto_sweep ? "yes" : "no",          /* -S */
  133             rto_opts.rto_expand ? "yes" : "no",         /* -e */
  134             (u_longlong_t)o->rto_expand_offset,         /* -r */
  135             o->rto_v);                                  /* -v */
  136 
  137         exit(requested ? 0 : 1);
  138 }
  139 
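      /*
       * Example invocations (illustrative):
       *
       *   raidz_test               run the default parity-generation and
       *                            reconstruction checks
       *   raidz_test -S -t 600 -v  sweep ashift/size/column combinations with
       *                            a 600 second timeout, more verbose output
       *   raidz_test -B            benchmark the available raidz
       *                            implementations
       */
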
  140 static void process_options(int argc, char **argv)
  141 {
  142         size_t value;
  143         int opt;
  144         raidz_test_opts_t *o = &rto_opts;
  145 
  146         memcpy(o, &rto_opts_defaults, sizeof (*o));
  147 
  148         while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
  149                 switch (opt) {
  150                 case 'a':
  151                         value = strtoull(optarg, NULL, 0);
  152                         o->rto_ashift = MIN(13, MAX(9, value));
  153                         break;
  154                 case 'e':
  155                         o->rto_expand = 1;
  156                         break;
  157                 case 'r':
  158                         o->rto_expand_offset = strtoull(optarg, NULL, 0);
  159                         break;
  160                 case 'o':
  161                         value = strtoull(optarg, NULL, 0);
  162                         o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
  163                         break;
  164                 case 'd':
  165                         value = strtoull(optarg, NULL, 0);
  166                         o->rto_dcols = MIN(255, MAX(1, value));
  167                         break;
  168                 case 's':
  169                         value = strtoull(optarg, NULL, 0);
  170                         o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
  171                             MAX(SPA_MINBLOCKSHIFT, value));
  172                         break;
  173                 case 't':
  174                         value = strtoull(optarg, NULL, 0);
  175                         o->rto_sweep_timeout = value;
  176                         break;
  177                 case 'v':
  178                         o->rto_v++;
  179                         break;
  180                 case 'S':
  181                         o->rto_sweep = 1;
  182                         break;
  183                 case 'B':
  184                         o->rto_benchmark = 1;
  185                         break;
  186                 case 'D':
  187                         o->rto_gdb = 1;
  188                         break;
  189                 case 'T':
  190                         o->rto_sanity = 1;
  191                         break;
  192                 case 'h':
  193                         usage(B_TRUE);
  194                         break;
  195                 case '?':
  196                 default:
  197                         usage(B_FALSE);
  198                         break;
  199                 }
  200         }
  201 }
  202 
  203 #define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd)
  204 #define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size)
  205 
  206 #define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
  207 #define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
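
      /*
       * Column layout assumed by these macros: within each raidz_row_t,
       * columns [0, rr_firstdatacol) hold parity (P, then Q, then R) and
       * columns [rr_firstdatacol, rr_cols) hold data, so CODE_COL(rr, i) is
       * the i-th parity column and DATA_COL(rr, i) the i-th data column.
       */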
  208 
  209 static int
  210 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
  211 {
  212         int r, i, ret = 0;
  213 
  214         VERIFY(parity >= 1 && parity <= 3);
  215 
  216         for (r = 0; r < rm->rm_nrows; r++) {
  217                 raidz_row_t * const rr = rm->rm_row[r];
  218                 raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
  219                 for (i = 0; i < parity; i++) {
  220                         if (CODE_COL_SIZE(rrg, i) == 0) {
  221                                 VERIFY0(CODE_COL_SIZE(rr, i));
  222                                 continue;
  223                         }
  224 
  225                         if (abd_cmp(CODE_COL(rr, i),
  226                             CODE_COL(rrg, i)) != 0) {
  227                                 ret++;
  228                                 LOG_OPT(D_DEBUG, opts,
  229                                     "\nParity block [%d] different!\n", i);
  230                         }
  231                 }
  232         }
  233         return (ret);
  234 }
  235 
  236 static int
  237 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
  238 {
  239         int r, i, dcols, ret = 0;
  240 
  241         for (r = 0; r < rm->rm_nrows; r++) {
  242                 raidz_row_t *rr = rm->rm_row[r];
  243                 raidz_row_t *rrg = opts->rm_golden->rm_row[r];
  244                 dcols = opts->rm_golden->rm_row[0]->rr_cols -
  245                     raidz_parity(opts->rm_golden);
  246                 for (i = 0; i < dcols; i++) {
  247                         if (DATA_COL_SIZE(rrg, i) == 0) {
  248                                 VERIFY0(DATA_COL_SIZE(rr, i));
  249                                 continue;
  250                         }
  251 
  252                         if (abd_cmp(DATA_COL(rrg, i),
  253                             DATA_COL(rr, i)) != 0) {
  254                                 ret++;
  255 
  256                                 LOG_OPT(D_DEBUG, opts,
  257                                     "\nData block [%d] different!\n", i);
  258                         }
  259                 }
  260         }
  261         return (ret);
  262 }
  263 
  264 static int
  265 init_rand(void *data, size_t size, void *private)
  266 {
  267         (void) private;
  268         memcpy(data, rand_data, size);
  269         return (0);
  270 }
  271 
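      /*
       * "Corrupting" a column below means overwriting its abd with the fixed
       * rand_data pattern (via init_rand), so a later comparison against the
       * golden map only passes if the column was actually regenerated or
       * reconstructed.
       */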
  272 static void
  273 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
  274 {
  275         for (int r = 0; r < rm->rm_nrows; r++) {
  276                 raidz_row_t *rr = rm->rm_row[r];
  277                 for (int i = 0; i < cnt; i++) {
  278                         raidz_col_t *col = &rr->rr_col[tgts[i]];
  279                         abd_iterate_func(col->rc_abd, 0, col->rc_size,
  280                             init_rand, NULL);
  281                 }
  282         }
  283 }
  284 
  285 void
  286 init_zio_abd(zio_t *zio)
  287 {
  288         abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
  289 }
  290 
  291 static void
  292 fini_raidz_map(zio_t **zio, raidz_map_t **rm)
  293 {
  294         vdev_raidz_map_free(*rm);
  295         raidz_free((*zio)->io_abd, (*zio)->io_size);
  296         umem_free(*zio, sizeof (zio_t));
  297 
  298         *zio = NULL;
  299         *rm = NULL;
  300 }
  301 
  302 static int
  303 init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
  304 {
  305         int err = 0;
  306         zio_t *zio_test;
  307         raidz_map_t *rm_test;
  308         const size_t total_ncols = opts->rto_dcols + parity;
  309 
  310         if (opts->rm_golden) {
  311                 fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
  312         }
  313 
  314         opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
  315         zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
  316 
  317         opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
  318         opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
  319 
  320         opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
  321         zio_test->io_abd = raidz_alloc(opts->rto_dsize);
  322 
  323         init_zio_abd(opts->zio_golden);
  324         init_zio_abd(zio_test);
  325 
  326         VERIFY0(vdev_raidz_impl_set("original"));
  327 
  328         if (opts->rto_expand) {
  329                 opts->rm_golden =
  330                     vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
  331                     opts->zio_golden->io_size, opts->zio_golden->io_offset,
  332                     opts->rto_ashift, total_ncols+1, total_ncols,
  333                     parity, opts->rto_expand_offset);
  334                 rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
  335                     zio_test->io_size, zio_test->io_offset,
  336                     opts->rto_ashift, total_ncols+1, total_ncols,
  337                     parity, opts->rto_expand_offset);
  338         } else {
  339                 opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
  340                     opts->rto_ashift, total_ncols, parity);
  341                 rm_test = vdev_raidz_map_alloc(zio_test,
  342                     opts->rto_ashift, total_ncols, parity);
  343         }
  344 
  345         VERIFY(opts->zio_golden);
  346         VERIFY(opts->rm_golden);
  347 
  348         vdev_raidz_generate_parity(opts->rm_golden);
  349         vdev_raidz_generate_parity(rm_test);
  350 
  351         /* sanity check */
  352         err |= cmp_data(opts, rm_test);
  353         err |= cmp_code(opts, rm_test, parity);
  354 
  355         if (err)
  356                 ERR("initializing the golden copy ... [FAIL]!\n");
  357 
  358         /* tear down raidz_map of test zio */
  359         fini_raidz_map(&zio_test, &rm_test);
  360 
  361         return (err);
  362 }
  363 
  364 /*
  365  * If reflow is not in progress, reflow_offset should be UINT64_MAX.
  366  * For each row, if the row is entirely before reflow_offset, it will
  367  * come from the new location.  Otherwise this row will come from the
  368  * old location.  Therefore, rows that straddle the reflow_offset will
  369  * come from the old location.
  370  *
  371  * NOTE: Until raidz expansion is implemented this function is only
   372  * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
  373  */
  374 raidz_map_t *
  375 vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
  376     uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
  377     uint64_t nparity, uint64_t reflow_offset)
  378 {
  379         /* The zio's size in units of the vdev's minimum sector size. */
  380         uint64_t s = size >> ashift;
  381         uint64_t q, r, bc, devidx, asize = 0, tot;
  382 
  383         /*
  384          * "Quotient": The number of data sectors for this stripe on all but
  385          * the "big column" child vdevs that also contain "remainder" data.
  386          * AKA "full rows"
  387          */
  388         q = s / (logical_cols - nparity);
  389 
  390         /*
  391          * "Remainder": The number of partial stripe data sectors in this I/O.
  392          * This will add a sector to some, but not all, child vdevs.
  393          */
  394         r = s - q * (logical_cols - nparity);
  395 
  396         /* The number of "big columns" - those which contain remainder data. */
  397         bc = (r == 0 ? 0 : r + nparity);
  398 
  399         /*
  400          * The total number of data and parity sectors associated with
  401          * this I/O.
  402          */
  403         tot = s + nparity * (q + (r == 0 ? 0 : 1));
  404 
   405         /* How many rows contain data (not only skip sectors) */
  406         uint64_t rows = howmany(tot, logical_cols);
  407         int cols = MIN(tot, logical_cols);
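
              /*
               * Worked example (illustrative): with logical_cols = 5,
               * nparity = 1 (4 data + 1 parity) and s = 10 data sectors,
               * q = 10 / 4 = 2, r = 2, bc = 3, tot = 10 + 1 * 3 = 13,
               * rows = howmany(13, 5) = 3 and cols = 5; rows 0 and 1 are
               * full width and the last row holds the remaining bc = 3
               * sectors.
               */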
  408 
  409         raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
  410             KM_SLEEP);
  411         rm->rm_nrows = rows;
  412 
  413         for (uint64_t row = 0; row < rows; row++) {
  414                 raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
  415                     rr_col[cols]), KM_SLEEP);
  416                 rm->rm_row[row] = rr;
  417 
  418                 /* The starting RAIDZ (parent) vdev sector of the row. */
  419                 uint64_t b = (offset >> ashift) + row * logical_cols;
  420 
  421                 /*
  422                  * If we are in the middle of a reflow, and any part of this
  423                  * row has not been copied, then use the old location of
  424                  * this row.
  425                  */
  426                 int row_phys_cols = physical_cols;
  427                 if (b + (logical_cols - nparity) > reflow_offset >> ashift)
  428                         row_phys_cols--;
  429 
  430                 /* starting child of this row */
  431                 uint64_t child_id = b % row_phys_cols;
  432                 /* The starting byte offset on each child vdev. */
  433                 uint64_t child_offset = (b / row_phys_cols) << ashift;
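
                      /*
                       * For example, with row_phys_cols = 6 and b = 14 this
                       * row starts on child vdev 14 % 6 = 2 at byte offset
                       * (14 / 6) << ashift = 2 << ashift; in the column loop
                       * below, child_id wraps back to 0 and child_offset
                       * advances by one sector when it passes the last child.
                       */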
  434 
  435                 /*
  436                  * We set cols to the entire width of the block, even
  437                  * if this row is shorter.  This is needed because parity
  438                  * generation (for Q and R) needs to know the entire width,
  439                  * because it treats the short row as though it was
  440                  * full-width (and the "phantom" sectors were zero-filled).
  441                  *
  442                  * Another approach to this would be to set cols shorter
  443                  * (to just the number of columns that we might do i/o to)
  444                  * and have another mechanism to tell the parity generation
  445                  * about the "entire width".  Reconstruction (at least
  446                  * vdev_raidz_reconstruct_general()) would also need to
  447                  * know about the "entire width".
  448                  */
  449                 rr->rr_cols = cols;
  450                 rr->rr_bigcols = bc;
  451                 rr->rr_missingdata = 0;
  452                 rr->rr_missingparity = 0;
  453                 rr->rr_firstdatacol = nparity;
  454                 rr->rr_abd_empty = NULL;
  455                 rr->rr_nempty = 0;
  456 
  457                 for (int c = 0; c < rr->rr_cols; c++, child_id++) {
  458                         if (child_id >= row_phys_cols) {
  459                                 child_id -= row_phys_cols;
  460                                 child_offset += 1ULL << ashift;
  461                         }
  462                         rr->rr_col[c].rc_devidx = child_id;
  463                         rr->rr_col[c].rc_offset = child_offset;
  464                         rr->rr_col[c].rc_orig_data = NULL;
  465                         rr->rr_col[c].rc_error = 0;
  466                         rr->rr_col[c].rc_tried = 0;
  467                         rr->rr_col[c].rc_skipped = 0;
  468                         rr->rr_col[c].rc_need_orig_restore = B_FALSE;
  469 
  470                         uint64_t dc = c - rr->rr_firstdatacol;
  471                         if (c < rr->rr_firstdatacol) {
  472                                 rr->rr_col[c].rc_size = 1ULL << ashift;
  473                                 rr->rr_col[c].rc_abd =
  474                                     abd_alloc_linear(rr->rr_col[c].rc_size,
  475                                     B_TRUE);
  476                         } else if (row == rows - 1 && bc != 0 && c >= bc) {
  477                                 /*
   478                                  * Past the end, this is for parity generation.
  479                                  */
  480                                 rr->rr_col[c].rc_size = 0;
  481                                 rr->rr_col[c].rc_abd = NULL;
  482                         } else {
  483                                 /*
  484                                  * "data column" (col excluding parity)
  485                                  * Add an ASCII art diagram here
  486                                  */
  487                                 uint64_t off;
  488 
  489                                 if (c < bc || r == 0) {
  490                                         off = dc * rows + row;
  491                                 } else {
  492                                         off = r * rows +
  493                                             (dc - r) * (rows - 1) + row;
  494                                 }
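                                      /*
                                       * Continuing the example above
                                       * (rows = 3, r = 2, bc = 3,
                                       * nparity = 1): data columns dc = 0
                                       * and 1 are full height and map to
                                       * abd sectors 0-2 and 3-5, while
                                       * dc = 2 and 3 are one row short and
                                       * map to sectors 6-7 and 8-9, so each
                                       * data column occupies a contiguous
                                       * run of the source abd.
                                       */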
  495                                 rr->rr_col[c].rc_size = 1ULL << ashift;
  496                                 rr->rr_col[c].rc_abd = abd_get_offset_struct(
  497                                     &rr->rr_col[c].rc_abdstruct,
  498                                     abd, off << ashift, 1 << ashift);
  499                         }
  500 
  501                         asize += rr->rr_col[c].rc_size;
  502                 }
  503                 /*
  504                  * If all data stored spans all columns, there's a danger that
  505                  * parity will always be on the same device and, since parity
  506                  * isn't read during normal operation, that that device's I/O
  507                  * bandwidth won't be used effectively. We therefore switch
  508                  * the parity every 1MB.
  509                  *
  510                  * ...at least that was, ostensibly, the theory. As a practical
  511                  * matter unless we juggle the parity between all devices
  512                  * evenly, we won't see any benefit. Further, occasional writes
  513                  * that aren't a multiple of the LCM of the number of children
  514                  * and the minimum stripe width are sufficient to avoid pessimal
  515                  * behavior. Unfortunately, this decision created an implicit
  516                  * on-disk format requirement that we need to support for all
  517                  * eternity, but only for single-parity RAID-Z.
  518                  *
  519                  * If we intend to skip a sector in the zeroth column for
  520                  * padding we must make sure to note this swap. We will never
  521                  * intend to skip the first column since at least one data and
  522                  * one parity column must appear in each row.
  523                  */
  524                 if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
  525                     (offset & (1ULL << 20))) {
  526                         ASSERT(rr->rr_cols >= 2);
  527                         ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
  528                         devidx = rr->rr_col[0].rc_devidx;
  529                         uint64_t o = rr->rr_col[0].rc_offset;
  530                         rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
  531                         rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
  532                         rr->rr_col[1].rc_devidx = devidx;
  533                         rr->rr_col[1].rc_offset = o;
  534                 }
  535 
  536         }
  537         ASSERT3U(asize, ==, tot << ashift);
  538 
  539         /* init RAIDZ parity ops */
  540         rm->rm_ops = vdev_raidz_math_get_ops();
  541 
  542         return (rm);
  543 }
  544 
  545 static raidz_map_t *
  546 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
  547 {
  548         raidz_map_t *rm = NULL;
  549         const size_t alloc_dsize = opts->rto_dsize;
  550         const size_t total_ncols = opts->rto_dcols + parity;
  551         const int ccols[] = { 0, 1, 2 };
  552 
  553         VERIFY(zio);
  554         VERIFY(parity <= 3 && parity >= 1);
  555 
  556         *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
  557 
  558         (*zio)->io_offset = 0;
  559         (*zio)->io_size = alloc_dsize;
  560         (*zio)->io_abd = raidz_alloc(alloc_dsize);
  561         init_zio_abd(*zio);
  562 
  563         if (opts->rto_expand) {
  564                 rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
  565                     (*zio)->io_size, (*zio)->io_offset,
  566                     opts->rto_ashift, total_ncols+1, total_ncols,
  567                     parity, opts->rto_expand_offset);
  568         } else {
  569                 rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
  570                     total_ncols, parity);
  571         }
  572         VERIFY(rm);
  573 
  574         /* Make sure code columns are destroyed */
  575         corrupt_colums(rm, ccols, parity);
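              /*
               * The parity (code) columns start out filled with the random
               * pattern, so a generation routine that writes nothing is
               * caught by the cmp_code() comparison against the golden
               * parity.
               */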
  576 
  577         return (rm);
  578 }
  579 
  580 static int
  581 run_gen_check(raidz_test_opts_t *opts)
  582 {
  583         char **impl_name;
  584         int fn, err = 0;
  585         zio_t *zio_test;
  586         raidz_map_t *rm_test;
  587 
  588         err = init_raidz_golden_map(opts, PARITY_PQR);
  589         if (0 != err)
  590                 return (err);
  591 
  592         LOG(D_INFO, DBLSEP);
  593         LOG(D_INFO, "Testing parity generation...\n");
  594 
  595         for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
  596             impl_name++) {
  597 
  598                 LOG(D_INFO, SEP);
  599                 LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
  600 
  601                 if (0 != vdev_raidz_impl_set(*impl_name)) {
  602                         LOG(D_INFO, "[SKIP]\n");
  603                         continue;
  604                 } else {
  605                         LOG(D_INFO, "[SUPPORTED]\n");
  606                 }
  607 
  608                 for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
  609 
  610                         /* Check if should stop */
  611                         if (rto_opts.rto_should_stop)
  612                                 return (err);
  613 
  614                         /* create suitable raidz_map */
  615                         rm_test = init_raidz_map(opts, &zio_test, fn+1);
  616                         VERIFY(rm_test);
  617 
  618                         LOG(D_INFO, "\t\tTesting method [%s] ...",
  619                             raidz_gen_name[fn]);
  620 
  621                         if (!opts->rto_sanity)
  622                                 vdev_raidz_generate_parity(rm_test);
  623 
  624                         if (cmp_code(opts, rm_test, fn+1) != 0) {
  625                                 LOG(D_INFO, "[FAIL]\n");
  626                                 err++;
  627                         } else
  628                                 LOG(D_INFO, "[PASS]\n");
  629 
  630                         fini_raidz_map(&zio_test, &rm_test);
  631                 }
  632         }
  633 
  634         fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
  635 
  636         return (err);
  637 }
  638 
  639 static int
  640 run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
  641 {
  642         int x0, x1, x2;
  643         int tgtidx[3];
  644         int err = 0;
  645         static const int rec_tgts[7][3] = {
  646                 {1, 2, 3},      /* rec_p:   bad QR & D[0]       */
  647                 {0, 2, 3},      /* rec_q:   bad PR & D[0]       */
  648                 {0, 1, 3},      /* rec_r:   bad PQ & D[0]       */
  649                 {2, 3, 4},      /* rec_pq:  bad R  & D[0][1]    */
  650                 {1, 3, 4},      /* rec_pr:  bad Q  & D[0][1]    */
  651                 {0, 3, 4},      /* rec_qr:  bad P  & D[0][1]    */
  652                 {3, 4, 5}       /* rec_pqr: bad    & D[0][1][2] */
  653         };
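              /*
               * Indices below raidz_parity() (3 for the PQR map used here)
               * name parity columns that are treated as bad so that the
               * method under test is exercised; the remaining slots are
               * placeholders that the loops below replace with absolute
               * data-column indices (data index + raidz_parity()), and only
               * those data columns are actually overwritten by
               * corrupt_colums().
               */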
  654 
  655         memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
  656 
  657         if (fn < RAIDZ_REC_PQ) {
  658                 /* can reconstruct 1 failed data disk */
  659                 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
  660                         if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
  661                                 continue;
  662 
  663                         /* Check if should stop */
  664                         if (rto_opts.rto_should_stop)
  665                                 return (err);
  666 
  667                         LOG(D_DEBUG, "[%d] ", x0);
  668 
  669                         tgtidx[2] = x0 + raidz_parity(rm);
  670 
  671                         corrupt_colums(rm, tgtidx+2, 1);
  672 
  673                         if (!opts->rto_sanity)
  674                                 vdev_raidz_reconstruct(rm, tgtidx, 3);
  675 
  676                         if (cmp_data(opts, rm) != 0) {
  677                                 err++;
  678                                 LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
  679                         }
  680                 }
  681 
  682         } else if (fn < RAIDZ_REC_PQR) {
   683                 /* can reconstruct 2 failed data disks */
  684                 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
  685                         if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
  686                                 continue;
  687                         for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
  688                                 if (x1 >= rm->rm_row[0]->rr_cols -
  689                                     raidz_parity(rm))
  690                                         continue;
  691 
  692                                 /* Check if should stop */
  693                                 if (rto_opts.rto_should_stop)
  694                                         return (err);
  695 
  696                                 LOG(D_DEBUG, "[%d %d] ", x0, x1);
  697 
  698                                 tgtidx[1] = x0 + raidz_parity(rm);
  699                                 tgtidx[2] = x1 + raidz_parity(rm);
  700 
  701                                 corrupt_colums(rm, tgtidx+1, 2);
  702 
  703                                 if (!opts->rto_sanity)
  704                                         vdev_raidz_reconstruct(rm, tgtidx, 3);
  705 
  706                                 if (cmp_data(opts, rm) != 0) {
  707                                         err++;
  708                                         LOG(D_DEBUG, "\nREC D[%d %d]... "
  709                                             "[FAIL]\n", x0, x1);
  710                                 }
  711                         }
  712                 }
  713         } else {
   714                 /* can reconstruct 3 failed data disks */
  715                 for (x0 = 0; x0 < opts->rto_dcols; x0++) {
  716                         if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
  717                                 continue;
  718                         for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
  719                                 if (x1 >= rm->rm_row[0]->rr_cols -
  720                                     raidz_parity(rm))
  721                                         continue;
  722                                 for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
  723                                         if (x2 >= rm->rm_row[0]->rr_cols -
  724                                             raidz_parity(rm))
  725                                                 continue;
  726 
  727                                         /* Check if should stop */
  728                                         if (rto_opts.rto_should_stop)
  729                                                 return (err);
  730 
  731                                         LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
  732 
  733                                         tgtidx[0] = x0 + raidz_parity(rm);
  734                                         tgtidx[1] = x1 + raidz_parity(rm);
  735                                         tgtidx[2] = x2 + raidz_parity(rm);
  736 
  737                                         corrupt_colums(rm, tgtidx, 3);
  738 
  739                                         if (!opts->rto_sanity)
  740                                                 vdev_raidz_reconstruct(rm,
  741                                                     tgtidx, 3);
  742 
  743                                         if (cmp_data(opts, rm) != 0) {
  744                                                 err++;
  745                                                 LOG(D_DEBUG,
  746                                                     "\nREC D[%d %d %d]... "
  747                                                     "[FAIL]\n", x0, x1, x2);
  748                                         }
  749                                 }
  750                         }
  751                 }
  752         }
  753         return (err);
  754 }
  755 
  756 static int
  757 run_rec_check(raidz_test_opts_t *opts)
  758 {
  759         char **impl_name;
  760         unsigned fn, err = 0;
  761         zio_t *zio_test;
  762         raidz_map_t *rm_test;
  763 
  764         err = init_raidz_golden_map(opts, PARITY_PQR);
  765         if (0 != err)
  766                 return (err);
  767 
  768         LOG(D_INFO, DBLSEP);
  769         LOG(D_INFO, "Testing data reconstruction...\n");
  770 
  771         for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
  772             impl_name++) {
  773 
  774                 LOG(D_INFO, SEP);
  775                 LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
  776 
  777                 if (vdev_raidz_impl_set(*impl_name) != 0) {
  778                         LOG(D_INFO, "[SKIP]\n");
  779                         continue;
  780                 } else
  781                         LOG(D_INFO, "[SUPPORTED]\n");
  782 
  783 
  784                 /* create suitable raidz_map */
  785                 rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
  786                 /* generate parity */
  787                 vdev_raidz_generate_parity(rm_test);
  788 
  789                 for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
  790 
  791                         LOG(D_INFO, "\t\tTesting method [%s] ...",
  792                             raidz_rec_name[fn]);
  793 
  794                         if (run_rec_check_impl(opts, rm_test, fn) != 0) {
  795                                 LOG(D_INFO, "[FAIL]\n");
  796                                 err++;
  797 
  798                         } else
  799                                 LOG(D_INFO, "[PASS]\n");
  800 
  801                 }
  802                 /* tear down test raidz_map */
  803                 fini_raidz_map(&zio_test, &rm_test);
  804         }
  805 
  806         fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
  807 
  808         return (err);
  809 }
  810 
  811 static int
  812 run_test(raidz_test_opts_t *opts)
  813 {
  814         int err = 0;
  815 
  816         if (opts == NULL)
  817                 opts = &rto_opts;
  818 
  819         print_opts(opts, B_FALSE);
  820 
  821         err |= run_gen_check(opts);
  822         err |= run_rec_check(opts);
  823 
  824         return (err);
  825 }
  826 
  827 #define SWEEP_RUNNING   0
  828 #define SWEEP_FINISHED  1
  829 #define SWEEP_ERROR     2
  830 #define SWEEP_TIMEOUT   3
  831 
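      /*
       * Sweep coordination: sweep_state starts at SWEEP_RUNNING (0) and is
       * set to SWEEP_ERROR by a failing worker thread or to SWEEP_TIMEOUT by
       * the main loop.  free_slots, protected by sem_mtx/sem_cv, acts as a
       * counting semaphore limiting the number of concurrent sweep_thread()
       * workers to max_free_slots.
       */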
  832 static int sweep_state = 0;
  833 static raidz_test_opts_t failed_opts;
  834 
  835 static kmutex_t sem_mtx;
  836 static kcondvar_t sem_cv;
  837 static int max_free_slots;
  838 static int free_slots;
  839 
  840 static __attribute__((noreturn)) void
  841 sweep_thread(void *arg)
  842 {
  843         int err = 0;
  844         raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
  845         VERIFY(opts != NULL);
  846 
  847         err = run_test(opts);
  848 
  849         if (rto_opts.rto_sanity) {
  850                 /* 25% chance that a sweep test fails */
  851                 if (rand() < (RAND_MAX/4))
  852                         err = 1;
  853         }
  854 
  855         if (0 != err) {
  856                 mutex_enter(&sem_mtx);
  857                 memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
  858                 sweep_state = SWEEP_ERROR;
  859                 mutex_exit(&sem_mtx);
  860         }
  861 
  862         umem_free(opts, sizeof (raidz_test_opts_t));
  863 
  864         /* signal the next thread */
  865         mutex_enter(&sem_mtx);
  866         free_slots++;
  867         cv_signal(&sem_cv);
  868         mutex_exit(&sem_mtx);
  869 
  870         thread_exit();
  871 }
  872 
  873 static int
  874 run_sweep(void)
  875 {
  876         static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
  877         static const size_t ashift_v[] = { 9, 12, 14 };
  878         static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
  879                 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
  880 
  881         (void) setvbuf(stdout, NULL, _IONBF, 0);
  882 
  883         ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
  884             ARRAY_SIZE(dcols_v);
  885         ulong_t tried_comb = 0;
  886         hrtime_t time_diff, start_time = gethrtime();
  887         raidz_test_opts_t *opts;
  888         int a, d, s;
  889 
  890         max_free_slots = free_slots = MAX(2, boot_ncpus);
  891 
  892         mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
  893         cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
  894 
  895         for (s = 0; s < ARRAY_SIZE(size_v); s++)
  896         for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
  897         for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
  898 
  899                 if (size_v[s] < (1 << ashift_v[a])) {
  900                         total_comb--;
  901                         continue;
  902                 }
  903 
  904                 if (++tried_comb % 20 == 0)
  905                         LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
  906 
  907                 /* wait for signal to start new thread */
  908                 mutex_enter(&sem_mtx);
  909                 while (cv_timedwait_sig(&sem_cv, &sem_mtx,
  910                     ddi_get_lbolt() + hz)) {
  911 
  912                         /* check if should stop the test (timeout) */
  913                         time_diff = (gethrtime() - start_time) / NANOSEC;
  914                         if (rto_opts.rto_sweep_timeout > 0 &&
  915                             time_diff >= rto_opts.rto_sweep_timeout) {
  916                                 sweep_state = SWEEP_TIMEOUT;
  917                                 rto_opts.rto_should_stop = B_TRUE;
  918                                 mutex_exit(&sem_mtx);
  919                                 goto exit;
  920                         }
  921 
  922                         /* check if should stop the test (error) */
  923                         if (sweep_state != SWEEP_RUNNING) {
  924                                 mutex_exit(&sem_mtx);
  925                                 goto exit;
  926                         }
  927 
  928                         /* exit loop if a slot is available */
  929                         if (free_slots > 0) {
  930                                 break;
  931                         }
  932                 }
  933 
  934                 free_slots--;
  935                 mutex_exit(&sem_mtx);
  936 
  937                 opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
  938                 opts->rto_ashift = ashift_v[a];
  939                 opts->rto_dcols = dcols_v[d];
  940                 opts->rto_offset = (1ULL << ashift_v[a]) * rand();
  941                 opts->rto_dsize = size_v[s];
  942                 opts->rto_expand = rto_opts.rto_expand;
  943                 opts->rto_expand_offset = rto_opts.rto_expand_offset;
  944                 opts->rto_v = 0; /* be quiet */
  945 
  946                 VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
  947                     0, NULL, TS_RUN, defclsyspri), !=, NULL);
  948         }
  949 
  950 exit:
  951         LOG(D_ALL, "\nWaiting for test threads to finish...\n");
  952         mutex_enter(&sem_mtx);
  953         VERIFY(free_slots <= max_free_slots);
  954         while (free_slots < max_free_slots) {
  955                 (void) cv_wait(&sem_cv, &sem_mtx);
  956         }
  957         mutex_exit(&sem_mtx);
  958 
  959         if (sweep_state == SWEEP_ERROR) {
  960                 ERR("Sweep test failed! Failed option: \n");
  961                 print_opts(&failed_opts, B_TRUE);
  962         } else {
  963                 if (sweep_state == SWEEP_TIMEOUT)
  964                         LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
  965                             (ulong_t)rto_opts.rto_sweep_timeout);
  966 
  967                 LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
  968                     (ulong_t)tried_comb);
  969         }
  970 
  971         mutex_destroy(&sem_mtx);
  972 
  973         return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
  974 }
  975 
  976 
  977 int
  978 main(int argc, char **argv)
  979 {
  980         size_t i;
  981         struct sigaction action;
  982         int err = 0;
  983 
  984         /* init gdb pid string early */
  985         (void) sprintf(pid_s, "%d", getpid());
  986 
  987         action.sa_handler = sig_handler;
  988         sigemptyset(&action.sa_mask);
  989         action.sa_flags = 0;
  990 
  991         if (sigaction(SIGSEGV, &action, NULL) < 0) {
  992                 ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
  993                 exit(EXIT_FAILURE);
  994         }
  995 
  996         (void) setvbuf(stdout, NULL, _IOLBF, 0);
  997 
  998         dprintf_setup(&argc, argv);
  999 
 1000         process_options(argc, argv);
 1001 
 1002         kernel_init(SPA_MODE_READ);
 1003 
 1004         /* setup random data because rand() is not reentrant */
 1005         rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
 1006         srand((unsigned)time(NULL) * getpid());
 1007         for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
 1008                 rand_data[i] = rand();
 1009 
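              /*
               * Making the pattern read-only means a stray write to it
               * triggers SIGSEGV (handled above) rather than silently
               * changing the reference data used by later comparisons.
               */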
 1010         mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
 1011 
 1012         if (rto_opts.rto_benchmark) {
 1013                 run_raidz_benchmark();
 1014         } else if (rto_opts.rto_sweep) {
 1015                 err = run_sweep();
 1016         } else {
 1017                 err = run_test(NULL);
 1018         }
 1019 
 1020         umem_free(rand_data, SPA_MAXBLOCKSIZE);
 1021         kernel_fini();
 1022 
 1023         return (err);
 1024 }
