The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_decluster.c,v 1.16 2004/02/29 04:03:50 oster Exp $  */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: Mark Holland
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   29 /*----------------------------------------------------------------------
   30  *
   31  * rf_decluster.c -- code related to the declustered layout
   32  *
   33  * Created 10-21-92 (MCH)
   34  *
   35  * Nov 93:  adding support for distributed sparing.  This code is a little
   36  *          complex:  the basic layout used is as follows:
   37  *          let F = (v-1)/GCD(r,v-1).  The spare space for each set of
   38  *          F consecutive fulltables is grouped together and placed after
   39  *          that set of tables.
   40  *                   +------------------------------+
   41  *                   |        F fulltables          |
   42  *                   |        Spare Space           |
   43  *                   |        F fulltables          |
   44  *                   |        Spare Space           |
   45  *                   |            ...               |
   46  *                   +------------------------------+
   47  *
   48  *--------------------------------------------------------------------*/
   50 #include <sys/cdefs.h>
   51 __KERNEL_RCSID(0, "$NetBSD: rf_decluster.c,v 1.16 2004/02/29 04:03:50 oster Exp $");
   53 #include <dev/raidframe/raidframevar.h>
   55 #include "rf_archs.h"
   56 #include "rf_raid.h"
   57 #include "rf_decluster.h"
   58 #include "rf_debugMem.h"
   59 #include "rf_utils.h"
   60 #include "rf_alloclist.h"
   61 #include "rf_general.h"
   62 #include "rf_kintf.h"
   63 #include "rf_shutdown.h"
   67 /* configuration code */
   69 int 
   70 rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
   71                         RF_Config_t *cfgPtr)
   72 {
   73         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
   74         int     b, v, k, r, lambda;     /* block design params */
   75         int     i, j;
   76         RF_RowCol_t *first_avail_slot;
   77         RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
   78         RF_DeclusteredConfigInfo_t *info;
   79         RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk,
   80                 extraPUsPerDisk;
   81         RF_StripeCount_t totSparePUsPerDisk;
   82         RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
   83         RF_SectorCount_t SpareSpaceInSUs;
   84         char   *cfgBuf = (char *) (cfgPtr->layoutSpecific);
   85         RF_StripeNum_t l, SUID;
   87         SUID = l = 0;
   88         numCompleteSpareRegionsPerDisk = 0;
   90         /* 1. create layout specific structure */
   91         RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
   92         if (info == NULL)
   93                 return (ENOMEM);
   94         layoutPtr->layoutSpecificInfo = (void *) info;
   95         info->SpareTable = NULL;
   97         /* 2. extract parameters from the config structure */
   98         if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
   99                 (void)memcpy(info->sparemap_fname, cfgBuf, RF_SPAREMAP_NAME_LEN);
  100         }
  101         cfgBuf += RF_SPAREMAP_NAME_LEN;
  103         b = *((int *) cfgBuf);
  104         cfgBuf += sizeof(int);
  105         v = *((int *) cfgBuf);
  106         cfgBuf += sizeof(int);
  107         k = *((int *) cfgBuf);
  108         cfgBuf += sizeof(int);
  109         r = *((int *) cfgBuf);
  110         cfgBuf += sizeof(int);
  111         lambda = *((int *) cfgBuf);
  112         cfgBuf += sizeof(int);
  113         raidPtr->noRotate = *((int *) cfgBuf);
  114         cfgBuf += sizeof(int);
  116         /* the sparemaps are generated assuming that parity is rotated, so we
  117          * issue a warning if both distributed sparing and no-rotate are on at
  118          * the same time */
  119         if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
  120                 RF_ERRORMSG("Warning:  distributed sparing specified without parity rotation.\n");
  121         }
  122         if (raidPtr->numCol != v) {
  123                 RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
  124                 return (EINVAL);
  125         }
  126         /* 3.  set up the values used in the mapping code */
  127         info->BlocksPerTable = b;
  128         info->Lambda = lambda;
  129         info->NumParityReps = info->groupSize = k;
  130         info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU;  /* b blks, k-1 SUs each */
  131         info->SUsPerFullTable = k * info->SUsPerTable;  /* rot k times */
  132         info->PUsPerBlock = k - 1;
  133         info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
  134         info->TableDepthInPUs = (b * k) / v;
  135         info->FullTableDepthInPUs = info->TableDepthInPUs * k;  /* k repetitions */
  137         /* used only in distributed sparing case */
  138         info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);    /* (v-1)/gcd fulltables */
  139         info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
  140         info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v - 1)) * layoutPtr->SUsPerPU;
  142         /* check to make sure the block design is sufficiently small */
  143         if ((raidPtr->>flags & RF_DISTRIBUTE_SPARE)) {
  144                 if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
  145                         RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
  146                             (int) info->FullTableDepthInPUs,
  147                             (int) info->SpareSpaceDepthPerRegionInSUs,
  148                             (int) layoutPtr->stripeUnitsPerDisk);
  149                         return (EINVAL);
  150                 }
  151         } else {
  152                 if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
  153                         RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
  154                             (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU), \
  155                             (int) layoutPtr->stripeUnitsPerDisk);
  156                         return (EINVAL);
  157                 }
  158         }
  161         /* compute the size of each disk, and the number of tables in the last
  162          * fulltable (which need not be complete) */
  163         if (raidPtr->>flags & RF_DISTRIBUTE_SPARE) {
  165                 PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
  166                 spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
  167                     (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v - 1));
  168                 info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
  170                 numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
  171                 info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
  172                 extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
  174                 /* assume conservatively that we need the full amount of spare
  175                  * space in one region in order to provide spares for the
  176                  * partial spare region at the end of the array.  We set "i"
  177                  * to the number of tables in the partial spare region.  This
  178                  * may actually include some fulltables. */
  179                 extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
  180                 if (extraPUsPerDisk <= 0)
  181                         i = 0;
  182                 else
  183                         i = extraPUsPerDisk / info->TableDepthInPUs;
  185                 complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion / k) + i / k);
  186                 info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
  187                 info->ExtraTablesPerDisk = i % k;
  189                 /* note that in the last spare region, the spare space is
  190                  * complete even though data/parity space is not */
  191                 totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
  192                 info->TotSparePUsPerDisk = totSparePUsPerDisk;
  194                 layoutPtr->stripeUnitsPerDisk =
  195                     ((complete_FT_count / raidPtr->numRow) * info->FullTableDepthInPUs +        /* data & parity space */
  196                     info->ExtraTablesPerDisk * info->TableDepthInPUs +
  197                     totSparePUsPerDisk  /* spare space */
  198                     ) * layoutPtr->SUsPerPU;
  199                 layoutPtr->dataStripeUnitsPerDisk =
  200                     (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
  201                     * layoutPtr->SUsPerPU * (k - 1) / k;
  203         } else {
  204                 /* non-dist spare case:  force each disk to contain an
  205                  * integral number of tables */
  206                 layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
  207                 layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
  209                 /* compute the number of tables in the last fulltable, which
  210                  * need not be complete */
  211                 complete_FT_count =
  212                     ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
  214                 info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
  215                 info->ExtraTablesPerDisk =
  216                     ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
  217         }
  219         raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
  221         /* find the disk offset of the stripe unit where the last fulltable
  222          * starts */
  223         numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
  224         diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  225         if (raidPtr->>flags & RF_DISTRIBUTE_SPARE) {
  226                 SpareSpaceInSUs = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
  227                 diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
  228                 info->DiskOffsetOfLastSpareSpaceChunkInSUs =
  229                     diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  230         }
  231         info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
  232         info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
  234         /* 4.  create and initialize the lookup tables */
  235         info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  236         if (info->LayoutTable == NULL)
  237                 return (ENOMEM);
  238         info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
  239         if (info->OffsetTable == NULL)
  240                 return (ENOMEM);
  241         info->BlockTable = rf_make_2d_array(info->TableDepthInPUs * layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
  242         if (info->BlockTable == NULL)
  243                 return (ENOMEM);
  245         first_avail_slot = rf_make_1d_array(v, NULL);
  246         if (first_avail_slot == NULL)
  247                 return (ENOMEM);
  249         for (i = 0; i < b; i++)
  250                 for (j = 0; j < k; j++)
  251                         info->LayoutTable[i][j] = *cfgBuf++;
  253         /* initialize offset table */
  254         for (i = 0; i < b; i++)
  255                 for (j = 0; j < k; j++) {
  256                         info->OffsetTable[i][j] = first_avail_slot[info->LayoutTable[i][j]];
  257                         first_avail_slot[info->LayoutTable[i][j]]++;
  258                 }
  260         /* initialize block table */
  261         for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
  262                 for (i = 0; i < b; i++) {
  263                         for (j = 0; j < k; j++) {
  264                                 info->BlockTable[(info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l]
  265                                     [info->LayoutTable[i][j]] = SUID;
  266                         }
  267                         SUID++;
  268                 }
  269         }
  271         rf_free_1d_array(first_avail_slot, v);
  273         /* 5.  set up the remaining redundant-but-useful parameters */
  275         raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow * info->ExtraTablesPerDisk) *
  276             info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
  277         layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k - 1);
  279         /* strange evaluation order below to try and minimize overflow
  280          * problems */
  282         layoutPtr->dataSectorsPerStripe = (k - 1) * layoutPtr->sectorsPerStripeUnit;
  283         layoutPtr->numDataCol = k - 1;
  284         layoutPtr->numParityCol = 1;
  286         return (0);
  287 }
  288 /* declustering with distributed sparing */
  289 static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
  290 static void 
  291 rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg)
  292 {
  293         RF_DeclusteredConfigInfo_t *info;
  294         RF_Raid_t *raidPtr;
  296         raidPtr = (RF_Raid_t *) arg;
  297         info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  298         if (info->SpareTable)
  299                 rf_FreeSpareTable(raidPtr);
  300 }
  302 int 
  303 rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  304                           RF_Config_t *cfgPtr)
  305 {
  306         int     rc;
  308         rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
  309         if (rc)
  310                 return (rc);
  311         rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
  313         return (0);
  314 }
  316 void 
  317 rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  318                         RF_RowCol_t *row, RF_RowCol_t *col, 
  319                         RF_SectorNum_t *diskSector, int remap)
  320 {
  321         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  322         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  323         RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
  324         RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
  325         RF_StripeNum_t BlockID, BlockOffset, RepIndex;
  326         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  327         RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  328         RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
  330         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
  332         FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array
  333                                                  * (across rows) */
  334         if (raidPtr->numRow == 1)
  335                 *row = 0;       /* avoid a mod and a div in the common case */
  336         else {
  337                 *row = FullTableID % raidPtr->numRow;
  338                 FullTableID /= raidPtr->numRow; /* convert to fulltable ID on
  339                                                  * this disk */
  340         }
  341         if (raidPtr->>flags & RF_DISTRIBUTE_SPARE) {
  342                 SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
  343                 SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
  344         }
  345         FullTableOffset = SUID % sus_per_fulltable;
  346         TableID = FullTableOffset / info->SUsPerTable;
  347         TableOffset = FullTableOffset - TableID * info->SUsPerTable;
  348         BlockID = TableOffset / info->PUsPerBlock;
  349         BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
  350         BlockID %= info->BlocksPerTable;
  351         RepIndex = info->PUsPerBlock - TableID;
  352         if (!raidPtr->noRotate)
  353                 BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
  354         *col = info->LayoutTable[BlockID][BlockOffset];
  356         /* remap to distributed spare space if indicated */
  357         if (remap) {
  358                 RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
  359                     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
  360                 rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
  361         } else {
  363                 outSU = base_suid;
  364                 outSU += FullTableID * fulltable_depth; /* offs to strt of FT */
  365                 outSU += SpareSpace;    /* skip rsvd spare space */
  366                 outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU; /* offs to strt of tble */
  367                 outSU += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU; /* offs to the PU */
  368         }
  369         outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);      /* offs to the SU within
  370                                                                                  * a PU */
  372         /* convert SUs to sectors, and, if not aligned to SU boundary, add in
  373          * offset to sector.  */
  374         *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
  376         RF_ASSERT(*col != -1);
  377 }
  380 /* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
  381 void 
  382 rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
  383                         RF_RowCol_t *row, RF_RowCol_t *col,
  384                         RF_SectorNum_t *diskSector, int remap)
  385 {
  386         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  387         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  388         RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
  389         RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
  390         RF_StripeNum_t BlockID, BlockOffset, RepIndex;
  391         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  392         RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  393         RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
  395         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
  397         /* compute row & (possibly) spare space exactly as before */
  398         FullTableID = SUID / sus_per_fulltable;
  399         if (raidPtr->numRow == 1)
  400                 *row = 0;       /* avoid a mod and a div in the common case */
  401         else {
  402                 *row = FullTableID % raidPtr->numRow;
  403                 FullTableID /= raidPtr->numRow; /* convert to fulltable ID on
  404                                                  * this disk */
  405         }
  406         if ((raidPtr->>flags & RF_DISTRIBUTE_SPARE)) {
  407                 SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
  408                 SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
  409         }
  410         /* compute BlockID and RepIndex exactly as before */
  411         FullTableOffset = SUID % sus_per_fulltable;
  412         TableID = FullTableOffset / info->SUsPerTable;
  413         TableOffset = FullTableOffset - TableID * info->SUsPerTable;
  414         /* TableOffset     = FullTableOffset % info->SUsPerTable; */
  415         /* BlockID         = (TableOffset / info->PUsPerBlock) %
  416          * info->BlocksPerTable; */
  417         BlockID = TableOffset / info->PUsPerBlock;
  418         /* BlockOffset     = TableOffset % info->PUsPerBlock; */
  419         BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
  420         BlockID %= info->BlocksPerTable;
  422         /* the parity block is in the position indicated by RepIndex */
  423         RepIndex = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
  424         *col = info->LayoutTable[BlockID][RepIndex];
  426         if (remap) {
  427                 RF_ASSERT(raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
  428                     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
  429                 rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
  430         } else {
  432                 /* compute sector as before, except use RepIndex instead of
  433                  * BlockOffset */
  434                 outSU = base_suid;
  435                 outSU += FullTableID * fulltable_depth;
  436                 outSU += SpareSpace;    /* skip rsvd spare space */
  437                 outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  438                 outSU += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
  439         }
  441         outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
  442         *diskSector = outSU * layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
  444         RF_ASSERT(*col != -1);
  445 }
  446 /* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
  447  * the caller must _never_ attempt to modify this array.
  448  */
  449 void 
  450 rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
  451                              RF_RowCol_t **diskids, RF_RowCol_t *outRow)
  452 {
  453         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  454         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  455         RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
  456         RF_StripeCount_t fulltable_depth = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  457         RF_StripeNum_t base_suid = 0;
  458         RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
  459         RF_StripeNum_t stripeID, FullTableID;
  460         int     tableOffset;
  462         rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
  463         FullTableID = SUID / sus_per_fulltable; /* fulltable ID within array
  464                                                  * (across rows) */
  465         *outRow = FullTableID % raidPtr->numRow;
  466         stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID);  /* find stripe offset
  467                                                                  * into array */
  468         tableOffset = (stripeID % info->BlocksPerTable);        /* find offset into
  469                                                                  * block design table */
  470         *diskids = info->LayoutTable[tableOffset];
  471 }
  472 /* This returns the default head-separation limit, which is measured
  473  * in "required units for reconstruction".  Each time a disk fetches
  474  * a unit, it bumps a counter.  The head-sep code prohibits any disk
  475  * from getting more than headSepLimit counter values ahead of any
  476  * other.
  477  *
  478  * We assume here that the number of floating recon buffers is already
  479  * set.  There are r stripes to be reconstructed in each table, and so
  480  * if we have a total of B buffers, we can have at most B/r tables
  481  * under recon at any one time.  In each table, lambda units are required
  482  * from each disk, so given B buffers, the head sep limit has to be
  483  * (lambda*B)/r units.  We subtract one to avoid weird boundary cases.
  484  *
  485  * for example, suppose we're given 50 buffers, r=19, and lambda=4 as in
  486  * the 20.5 design.  There are 19 stripes/table to be reconstructed, so
  487  * we can have 50/19 tables concurrently under reconstruction, which means
  488  * we can allow the fastest disk to get 50/19 tables ahead of the slower
  489  * disk.  There are lambda "required units" for each disk, so the fastest
  490  * disk can get 4*50/19 = 10 counter values ahead of the slowest.
  491  *
  492  * If numBufsToAccumulate is not 1, we need to limit the head sep further
  493  * because multiple bufs will be required for each stripe under recon.
  494  */
  495 RF_HeadSepLimit_t 
  496 rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr)
  497 {
  498         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  500         return (info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate);
  501 }
  502 /* returns the default number of recon buffers to use.  The value
  503  * is somewhat arbitrary...it's intended to be large enough to allow
  504  * for a reasonably large head-sep limit, but small enough that you
  505  * don't use up all your system memory with buffers.
  506  */
  507 int 
  508 rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t * raidPtr)
  509 {
  510         return (100 * rf_numBufsToAccumulate);
  511 }
  512 /* sectors in the last fulltable of the array need to be handled
  513  * specially since this fulltable can be incomplete.  this function
  514  * changes the values of certain params to handle this.
  515  *
  516  * the idea here is that MapSector et. al. figure out which disk the
  517  * addressed unit lives on by computing the modulos of the unit number
  518  * with the number of units per fulltable, table, etc.  In the last
  519  * fulltable, there are fewer units per fulltable, so we need to adjust
  520  * the number of user data units per fulltable to reflect this.
  521  *
  522  * so, we (1) convert the fulltable size and depth parameters to
  523  * the size of the partial fulltable at the end, (2) compute the
  524  * disk sector offset where this fulltable starts, and (3) convert
  525  * the users stripe unit number from an offset into the array to
  526  * an offset into the last fulltable.
  527  */
  528 void 
  529 rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr,
  530                            RF_StripeNum_t *SUID,
  531                            RF_StripeCount_t *sus_per_fulltable,
  532                            RF_StripeCount_t *fulltable_depth,
  533                            RF_StripeNum_t *base_suid)
  534 {
  535         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  537         if (*SUID >= info->FullTableLimitSUID) {
  538                 /* new full table size is size of last full table on disk */
  539                 *sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;
  541                 /* new full table depth is corresponding depth */
  542                 *fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
  544                 /* set up the new base offset */
  545                 *base_suid = info->DiskOffsetOfLastFullTableInSUs;
  547                 /* convert users array address to an offset into the last
  548                  * fulltable */
  549                 *SUID -= info->FullTableLimitSUID;
  550         }
  551 }
  552 /*
  553  * map a stripe ID to a parity stripe ID.
  554  * See comment above RaidAddressToParityStripeID in layout.c.
  555  */
  556 void 
  557 rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr,
  558                            RF_StripeNum_t stripeID,
  559                            RF_StripeNum_t *psID,
  560                            RF_ReconUnitNum_t *which_ru)
  561 {
  562         RF_DeclusteredConfigInfo_t *info;
  564         info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  566         *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable))
  567             * info->BlocksPerTable + (stripeID % info->BlocksPerTable);
  568         *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU))
  569             / info->BlocksPerTable;
  570         RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
  571 }
  572 /*
  573  * Called from MapSector and MapParity to retarget an access at the spare unit.
  574  * Modifies the "col" and "outSU" parameters only.
  575  */
  576 void 
  577 rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr,
  578                         RF_DeclusteredConfigInfo_t *info,
  579                         RF_RowCol_t row,
  580                         RF_StripeNum_t FullTableID,
  581                         RF_StripeNum_t TableID,
  582                         RF_SectorNum_t BlockID,
  583                         RF_StripeNum_t base_suid,
  584                         RF_StripeNum_t SpareRegion,
  585                         RF_RowCol_t *outCol,
  586                         RF_StripeNum_t *outSU)
  587 {
  588         RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset,
  589                 which_ft;
  591         /*
  592          * note that FullTableID and hence SpareRegion may have gotten
  593          * tweaked by rf_decluster_adjust_params. We detect this by
  594          * noticing that base_suid is not 0.
  595          */
  596         if (base_suid == 0) {
  597                 ftID = FullTableID;
  598         } else {
  599                 /*
  600                  * There may be > 1.0 full tables in the last (i.e. partial)
  601                  * spare region.  find out which of these we're in.
  602                  */
  603                 lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
  604                 which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
  606                 /* compute the actual full table ID */
  607                 ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
  608                 SpareRegion = info->NumCompleteSRs;
  609         }
  610         TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion;
  612         *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
  613         RF_ASSERT(*outCol != -1);
  615         spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
  616             info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
  617             (SpareRegion + 1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;
  618         *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
  619         if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
  620                 printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n", (long) *outSU);
  621         }
  622 }
  627 int 
  628 rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol)
  629 {
  630         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  631         RF_SparetWait_t *req;
  632         int     retcode;
  634         RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
  635         req->C = raidPtr->numCol;
  636         req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
  637         req->fcol = fcol;
  638         req->SUsPerPU = raidPtr->Layout.SUsPerPU;
  639         req->TablesPerSpareRegion = info->TablesPerSpareRegion;
  640         req->BlocksPerTable = info->BlocksPerTable;
  641         req->TableDepthInPUs = info->TableDepthInPUs;
  642         req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;
  644         retcode = rf_GetSpareTableFromDaemon(req);
  645         RF_ASSERT(!retcode);    /* XXX -- fix this to recover gracefully --
  646                                  * XXX */
  647         return (retcode);
  648 }
  649 #endif
  651 /*
  652  * Invoked via ioctl to install a spare table in the kernel.
  653  */
  654 int 
  655 rf_SetSpareTable(RF_Raid_t *raidPtr, void *data)
  656 {
  657         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  658         RF_SpareTableEntry_t **ptrs;
  659         int     i, retcode;
  661         /* what we need to copyin is a 2-d array, so first copyin the user
  662          * pointers to the rows in the table */
  663         RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  664         retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  666         if (retcode)
  667                 return (retcode);
  669         /* now allocate kernel space for the row pointers */
  670         RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  672         /* now allocate kernel space for each row in the table, and copy it in
  673          * from user space */
  674         for (i = 0; i < info->TablesPerSpareRegion; i++) {
  675                 RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
  676                 retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
  677                 if (retcode) {
  678                         info->SpareTable = NULL;        /* blow off the memory
  679                                                          * we've allocated */
  680                         return (retcode);
  681                 }
  682         }
  684         /* free up the temporary array we used */
  685         RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  687         return (0);
  688 }
  690 RF_ReconUnitCount_t 
  691 rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr)
  692 {
  693         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  695         return (((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk);
  696 }
  699 void 
  700 rf_FreeSpareTable(RF_Raid_t *raidPtr)
  701 {
  702         long    i;
  703         RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  704         RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  705         RF_SpareTableEntry_t **table = info->SpareTable;
  707         for (i = 0; i < info->TablesPerSpareRegion; i++) {
  708                 RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
  709         }
  710         RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  711         info->SpareTable = (RF_SpareTableEntry_t **) NULL;
  712 }

Cache object: eb51118625e9ba79f193e8ea33158d16

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.