The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_disks.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_disks.c,v 1.51.2.2 2004/08/30 08:42:48 tron Exp $   */
    2 /*-
    3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to The NetBSD Foundation
    7  * by Greg Oster
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *        This product includes software developed by the NetBSD
   20  *        Foundation, Inc. and its contributors.
   21  * 4. Neither the name of The NetBSD Foundation nor the names of its
   22  *    contributors may be used to endorse or promote products derived
   23  *    from this software without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35  * POSSIBILITY OF SUCH DAMAGE.
   36  */
   37 
   38 /*
   39  * Copyright (c) 1995 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Author: Mark Holland
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  */
   64 
   65 /***************************************************************
   66  * rf_disks.c -- code to perform operations on the actual disks
   67  ***************************************************************/
   68 
   69 #include <sys/cdefs.h>
   70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.51.2.2 2004/08/30 08:42:48 tron Exp $");
   71 
   72 #include <dev/raidframe/raidframevar.h>
   73 
   74 #include "rf_raid.h"
   75 #include "rf_alloclist.h"
   76 #include "rf_utils.h"
   77 #include "rf_general.h"
   78 #include "rf_options.h"
   79 #include "rf_kintf.h"
   80 #include "rf_netbsd.h"
   81 
   82 #include <sys/param.h>
   83 #include <sys/systm.h>
   84 #include <sys/proc.h>
   85 #include <sys/ioctl.h>
   86 #include <sys/fcntl.h>
   87 #include <sys/vnode.h>
   88 
   89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
   90 static void rf_print_label_status( RF_Raid_t *, int, char *, 
   91                                   RF_ComponentLabel_t *);
   92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 
   93                                   RF_ComponentLabel_t *, int, int );
   94 
   95 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
   96 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
   97 
   98 /**************************************************************************
   99  *
  100  * initialize the disks comprising the array
  101  *
  102  * We want the spare disks to have regular row,col numbers so that we can 
  103  * easily substitue a spare for a failed disk.  But, the driver code assumes 
  104  * throughout that the array contains numRow by numCol _non-spare_ disks, so 
  105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
  106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the 
  107  * rest, and put all the spares in it.  This probably needs to get changed 
  108  * eventually.
  109  *
  110  **************************************************************************/
  111 
  112 int 
  113 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  114                   RF_Config_t *cfgPtr)
  115 {
  116         RF_RaidDisk_t *disks;
  117         RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
  118         RF_RowCol_t c;
  119         int bs, ret;
  120         unsigned i, count, foundone = 0, numFailuresThisRow;
  121         int force;
  122 
  123         force = cfgPtr->force;
  124 
  125         ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
  126         if (ret)
  127                 goto fail;
  128 
  129         disks = raidPtr->Disks;
  130 
  131         numFailuresThisRow = 0;
  132         for (c = 0; c < raidPtr->numCol; c++) {
  133                 ret = rf_ConfigureDisk(raidPtr, 
  134                                        &cfgPtr->devnames[0][c][0],
  135                                        &disks[c], c);
  136                 
  137                 if (ret)
  138                         goto fail;
  139                 
  140                 if (disks[c].status == rf_ds_optimal) {
  141                         raidread_component_label(
  142                                                  raidPtr->raid_cinfo[c].ci_dev,
  143                                                  raidPtr->raid_cinfo[c].ci_vp,
  144                                                  &raidPtr->raid_cinfo[c].ci_label);
  145                 }
  146 
  147                 if (disks[c].status != rf_ds_optimal) {
  148                         numFailuresThisRow++;
  149                 } else {
  150                         if (disks[c].numBlocks < min_numblks)
  151                                 min_numblks = disks[c].numBlocks;
  152                         DPRINTF6("Disk at col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
  153                                  c, disks[c].devname,
  154                                  (long int) disks[c].numBlocks,
  155                                  disks[c].blockSize,
  156                                  (long int) disks[c].numBlocks *
  157                                  disks[c].blockSize / 1024 / 1024);
  158                 }
  159         }
  160         /* XXX fix for n-fault tolerant */
  161         /* XXX this should probably check to see how many failures
  162            we can handle for this configuration! */
  163         if (numFailuresThisRow > 0)
  164                 raidPtr->status = rf_rs_degraded;
  165 
  166         /* all disks must be the same size & have the same block size, bs must
  167          * be a power of 2 */
  168         bs = 0;
  169         foundone = 0;
  170         for (c = 0; c < raidPtr->numCol; c++) {
  171                 if (disks[c].status == rf_ds_optimal) {
  172                         bs = disks[c].blockSize;
  173                         foundone = 1;
  174                         break;
  175                 }
  176         }
  177         if (!foundone) {
  178                 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
  179                 ret = EINVAL;
  180                 goto fail;
  181         }
  182         for (count = 0, i = 1; i; i <<= 1)
  183                 if (bs & i)
  184                         count++;
  185         if (count != 1) {
  186                 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
  187                 ret = EINVAL;
  188                 goto fail;
  189         }
  190 
  191         if (rf_CheckLabels( raidPtr, cfgPtr )) {
  192                 printf("raid%d: There were fatal errors\n", raidPtr->raidid);
  193                 if (force != 0) {
  194                         printf("raid%d: Fatal errors being ignored.\n",
  195                                raidPtr->raidid);
  196                 } else {
  197                         ret = EINVAL;
  198                         goto fail;
  199                 } 
  200         }
  201 
  202         for (c = 0; c < raidPtr->numCol; c++) {
  203                 if (disks[c].status == rf_ds_optimal) {
  204                         if (disks[c].blockSize != bs) {
  205                                 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
  206                                 ret = EINVAL;
  207                                 goto fail;
  208                         }
  209                         if (disks[c].numBlocks != min_numblks) {
  210                                 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
  211                                              c, (int) min_numblks);
  212                                 disks[c].numBlocks = min_numblks;
  213                         }
  214                 }
  215         }
  216 
  217         raidPtr->sectorsPerDisk = min_numblks;
  218         raidPtr->logBytesPerSector = ffs(bs) - 1;
  219         raidPtr->bytesPerSector = bs;
  220         raidPtr->sectorMask = bs - 1;
  221         return (0);
  222 
  223 fail:
  224         
  225         rf_UnconfigureVnodes( raidPtr );
  226 
  227         return (ret);
  228 }
  229 
  230 
  231 /****************************************************************************
  232  * set up the data structures describing the spare disks in the array
  233  * recall from the above comment that the spare disk descriptors are stored
  234  * in row zero, which is specially expanded to hold them.
  235  ****************************************************************************/
  236 int 
  237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
  238                        RF_Config_t *cfgPtr)
  239 {
  240         int     i, ret;
  241         unsigned int bs;
  242         RF_RaidDisk_t *disks;
  243         int     num_spares_done;
  244 
  245         num_spares_done = 0;
  246 
  247         /* The space for the spares should have already been allocated by
  248          * ConfigureDisks() */
  249 
  250         disks = &raidPtr->Disks[raidPtr->numCol];
  251         for (i = 0; i < raidPtr->numSpare; i++) {
  252                 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
  253                                        &disks[i], raidPtr->numCol + i);
  254                 if (ret)
  255                         goto fail;
  256                 if (disks[i].status != rf_ds_optimal) {
  257                         RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 
  258                                      &cfgPtr->spare_names[i][0]);
  259                 } else {
  260                         disks[i].status = rf_ds_spare;  /* change status to
  261                                                          * spare */
  262                         DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
  263                             disks[i].devname,
  264                             (long int) disks[i].numBlocks, disks[i].blockSize,
  265                             (long int) disks[i].numBlocks * 
  266                                  disks[i].blockSize / 1024 / 1024);
  267                 }
  268                 num_spares_done++;
  269         }
  270 
  271         /* check sizes and block sizes on spare disks */
  272         bs = 1 << raidPtr->logBytesPerSector;
  273         for (i = 0; i < raidPtr->numSpare; i++) {
  274                 if (disks[i].blockSize != bs) {
  275                         RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
  276                         ret = EINVAL;
  277                         goto fail;
  278                 }
  279                 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
  280                         RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
  281                                      disks[i].devname, disks[i].blockSize, 
  282                                      (long int) raidPtr->sectorsPerDisk);
  283                         ret = EINVAL;
  284                         goto fail;
  285                 } else
  286                         if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
  287                                 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
  288 
  289                                 disks[i].numBlocks = raidPtr->sectorsPerDisk;
  290                         }
  291         }
  292 
  293         return (0);
  294 
  295 fail:
  296 
  297         /* Release the hold on the main components.  We've failed to allocate
  298          * a spare, and since we're failing, we need to free things.. 
  299                  
  300          XXX failing to allocate a spare is *not* that big of a deal... 
  301          We *can* survive without it, if need be, esp. if we get hot
  302          adding working.  
  303 
  304          If we don't fail out here, then we need a way to remove this spare... 
  305          that should be easier to do here than if we are "live"... 
  306 
  307          */
  308 
  309         rf_UnconfigureVnodes( raidPtr );
  310         
  311         return (ret);
  312 }
  313 
  314 static int
  315 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
  316 {
  317         int ret;
  318 
  319         /* We allocate RF_MAXSPARE on the first row so that we
  320            have room to do hot-swapping of spares */
  321         RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
  322                         sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 
  323                         raidPtr->cleanupList);
  324         if (raidPtr->Disks == NULL) {
  325                 ret = ENOMEM;
  326                 goto fail;
  327         }
  328 
  329         /* get space for device specific stuff.. */
  330         RF_MallocAndAdd(raidPtr->raid_cinfo,
  331                         (raidPtr->numCol + RF_MAXSPARE) * 
  332                         sizeof(struct raidcinfo), (struct raidcinfo *),
  333                         raidPtr->cleanupList);
  334 
  335         if (raidPtr->raid_cinfo == NULL) {
  336                 ret = ENOMEM;
  337                 goto fail;
  338         }
  339 
  340         return(0);
  341 fail:   
  342         rf_UnconfigureVnodes( raidPtr );
  343 
  344         return(ret);
  345 }
  346 
  347 
  348 /* configure a single disk during auto-configuration at boot */
  349 int
  350 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
  351                       RF_AutoConfig_t *auto_config)
  352 {
  353         RF_RaidDisk_t *disks;
  354         RF_RaidDisk_t *diskPtr;
  355         RF_RowCol_t c;  
  356         RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
  357         int bs, ret;
  358         int numFailuresThisRow;
  359         RF_AutoConfig_t *ac;
  360         int parity_good;
  361         int mod_counter;
  362         int mod_counter_found;
  363 
  364 #if DEBUG
  365         printf("Starting autoconfiguration of RAID set...\n");
  366 #endif
  367 
  368         ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
  369         if (ret)
  370                 goto fail;
  371 
  372         disks = raidPtr->Disks;
  373 
  374         /* assume the parity will be fine.. */
  375         parity_good = RF_RAID_CLEAN;
  376 
  377         /* Check for mod_counters that are too low */
  378         mod_counter_found = 0;
  379         mod_counter = 0;
  380         ac = auto_config;
  381         while(ac!=NULL) {
  382                 if (mod_counter_found==0) {
  383                         mod_counter = ac->clabel->mod_counter;
  384                         mod_counter_found = 1;
  385                 } else {
  386                         if (ac->clabel->mod_counter > mod_counter) {
  387                                 mod_counter = ac->clabel->mod_counter;
  388                         }
  389                 }
  390                 ac->flag = 0; /* clear the general purpose flag */
  391                 ac = ac->next;
  392         }
  393 
  394         bs = 0;
  395 
  396         numFailuresThisRow = 0;
  397         for (c = 0; c < raidPtr->numCol; c++) {
  398                 diskPtr = &disks[c];
  399                 
  400                 /* find this row/col in the autoconfig */
  401 #if DEBUG
  402                 printf("Looking for %d in autoconfig\n",c);
  403 #endif
  404                 ac = auto_config;
  405                 while(ac!=NULL) {
  406                         if (ac->clabel==NULL) {
  407                                 /* big-time bad news. */
  408                                 goto fail;
  409                         }
  410                         if ((ac->clabel->column == c) &&
  411                             (ac->clabel->mod_counter == mod_counter)) {
  412                                 /* it's this one... */
  413                                 /* flag it as 'used', so we don't
  414                                    free it later. */
  415                                 ac->flag = 1;
  416 #if DEBUG
  417                                 printf("Found: %s at %d\n",
  418                                        ac->devname,c);
  419 #endif
  420                                 
  421                                 break;
  422                         }
  423                         ac=ac->next;
  424                 }
  425                 
  426                 if (ac==NULL) {
  427                         /* we didn't find an exact match with a
  428                            correct mod_counter above... can we find
  429                            one with an incorrect mod_counter to use
  430                            instead?  (this one, if we find it, will be
  431                            marked as failed once the set configures) 
  432                         */
  433 
  434                         ac = auto_config;
  435                         while(ac!=NULL) {
  436                                 if (ac->clabel==NULL) {
  437                                         /* big-time bad news. */
  438                                         goto fail;
  439                                 }
  440                                 if (ac->clabel->column == c) {
  441                                         /* it's this one... 
  442                                            flag it as 'used', so we 
  443                                            don't free it later. */
  444                                         ac->flag = 1;
  445 #if DEBUG
  446                                         printf("Found(low mod_counter): %s at %d\n",
  447                                                ac->devname,c);
  448 #endif
  449                                         
  450                                         break;
  451                                 }
  452                                 ac=ac->next;
  453                         }
  454                 }
  455 
  456 
  457 
  458                 if (ac!=NULL) {
  459                         /* Found it.  Configure it.. */
  460                         diskPtr->blockSize = ac->clabel->blockSize;
  461                         diskPtr->numBlocks = ac->clabel->numBlocks;
  462                         /* Note: rf_protectedSectors is already 
  463                            factored into numBlocks here */
  464                         raidPtr->raid_cinfo[c].ci_vp = ac->vp;
  465                         raidPtr->raid_cinfo[c].ci_dev = ac->dev;
  466                         
  467                         memcpy(&raidPtr->raid_cinfo[c].ci_label,
  468                                ac->clabel, sizeof(*ac->clabel));
  469                         sprintf(diskPtr->devname, "/dev/%s", 
  470                                 ac->devname);
  471                                 
  472                         /* note the fact that this component was
  473                            autoconfigured.  You'll need this info
  474                            later.  Trust me :) */
  475                         diskPtr->auto_configured = 1;
  476                         diskPtr->dev = ac->dev;
  477                         
  478                         /* 
  479                          * we allow the user to specify that
  480                          * only a fraction of the disks should
  481                          * be used this is just for debug: it
  482                          * speeds up the parity scan 
  483                          */
  484                         
  485                         diskPtr->numBlocks = diskPtr->numBlocks * 
  486                                 rf_sizePercentage / 100;
  487                         
  488                         /* XXX these will get set multiple times, 
  489                            but since we're autoconfiguring, they'd
  490                            better be always the same each time!
  491                            If not, this is the least of your worries */
  492 
  493                         bs = diskPtr->blockSize;
  494                         min_numblks = diskPtr->numBlocks;
  495                         
  496                         /* this gets done multiple times, but that's
  497                            fine -- the serial number will be the same
  498                            for all components, guaranteed */
  499                         raidPtr->serial_number = ac->clabel->serial_number;
  500                         /* check the last time the label was modified */
  501 
  502                         if (ac->clabel->mod_counter != mod_counter) {
  503                                 /* Even though we've filled in all of
  504                                    the above, we don't trust this
  505                                    component since it's modification
  506                                    counter is not in sync with the
  507                                    rest, and we really consider it to
  508                                    be failed.  */
  509                                 disks[c].status = rf_ds_failed;
  510                                 numFailuresThisRow++;
  511                         } else {
  512                                 if (ac->clabel->clean != RF_RAID_CLEAN) {
  513                                         parity_good = RF_RAID_DIRTY;
  514                                 }
  515                         }
  516                 } else {
  517                         /* Didn't find it at all!!  Component must
  518                            really be dead */
  519                         disks[c].status = rf_ds_failed;
  520                         sprintf(disks[c].devname, "component%d", c);
  521                         numFailuresThisRow++;
  522                 }
  523         }
  524         /* XXX fix for n-fault tolerant */
  525         /* XXX this should probably check to see how many failures
  526            we can handle for this configuration! */
  527         if (numFailuresThisRow > 0) {
  528                 raidPtr->status = rf_rs_degraded;
  529                 raidPtr->numFailures = numFailuresThisRow;
  530         }
  531         
  532         /* close the device for the ones that didn't get used */
  533 
  534         ac = auto_config;
  535         while(ac!=NULL) {
  536                 if (ac->flag == 0) {
  537                         vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
  538                         VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
  539                         vput(ac->vp);
  540                         ac->vp = NULL;
  541 #if DEBUG 
  542                         printf("Released %s from auto-config set.\n",
  543                                ac->devname);
  544 #endif
  545                 }
  546                 ac = ac->next;
  547         }
  548 
  549         raidPtr->mod_counter = mod_counter;
  550 
  551         /* note the state of the parity, if any */
  552         raidPtr->parity_good = parity_good;
  553         raidPtr->sectorsPerDisk = min_numblks;
  554         raidPtr->logBytesPerSector = ffs(bs) - 1;
  555         raidPtr->bytesPerSector = bs;
  556         raidPtr->sectorMask = bs - 1;
  557         return (0);
  558 
  559 fail:
  560         
  561         rf_UnconfigureVnodes( raidPtr );
  562 
  563         return (ret);
  564 
  565 }
  566 
  567 /* configure a single disk in the array */
  568 int 
  569 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
  570                  RF_RowCol_t col)
  571 {
  572         char   *p;
  573         struct partinfo dpart;
  574         struct vnode *vp;
  575         struct vattr va;
  576         struct proc *proc;
  577         int     error;
  578 
  579         p = rf_find_non_white(buf);
  580         if (p[strlen(p) - 1] == '\n') {
  581                 /* strip off the newline */
  582                 p[strlen(p) - 1] = '\0';
  583         }
  584         (void) strcpy(diskPtr->devname, p);
  585 
  586         proc = raidPtr->engine_thread;
  587 
  588         /* Let's start by claiming the component is fine and well... */
  589         diskPtr->status = rf_ds_optimal;
  590 
  591         raidPtr->raid_cinfo[col].ci_vp = NULL;
  592         raidPtr->raid_cinfo[col].ci_dev = 0;
  593 
  594         if (!strcmp("absent", diskPtr->devname)) {
  595                 printf("Ignoring missing component at column %d\n", col);
  596                 sprintf(diskPtr->devname, "component%d", col);
  597                 diskPtr->status = rf_ds_failed;
  598                 return (0);
  599         }
  600 
  601         error = raidlookup(diskPtr->devname, proc, &vp);
  602         if (error) {
  603                 printf("raidlookup on device: %s failed!\n", diskPtr->devname);
  604                 if (error == ENXIO) {
  605                         /* the component isn't there... must be dead :-( */
  606                         diskPtr->status = rf_ds_failed;
  607                 } else {
  608                         return (error);
  609                 }
  610         }
  611         if (diskPtr->status == rf_ds_optimal) {
  612 
  613                 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
  614                         return (error);
  615                 }
  616                 error = VOP_IOCTL(vp, DIOCGPART, &dpart,
  617                                   FREAD, proc->p_ucred, proc);
  618                 if (error) {
  619                         return (error);
  620                 }
  621                 
  622                 diskPtr->blockSize = dpart.disklab->d_secsize;
  623                 
  624                 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
  625                 diskPtr->partitionSize = dpart.part->p_size;
  626 
  627                 raidPtr->raid_cinfo[col].ci_vp = vp;
  628                 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
  629                 
  630                 /* This component was not automatically configured */
  631                 diskPtr->auto_configured = 0;
  632                 diskPtr->dev = va.va_rdev;
  633                 
  634                 /* we allow the user to specify that only a fraction of the
  635                  * disks should be used this is just for debug:  it speeds up
  636                  * the parity scan */
  637                 diskPtr->numBlocks = diskPtr->numBlocks * 
  638                         rf_sizePercentage / 100;
  639         }
  640         return (0);
  641 }
  642 
  643 static void
  644 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
  645                       RF_ComponentLabel_t *ci_label)
  646 {
  647 
  648         printf("raid%d: Component %s being configured at col: %d\n", 
  649                raidPtr->raidid, dev_name, column );
  650         printf("         Column: %d Num Columns: %d\n",
  651                ci_label->column, 
  652                ci_label->num_columns);
  653         printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
  654                ci_label->version, ci_label->serial_number,
  655                ci_label->mod_counter);
  656         printf("         Clean: %s Status: %d\n",
  657                ci_label->clean ? "Yes" : "No", ci_label->status );
  658 }
  659 
  660 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
  661                                  char *dev_name, RF_ComponentLabel_t *ci_label,
  662                                  int serial_number, int mod_counter)
  663 {
  664         int fatal_error = 0;
  665 
  666         if (serial_number != ci_label->serial_number) {
  667                 printf("%s has a different serial number: %d %d\n", 
  668                        dev_name, serial_number, ci_label->serial_number);
  669                 fatal_error = 1;
  670         }
  671         if (mod_counter != ci_label->mod_counter) {
  672                 printf("%s has a different modfication count: %d %d\n",
  673                        dev_name, mod_counter, ci_label->mod_counter);
  674         }
  675         
  676         if (row != ci_label->row) {
  677                 printf("Row out of alignment for: %s\n", dev_name); 
  678                 fatal_error = 1;
  679         }
  680         if (column != ci_label->column) {
  681                 printf("Column out of alignment for: %s\n", dev_name);
  682                 fatal_error = 1;
  683         }
  684         if (raidPtr->numCol != ci_label->num_columns) {
  685                 printf("Number of columns do not match for: %s\n", dev_name);
  686                 fatal_error = 1;
  687         }
  688         if (ci_label->clean == 0) {
  689                 /* it's not clean, but that's not fatal */
  690                 printf("%s is not clean!\n", dev_name);
  691         }
  692         return(fatal_error);
  693 }
  694 
  695 
  696 /* 
  697 
  698    rf_CheckLabels() - check all the component labels for consistency.
  699    Return an error if there is anything major amiss.
  700 
  701  */
  702 
  703 int 
  704 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
  705 {
  706         int c;
  707         char *dev_name;
  708         RF_ComponentLabel_t *ci_label;
  709         int serial_number = 0;
  710         int mod_number = 0;
  711         int fatal_error = 0;
  712         int mod_values[4];
  713         int mod_count[4];
  714         int ser_values[4];
  715         int ser_count[4];
  716         int num_ser;
  717         int num_mod;
  718         int i;
  719         int found;
  720         int hosed_column;
  721         int too_fatal;
  722         int parity_good;
  723         int force;
  724 
  725         hosed_column = -1;
  726         too_fatal = 0;
  727         force = cfgPtr->force;
  728 
  729         /* 
  730            We're going to try to be a little intelligent here.  If one 
  731            component's label is bogus, and we can identify that it's the
  732            *only* one that's gone, we'll mark it as "failed" and allow
  733            the configuration to proceed.  This will be the *only* case
  734            that we'll proceed if there would be (otherwise) fatal errors.
  735            
  736            Basically we simply keep a count of how many components had
  737            what serial number.  If all but one agree, we simply mark
  738            the disagreeing component as being failed, and allow 
  739            things to come up "normally".
  740            
  741            We do this first for serial numbers, and then for "mod_counter".
  742 
  743          */
  744 
  745         num_ser = 0;
  746         num_mod = 0;
  747 
  748         for (c = 0; c < raidPtr->numCol; c++) {
  749                 ci_label = &raidPtr->raid_cinfo[c].ci_label;
  750                 found=0;
  751                 for(i=0;i<num_ser;i++) {
  752                         if (ser_values[i] == ci_label->serial_number) {
  753                                 ser_count[i]++;
  754                                 found=1;
  755                                 break;
  756                         }
  757                 }
  758                 if (!found) {
  759                         ser_values[num_ser] = ci_label->serial_number;
  760                         ser_count[num_ser] = 1;
  761                         num_ser++;
  762                         if (num_ser>2) {
  763                                 fatal_error = 1;
  764                                 break;
  765                         }
  766                 }
  767                 found=0;
  768                 for(i=0;i<num_mod;i++) {
  769                         if (mod_values[i] == ci_label->mod_counter) {
  770                                 mod_count[i]++;
  771                                 found=1;
  772                                 break;
  773                         }
  774                 }
  775                 if (!found) {
  776                         mod_values[num_mod] = ci_label->mod_counter;
  777                         mod_count[num_mod] = 1;
  778                         num_mod++;
  779                         if (num_mod>2) {
  780                                 fatal_error = 1;
  781                                 break;
  782                         }
  783                 }
  784         }
  785 #if DEBUG
  786         printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
  787         for(i=0;i<num_ser;i++) {
  788                 printf("%d %d\n", ser_values[i], ser_count[i]);
  789         }
  790         printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
  791         for(i=0;i<num_mod;i++) {
  792                 printf("%d %d\n", mod_values[i], mod_count[i]);
  793         }
  794 #endif
  795         serial_number = ser_values[0];
  796         if (num_ser == 2) {
  797                 if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
  798                         /* Locate the maverick component */
  799                         if (ser_count[1] > ser_count[0]) {
  800                                 serial_number = ser_values[1];
  801                         } 
  802 
  803                         for (c = 0; c < raidPtr->numCol; c++) {
  804                                 ci_label = &raidPtr->raid_cinfo[c].ci_label;
  805                                 if (serial_number != ci_label->serial_number) {
  806                                         hosed_column = c;
  807                                         break;
  808                                 }
  809                         }
  810                         printf("Hosed component: %s\n",
  811                                &cfgPtr->devnames[0][hosed_column][0]);
  812                         if (!force) {
  813                                 /* we'll fail this component, as if there are
  814                                    other major errors, we arn't forcing things
  815                                    and we'll abort the config anyways */
  816                                 raidPtr->Disks[hosed_column].status
  817                                         = rf_ds_failed;
  818                                 raidPtr->numFailures++;
  819                                 raidPtr->status = rf_rs_degraded;
  820                         }
  821                 } else {
  822                         too_fatal = 1;
  823                 }
  824                 if (cfgPtr->parityConfig == '') {
  825                         /* We've identified two different serial numbers. 
  826                            RAID 0 can't cope with that, so we'll punt */
  827                         too_fatal = 1;
  828                 }
  829 
  830         } 
  831 
  832         /* record the serial number for later.  If we bail later, setting
  833            this doesn't matter, otherwise we've got the best guess at the 
  834            correct serial number */
  835         raidPtr->serial_number = serial_number;
  836 
  837         mod_number = mod_values[0];
  838         if (num_mod == 2) {
  839                 if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
  840                         /* Locate the maverick component */
  841                         if (mod_count[1] > mod_count[0]) {
  842                                 mod_number = mod_values[1];
  843                         } else if (mod_count[1] < mod_count[0]) {
  844                                 mod_number = mod_values[0];
  845                         } else {
  846                                 /* counts of different modification values
  847                                    are the same.   Assume greater value is 
  848                                    the correct one, all other things 
  849                                    considered */
  850                                 if (mod_values[0] > mod_values[1]) {
  851                                         mod_number = mod_values[0];
  852                                 } else {
  853                                         mod_number = mod_values[1];
  854                                 }
  855                                 
  856                         }
  857 
  858                         for (c = 0; c < raidPtr->numCol; c++) {
  859                                 ci_label = &raidPtr->raid_cinfo[c].ci_label;
  860                                 if (mod_number != ci_label->mod_counter) {
  861                                         if (hosed_column == c) {
  862                                                 /* same one.  Can
  863                                                    deal with it.  */
  864                                         } else {
  865                                                 hosed_column = c;
  866                                                 if (num_ser != 1) {
  867                                                         too_fatal = 1;
  868                                                         break;
  869                                                 }
  870                                         }
  871                                 }
  872                         }
  873                         printf("Hosed component: %s\n",
  874                                &cfgPtr->devnames[0][hosed_column][0]);
  875                         if (!force) {
  876                                 /* we'll fail this component, as if there are
  877                                    other major errors, we arn't forcing things
  878                                    and we'll abort the config anyways */
  879                                 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
  880                                         raidPtr->Disks[hosed_column].status
  881                                                 = rf_ds_failed;
  882                                         raidPtr->numFailures++;
  883                                         raidPtr->status = rf_rs_degraded;
  884                                 }
  885                         }
  886                 } else {
  887                         too_fatal = 1;
  888                 }
  889                 if (cfgPtr->parityConfig == '') {
  890                         /* We've identified two different mod counters.
  891                            RAID 0 can't cope with that, so we'll punt */
  892                         too_fatal = 1;
  893                 }
  894         } 
  895 
  896         raidPtr->mod_counter = mod_number;
  897 
  898         if (too_fatal) {
  899                 /* we've had both a serial number mismatch, and a mod_counter
  900                    mismatch -- and they involved two different components!!
  901                    Bail -- make things fail so that the user must force
  902                    the issue... */
  903                 hosed_column = -1;
  904                 fatal_error = 1;
  905         }
  906 
  907         if (num_ser > 2) {
  908                 printf("raid%d: Too many different serial numbers!\n", 
  909                        raidPtr->raidid);
  910                 fatal_error = 1;
  911         }
  912 
  913         if (num_mod > 2) {
  914                 printf("raid%d: Too many different mod counters!\n", 
  915                        raidPtr->raidid);
  916                 fatal_error = 1;
  917         }
  918 
  919         /* we start by assuming the parity will be good, and flee from
  920            that notion at the slightest sign of trouble */
  921 
  922         parity_good = RF_RAID_CLEAN;
  923 
  924         for (c = 0; c < raidPtr->numCol; c++) {
  925                 dev_name = &cfgPtr->devnames[0][c][0];
  926                 ci_label = &raidPtr->raid_cinfo[c].ci_label;
  927                 
  928                 if (c == hosed_column) {
  929                         printf("raid%d: Ignoring %s\n",
  930                                raidPtr->raidid, dev_name);
  931                 } else {                        
  932                         rf_print_label_status( raidPtr, c, dev_name, ci_label);
  933                         if (rf_check_label_vitals( raidPtr, 0, c, 
  934                                                    dev_name, ci_label,
  935                                                    serial_number, 
  936                                                    mod_number )) {
  937                                 fatal_error = 1;
  938                         }
  939                         if (ci_label->clean != RF_RAID_CLEAN) {
  940                                 parity_good = RF_RAID_DIRTY;
  941                         }
  942                 }
  943         }
  944         
  945         if (fatal_error) {
  946                 parity_good = RF_RAID_DIRTY;
  947         }
  948 
  949         /* we note the state of the parity */
  950         raidPtr->parity_good = parity_good;
  951 
  952         return(fatal_error);    
  953 }
  954 
  955 int
  956 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
  957 {
  958         RF_RaidDisk_t *disks;
  959         RF_DiskQueue_t *spareQueues;
  960         int ret;
  961         unsigned int bs;
  962         int spare_number;
  963 
  964         ret=0;
  965 
  966         if (raidPtr->numSpare >= RF_MAXSPARE) {
  967                 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
  968                 return(EINVAL);
  969         }
  970 
  971         RF_LOCK_MUTEX(raidPtr->mutex);
  972         while (raidPtr->adding_hot_spare==1) {
  973                 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
  974                         &(raidPtr->mutex));
  975         }
  976         raidPtr->adding_hot_spare=1;
  977         RF_UNLOCK_MUTEX(raidPtr->mutex);
  978 
  979         /* the beginning of the spares... */
  980         disks = &raidPtr->Disks[raidPtr->numCol];
  981 
  982         spare_number = raidPtr->numSpare;
  983 
  984         ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
  985                                &disks[spare_number],
  986                                raidPtr->numCol + spare_number);
  987 
  988         if (ret)
  989                 goto fail;
  990         if (disks[spare_number].status != rf_ds_optimal) {
  991                 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 
  992                              sparePtr->component_name);
  993                 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
  994                 ret=EINVAL;
  995                 goto fail;
  996         } else {
  997                 disks[spare_number].status = rf_ds_spare;
  998                 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
  999                          disks[spare_number].devname,
 1000                          (long int) disks[spare_number].numBlocks, 
 1001                          disks[spare_number].blockSize,
 1002                          (long int) disks[spare_number].numBlocks * 
 1003                          disks[spare_number].blockSize / 1024 / 1024);
 1004         }
 1005         
 1006 
 1007         /* check sizes and block sizes on the spare disk */
 1008         bs = 1 << raidPtr->logBytesPerSector;
 1009         if (disks[spare_number].blockSize != bs) {
 1010                 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
 1011                 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
 1012                 ret = EINVAL;
 1013                 goto fail;
 1014         }
 1015         if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
 1016                 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
 1017                              disks[spare_number].devname, 
 1018                              disks[spare_number].blockSize, 
 1019                              (long int) raidPtr->sectorsPerDisk);
 1020                 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
 1021                 ret = EINVAL;
 1022                 goto fail;
 1023         } else {
 1024                 if (disks[spare_number].numBlocks > 
 1025                     raidPtr->sectorsPerDisk) {
 1026                         RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 
 1027                                      (long int) raidPtr->sectorsPerDisk);
 1028                         
 1029                         disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
 1030                 }
 1031         }
 1032 
 1033         spareQueues = &raidPtr->Queues[raidPtr->numCol];
 1034         ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
 1035                                  raidPtr->numCol + spare_number, 
 1036                                  raidPtr->qType,
 1037                                  raidPtr->sectorsPerDisk,
 1038                                  raidPtr->Disks[raidPtr->numCol + 
 1039                                                   spare_number].dev,
 1040                                  raidPtr->maxOutstanding,
 1041                                  &raidPtr->shutdownList,
 1042                                  raidPtr->cleanupList);
 1043                                  
 1044         RF_LOCK_MUTEX(raidPtr->mutex);
 1045         raidPtr->numSpare++;
 1046         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1047 
 1048 fail:
 1049         RF_LOCK_MUTEX(raidPtr->mutex);
 1050         raidPtr->adding_hot_spare=0;
 1051         wakeup(&(raidPtr->adding_hot_spare));
 1052         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1053 
 1054         return(ret);
 1055 }
 1056 
 1057 int
 1058 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 1059 {
 1060         int spare_number;
 1061 
 1062 
 1063         if (raidPtr->numSpare==0) {
 1064                 printf("No spares to remove!\n");
 1065                 return(EINVAL);
 1066         }
 1067 
 1068         spare_number = sparePtr->column;
 1069 
 1070         return(EINVAL); /* XXX not implemented yet */
 1071 #if 0
 1072         if (spare_number < 0 || spare_number > raidPtr->numSpare) {
 1073                 return(EINVAL);
 1074         }
 1075 
 1076         /* verify that this spare isn't in use... */
 1077 
 1078 
 1079 
 1080 
 1081         /* it's gone.. */
 1082 
 1083         raidPtr->numSpare--;
 1084 
 1085         return(0);
 1086 #endif
 1087 }
 1088 
 1089 
 1090 int
 1091 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 1092 {
 1093         RF_RaidDisk_t *disks;
 1094 
 1095         if ((component->column < 0) || 
 1096             (component->column >= raidPtr->numCol)) {
 1097                 return(EINVAL);
 1098         }
 1099 
 1100         disks = &raidPtr->Disks[component->column];
 1101 
 1102         /* 1. This component must be marked as 'failed' */
 1103 
 1104         return(EINVAL); /* Not implemented yet. */
 1105 }
 1106 
 1107 int
 1108 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 1109 {
 1110 
 1111         /* Issues here include how to 'move' this in if there is IO 
 1112            taking place (e.g. component queues and such) */
 1113 
 1114         return(EINVAL); /* Not implemented yet. */
 1115 }

Cache object: e1bdd17abe5578c3f574a8cc3d83b344


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.