The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_netbsdkintf.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_netbsdkintf.c,v 1.178.2.1 2004/07/02 18:03:06 he Exp $      */
    2 /*-
    3  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
    4  * All rights reserved.
    5  *
    6  * This code is derived from software contributed to The NetBSD Foundation
    7  * by Greg Oster; Jason R. Thorpe.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *        This product includes software developed by the NetBSD
   20  *        Foundation, Inc. and its contributors.
   21  * 4. Neither the name of The NetBSD Foundation nor the names of its
   22  *    contributors may be used to endorse or promote products derived
   23  *    from this software without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35  * POSSIBILITY OF SUCH DAMAGE.
   36  */
   37 
   38 /*
   39  * Copyright (c) 1990, 1993
   40  *      The Regents of the University of California.  All rights reserved.
   41  *
   42  * This code is derived from software contributed to Berkeley by
   43  * the Systems Programming Group of the University of Utah Computer
   44  * Science Department.
   45  *
   46  * Redistribution and use in source and binary forms, with or without
   47  * modification, are permitted provided that the following conditions
   48  * are met:
   49  * 1. Redistributions of source code must retain the above copyright
   50  *    notice, this list of conditions and the following disclaimer.
   51  * 2. Redistributions in binary form must reproduce the above copyright
   52  *    notice, this list of conditions and the following disclaimer in the
   53  *    documentation and/or other materials provided with the distribution.
   54  * 3. Neither the name of the University nor the names of its contributors
   55  *    may be used to endorse or promote products derived from this software
   56  *    without specific prior written permission.
   57  *
   58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   68  * SUCH DAMAGE.
   69  *
   70  * from: Utah $Hdr: cd.c 1.6 90/11/28$
   71  *
   72  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
   73  */
   74 
   75 /*
   76  * Copyright (c) 1988 University of Utah.
   77  *
   78  * This code is derived from software contributed to Berkeley by
   79  * the Systems Programming Group of the University of Utah Computer
   80  * Science Department.
   81  *
   82  * Redistribution and use in source and binary forms, with or without
   83  * modification, are permitted provided that the following conditions
   84  * are met:
   85  * 1. Redistributions of source code must retain the above copyright
   86  *    notice, this list of conditions and the following disclaimer.
   87  * 2. Redistributions in binary form must reproduce the above copyright
   88  *    notice, this list of conditions and the following disclaimer in the
   89  *    documentation and/or other materials provided with the distribution.
   90  * 3. All advertising materials mentioning features or use of this software
   91  *    must display the following acknowledgement:
   92  *      This product includes software developed by the University of
   93  *      California, Berkeley and its contributors.
   94  * 4. Neither the name of the University nor the names of its contributors
   95  *    may be used to endorse or promote products derived from this software
   96  *    without specific prior written permission.
   97  *
   98  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   99  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  100  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  101  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  102  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  103  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  104  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  105  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  106  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  107  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  108  * SUCH DAMAGE.
  109  *
  110  * from: Utah $Hdr: cd.c 1.6 90/11/28$
  111  *
  112  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
  113  */
  114 
  115 /*
  116  * Copyright (c) 1995 Carnegie-Mellon University.
  117  * All rights reserved.
  118  *
  119  * Authors: Mark Holland, Jim Zelenka
  120  *
  121  * Permission to use, copy, modify and distribute this software and
  122  * its documentation is hereby granted, provided that both the copyright
  123  * notice and this permission notice appear in all copies of the
  124  * software, derivative works or modified versions, and any portions
  125  * thereof, and that both notices appear in supporting documentation.
  126  *
  127  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  128  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  129  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  130  *
  131  * Carnegie Mellon requests users of this software to return to
  132  *
  133  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  134  *  School of Computer Science
  135  *  Carnegie Mellon University
  136  *  Pittsburgh PA 15213-3890
  137  *
  138  * any improvements or extensions that they make and grant Carnegie the
  139  * rights to redistribute these changes.
  140  */
  141 
  142 /***********************************************************
  143  *
  144  * rf_kintf.c -- the kernel interface routines for RAIDframe
  145  *
  146  ***********************************************************/
  147 
  148 #include <sys/cdefs.h>
  149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.178.2.1 2004/07/02 18:03:06 he Exp $");
  150 
  151 #include <sys/param.h>
  152 #include <sys/errno.h>
  153 #include <sys/pool.h>
  154 #include <sys/proc.h>
  155 #include <sys/queue.h>
  156 #include <sys/disk.h>
  157 #include <sys/device.h>
  158 #include <sys/stat.h>
  159 #include <sys/ioctl.h>
  160 #include <sys/fcntl.h>
  161 #include <sys/systm.h>
  162 #include <sys/namei.h>
  163 #include <sys/vnode.h>
  164 #include <sys/disklabel.h>
  165 #include <sys/conf.h>
  166 #include <sys/lock.h>
  167 #include <sys/buf.h>
  168 #include <sys/user.h>
  169 #include <sys/reboot.h>
  170 
  171 #include <dev/raidframe/raidframevar.h>
  172 #include <dev/raidframe/raidframeio.h>
  173 #include "raid.h"
  174 #include "opt_raid_autoconfig.h"
  175 #include "rf_raid.h"
  176 #include "rf_copyback.h"
  177 #include "rf_dag.h"
  178 #include "rf_dagflags.h"
  179 #include "rf_desc.h"
  180 #include "rf_diskqueue.h"
  181 #include "rf_etimer.h"
  182 #include "rf_general.h"
  183 #include "rf_kintf.h"
  184 #include "rf_options.h"
  185 #include "rf_driver.h"
  186 #include "rf_parityscan.h"
  187 #include "rf_threadstuff.h"
  188 
  189 #ifdef DEBUG
  190 int     rf_kdebug_level = 0;
  191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
  192 #else                           /* DEBUG */
  193 #define db1_printf(a) { }
  194 #endif                          /* DEBUG */
  195 
  196 static RF_Raid_t **raidPtrs;    /* global raid device descriptors */
  197 
  198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
  199 
  200 static RF_SparetWait_t *rf_sparet_wait_queue;   /* requests to install a
  201                                                  * spare table */
  202 static RF_SparetWait_t *rf_sparet_resp_queue;   /* responses from
  203                                                  * installation process */
  204 
  205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
  206 
  207 /* prototypes */
  208 static void KernelWakeupFunc(struct buf * bp);
  209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, 
  210                    dev_t dev, RF_SectorNum_t startSect, 
  211                    RF_SectorCount_t numSect, caddr_t buf,
  212                    void (*cbFunc) (struct buf *), void *cbArg, 
  213                    int logBytesPerSector, struct proc * b_proc);
  214 static void raidinit(RF_Raid_t *);
  215 
  216 void raidattach(int);
  217 
  218 dev_type_open(raidopen);
  219 dev_type_close(raidclose);
  220 dev_type_read(raidread);
  221 dev_type_write(raidwrite);
  222 dev_type_ioctl(raidioctl);
  223 dev_type_strategy(raidstrategy);
  224 dev_type_dump(raiddump);
  225 dev_type_size(raidsize);
  226 
  227 const struct bdevsw raid_bdevsw = {
  228         raidopen, raidclose, raidstrategy, raidioctl,
  229         raiddump, raidsize, D_DISK
  230 };
  231 
  232 const struct cdevsw raid_cdevsw = {
  233         raidopen, raidclose, raidread, raidwrite, raidioctl,
  234         nostop, notty, nopoll, nommap, nokqfilter, D_DISK
  235 };
  236 
  237 /*
  238  * Pilfered from ccd.c
  239  */
  240 
  241 struct raidbuf {                /* buf wrapper; raidattach() sizes the rf_pools.cbuf pool from sizeof(struct raidbuf) */
  242         struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!!  (presumably so a struct buf * can be treated as a struct raidbuf * -- TODO confirm in the users of this struct) */
  243         struct buf *rf_obp;     /* pointer to the original I/O buf */
  244         RF_DiskQueueData_t *req;/* the disk-queue request that this buf was part of */
  245 };
  246 
  247 /* XXX Not sure if the following should be replacing the raidPtrs above,
  248    or if it should be used in conjunction with that... 
  249 */
  250 
  251 struct raid_softc {             /* per-unit driver state; raidattach() allocates one zeroed entry per unit in the raid_softc[] array */
  252         int     sc_flags;       /* RAIDF_* state bits, defined below */
  253         int     sc_cflags;      /* configuration flags */
  254         size_t  sc_size;        /* size of the raid device */
  255         char    sc_xname[20];   /* XXX external name */
  256         struct disk sc_dkdev;   /* generic disk device info; supplies the open masks and the disklabel used by open/close/size */
  257         struct bufq_state buf_queue;    /* device queue: allocated BUFQ_FCFS in raidattach(), filled by raidstrategy() */
  258 };
  259 /* sc_flags */
  260 #define RAIDF_INITED    0x01    /* unit has been initialized; the I/O entry points return ENXIO while it is clear */
  261 #define RAIDF_WLABEL    0x02    /* label area is writable */
  262 #define RAIDF_LABELLING 0x04    /* unit is currently being labelled */
  263 #define RAIDF_WANTED    0x40    /* someone is waiting to obtain a lock */
  264 #define RAIDF_LOCKED    0x80    /* unit is locked */
  265 
  266 #define raidunit(x)     DISKUNIT(x)
  267 int numraid = 0;
  268 
  269 /* 
  270  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 
  271  * Be aware that large numbers can allow the driver to consume a lot of 
  272  * kernel memory, especially on writes, and in degraded mode reads.
  273  * 
  274  * For example: with a stripe width of 64 blocks (32k) and 5 disks, 
  275  * a single 64K write will typically require 64K for the old data, 
  276  * 64K for the old parity, and 64K for the new parity, for a total 
  277  * of 192K (if the parity buffer is not re-used immediately).
  278  * Even if it is used immediately, that's still 128K, which when multiplied
  279  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
  280  * 
  281  * Now in degraded mode, for example, a 64K read on the above setup may
  282  * require data reconstruction, which will require *all* of the 4 remaining 
  283  * disks to participate -- 4 * 32K/disk == 128K again.
  284  */
  285 
  286 #ifndef RAIDOUTSTANDING
  287 #define RAIDOUTSTANDING   6
  288 #endif
  289 
  290 #define RAIDLABELDEV(dev)       \
  291         (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
  292 
  293 /* declared here, and made public, for the benefit of KVM stuff.. */
  294 struct raid_softc *raid_softc;
  295 
  296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 
  297                                      struct disklabel *);
  298 static void raidgetdisklabel(dev_t);
  299 static void raidmakedisklabel(struct raid_softc *);
  300 
  301 static int raidlock(struct raid_softc *);
  302 static void raidunlock(struct raid_softc *);
  303 
  304 static void rf_markalldirty(RF_Raid_t *);
  305 
  306 struct device *raidrootdev;
  307 
  308 void rf_ReconThread(struct rf_recon_req *);
  309 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
  310 void rf_CopybackThread(RF_Raid_t *raidPtr);
  311 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
  312 int rf_autoconfig(struct device *self);
  313 void rf_buildroothack(RF_ConfigSet_t *);
  314 
  315 RF_AutoConfig_t *rf_find_raid_components(void);
  316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
  317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
  318 static int rf_reasonable_label(RF_ComponentLabel_t *);
  319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
  320 int rf_set_autoconfig(RF_Raid_t *, int);
  321 int rf_set_rootpartition(RF_Raid_t *, int);
  322 void rf_release_all_vps(RF_ConfigSet_t *);
  323 void rf_cleanup_config_set(RF_ConfigSet_t *);
  324 int rf_have_enough_components(RF_ConfigSet_t *);
  325 int rf_auto_config_set(RF_ConfigSet_t *, int *);
  326 
  327 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
  328                                   allow autoconfig to take place.
  329                                   Note that this is overridden by having
  330                                   RAID_AUTOCONFIG as an option in the 
  331                                   kernel config file.  */
  332 
  333 struct RF_Pools_s rf_pools;
  334 
  335 void
  336 raidattach(int num)
  337 {
  338         int raidID;
  339         int i, rc;
  340 
  341 #ifdef DEBUG
  342         printf("raidattach: Asked for %d units\n", num);
  343 #endif
  344 
  345         if (num <= 0) {
  346 #ifdef DIAGNOSTIC
  347                 panic("raidattach: count <= 0");
  348 #endif
  349                 return;
  350         }
  351         /* This is where all the initialization stuff gets done. */
  352 
  353         numraid = num;
  354 
  355         /* Make some space for requested number of units... */
  356 
  357         RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
  358         if (raidPtrs == NULL) {
  359                 panic("raidPtrs is NULL!!");
  360         }
  361 
  362         /* Initialize the component buffer pool. */
  363         rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
  364                      "raidpl", num * RAIDOUTSTANDING,
  365                      2 * num * RAIDOUTSTANDING);
  366 
  367         rf_mutex_init(&rf_sparet_wait_mutex);
  368 
  369         rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
  370 
  371         for (i = 0; i < num; i++)
  372                 raidPtrs[i] = NULL;
  373         rc = rf_BootRaidframe();
  374         if (rc == 0)
  375                 printf("Kernelized RAIDframe activated\n");
  376         else
  377                 panic("Serious error booting RAID!!");
  378 
  379         /* put together some datastructures like the CCD device does.. This
  380          * lets us lock the device and what-not when it gets opened. */
  381 
  382         raid_softc = (struct raid_softc *)
  383                 malloc(num * sizeof(struct raid_softc),
  384                        M_RAIDFRAME, M_NOWAIT);
  385         if (raid_softc == NULL) {
  386                 printf("WARNING: no memory for RAIDframe driver\n");
  387                 return;
  388         }
  389 
  390         memset(raid_softc, 0, num * sizeof(struct raid_softc));
  391 
  392         raidrootdev = (struct device *)malloc(num * sizeof(struct device),
  393                                               M_RAIDFRAME, M_NOWAIT);
  394         if (raidrootdev == NULL) {
  395                 panic("No memory for RAIDframe driver!!?!?!");
  396         }
  397 
  398         for (raidID = 0; raidID < num; raidID++) {
  399                 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
  400 
  401                 raidrootdev[raidID].dv_class  = DV_DISK;
  402                 raidrootdev[raidID].dv_cfdata = NULL;
  403                 raidrootdev[raidID].dv_unit   = raidID;
  404                 raidrootdev[raidID].dv_parent = NULL;
  405                 raidrootdev[raidID].dv_flags  = 0;
  406                 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
  407 
  408                 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
  409                           (RF_Raid_t *));
  410                 if (raidPtrs[raidID] == NULL) {
  411                         printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
  412                         numraid = raidID;
  413                         return;
  414                 }
  415         }
  416 
  417 #ifdef RAID_AUTOCONFIG
  418         raidautoconfig = 1;
  419 #endif
  420 
  421         /*
  422          * Register a finalizer which will be used to auto-config RAID
  423          * sets once all real hardware devices have been found.
  424          */
  425         if (config_finalize_register(NULL, rf_autoconfig) != 0)
  426                 printf("WARNING: unable to register RAIDframe finalizer\n");
  427 }
  428 
  429 int
  430 rf_autoconfig(struct device *self)
  431 {
  432         RF_AutoConfig_t *ac_list;
  433         RF_ConfigSet_t *config_sets;
  434 
  435         if (raidautoconfig == 0)
  436                 return (0);
  437 
  438         /* XXX This code can only be run once. */
  439         raidautoconfig = 0;
  440 
  441         /* 1. locate all RAID components on the system */
  442 #ifdef DEBUG
  443         printf("Searching for RAID components...\n");
  444 #endif
  445         ac_list = rf_find_raid_components();
  446 
  447         /* 2. Sort them into their respective sets. */
  448         config_sets = rf_create_auto_sets(ac_list);
  449 
  450         /*
  451          * 3. Evaluate each set andconfigure the valid ones.
  452          * This gets done in rf_buildroothack().
  453          */
  454         rf_buildroothack(config_sets);
  455 
  456         return (1);
  457 }
  458 
  459 void
  460 rf_buildroothack(RF_ConfigSet_t *config_sets)
  461 {
  462         RF_ConfigSet_t *cset;
  463         RF_ConfigSet_t *next_cset;
  464         int retcode;
  465         int raidID;
  466         int rootID;
  467         int num_root;
  468 
  469         rootID = 0;
  470         num_root = 0;
  471         cset = config_sets;
  472         while(cset != NULL ) {
  473                 next_cset = cset->next;
  474                 if (rf_have_enough_components(cset) && 
  475                     cset->ac->clabel->autoconfigure==1) {
  476                         retcode = rf_auto_config_set(cset,&raidID);
  477                         if (!retcode) {
  478                                 if (cset->rootable) {
  479                                         rootID = raidID;
  480                                         num_root++;
  481                                 }
  482                         } else {
  483                                 /* The autoconfig didn't work :( */
  484 #if DEBUG
  485                                 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
  486 #endif
  487                                 rf_release_all_vps(cset);
  488                         }
  489                 } else {
  490                         /* we're not autoconfiguring this set...  
  491                            release the associated resources */
  492                         rf_release_all_vps(cset);
  493                 }
  494                 /* cleanup */
  495                 rf_cleanup_config_set(cset);
  496                 cset = next_cset;
  497         }
  498 
  499         /* we found something bootable... */
  500 
  501         if (num_root == 1) {
  502                 booted_device = &raidrootdev[rootID]; 
  503         } else if (num_root > 1) {
  504                 /* we can't guess.. require the user to answer... */
  505                 boothowto |= RB_ASKNAME;
  506         }
  507 }
  508 
  509 
  510 int
  511 raidsize(dev_t dev)
  512 {
  513         struct raid_softc *rs;
  514         struct disklabel *lp;
  515         int     part, unit, omask, size;
  516 
  517         unit = raidunit(dev);
  518         if (unit >= numraid)
  519                 return (-1);
  520         rs = &raid_softc[unit];
  521 
  522         if ((rs->sc_flags & RAIDF_INITED) == 0)
  523                 return (-1);
  524 
  525         part = DISKPART(dev);
  526         omask = rs->sc_dkdev.dk_openmask & (1 << part);
  527         lp = rs->sc_dkdev.dk_label;
  528 
  529         if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
  530                 return (-1);
  531 
  532         if (lp->d_partitions[part].p_fstype != FS_SWAP)
  533                 size = -1;
  534         else
  535                 size = lp->d_partitions[part].p_size *
  536                     (lp->d_secsize / DEV_BSIZE);
  537 
  538         if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
  539                 return (-1);
  540 
  541         return (size);
  542 
  543 }
  544 
  545 int
  546 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t  size)
  547 {
  548         /* Not implemented. */
  549         return ENXIO;
  550 }
  551 /* ARGSUSED */
  552 int
  553 raidopen(dev_t dev, int flags, int fmt, struct proc *p)
  554 {
  555         int     unit = raidunit(dev);
  556         struct raid_softc *rs;
  557         struct disklabel *lp;
  558         int     part, pmask;
  559         int     error = 0;
  560 
  561         if (unit >= numraid)
  562                 return (ENXIO);
  563         rs = &raid_softc[unit];
  564 
  565         if ((error = raidlock(rs)) != 0)
  566                 return (error);
  567         lp = rs->sc_dkdev.dk_label;
  568 
  569         part = DISKPART(dev);
  570         pmask = (1 << part);
  571 
  572         if ((rs->sc_flags & RAIDF_INITED) &&
  573             (rs->sc_dkdev.dk_openmask == 0))
  574                 raidgetdisklabel(dev);
  575 
  576         /* make sure that this partition exists */
  577 
  578         if (part != RAW_PART) {
  579                 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
  580                     ((part >= lp->d_npartitions) ||
  581                         (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
  582                         error = ENXIO;
  583                         raidunlock(rs);
  584                         return (error);
  585                 }
  586         }
  587         /* Prevent this unit from being unconfigured while open. */
  588         switch (fmt) {
  589         case S_IFCHR:
  590                 rs->sc_dkdev.dk_copenmask |= pmask;
  591                 break;
  592 
  593         case S_IFBLK:
  594                 rs->sc_dkdev.dk_bopenmask |= pmask;
  595                 break;
  596         }
  597 
  598         if ((rs->sc_dkdev.dk_openmask == 0) && 
  599             ((rs->sc_flags & RAIDF_INITED) != 0)) {
  600                 /* First one... mark things as dirty... Note that we *MUST*
  601                  have done a configure before this.  I DO NOT WANT TO BE
  602                  SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
  603                  THAT THEY BELONG TOGETHER!!!!! */
  604                 /* XXX should check to see if we're only open for reading
  605                    here... If so, we needn't do this, but then need some
  606                    other way of keeping track of what's happened.. */
  607 
  608                 rf_markalldirty( raidPtrs[unit] );
  609         }
  610 
  611 
  612         rs->sc_dkdev.dk_openmask =
  613             rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
  614 
  615         raidunlock(rs);
  616 
  617         return (error);
  618 
  619 
  620 }
  621 /* ARGSUSED */
  622 int
  623 raidclose(dev_t dev, int flags, int fmt, struct proc *p)
  624 {
  625         int     unit = raidunit(dev);
  626         struct raid_softc *rs;
  627         int     error = 0;
  628         int     part;
  629 
  630         if (unit >= numraid)
  631                 return (ENXIO);
  632         rs = &raid_softc[unit];
  633 
  634         if ((error = raidlock(rs)) != 0)
  635                 return (error);
  636 
  637         part = DISKPART(dev);
  638 
  639         /* ...that much closer to allowing unconfiguration... */
  640         switch (fmt) {
  641         case S_IFCHR:
  642                 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
  643                 break;
  644 
  645         case S_IFBLK:
  646                 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
  647                 break;
  648         }
  649         rs->sc_dkdev.dk_openmask =
  650             rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
  651         
  652         if ((rs->sc_dkdev.dk_openmask == 0) &&
  653             ((rs->sc_flags & RAIDF_INITED) != 0)) {
  654                 /* Last one... device is not unconfigured yet.  
  655                    Device shutdown has taken care of setting the 
  656                    clean bits if RAIDF_INITED is not set 
  657                    mark things as clean... */
  658 
  659                 rf_update_component_labels(raidPtrs[unit],
  660                                                  RF_FINAL_COMPONENT_UPDATE);
  661                 if (doing_shutdown) {
  662                         /* last one, and we're going down, so
  663                            lights out for this RAID set too. */
  664                         error = rf_Shutdown(raidPtrs[unit]);
  665                         
  666                         /* It's no longer initialized... */
  667                         rs->sc_flags &= ~RAIDF_INITED;
  668                         
  669                         /* Detach the disk. */
  670                         disk_detach(&rs->sc_dkdev);
  671                 }
  672         }
  673 
  674         raidunlock(rs);
  675         return (0);
  676 
  677 }
  678 
  679 void
  680 raidstrategy(struct buf *bp)
  681 {
  682         int s;
  683 
  684         unsigned int raidID = raidunit(bp->b_dev);
  685         RF_Raid_t *raidPtr;
  686         struct raid_softc *rs = &raid_softc[raidID];
  687         int     wlabel;
  688 
  689         if ((rs->sc_flags & RAIDF_INITED) ==0) {
  690                 bp->b_error = ENXIO;
  691                 bp->b_flags |= B_ERROR;
  692                 bp->b_resid = bp->b_bcount;
  693                 biodone(bp);
  694                 return;
  695         }
  696         if (raidID >= numraid || !raidPtrs[raidID]) {
  697                 bp->b_error = ENODEV;
  698                 bp->b_flags |= B_ERROR;
  699                 bp->b_resid = bp->b_bcount;
  700                 biodone(bp);
  701                 return;
  702         }
  703         raidPtr = raidPtrs[raidID];
  704         if (!raidPtr->valid) {
  705                 bp->b_error = ENODEV;
  706                 bp->b_flags |= B_ERROR;
  707                 bp->b_resid = bp->b_bcount;
  708                 biodone(bp);
  709                 return;
  710         }
  711         if (bp->b_bcount == 0) {
  712                 db1_printf(("b_bcount is zero..\n"));
  713                 biodone(bp);
  714                 return;
  715         }
  716 
  717         /*
  718          * Do bounds checking and adjust transfer.  If there's an
  719          * error, the bounds check will flag that for us.
  720          */
  721 
  722         wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
  723         if (DISKPART(bp->b_dev) != RAW_PART)
  724                 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
  725                         db1_printf(("Bounds check failed!!:%d %d\n",
  726                                 (int) bp->b_blkno, (int) wlabel));
  727                         biodone(bp);
  728                         return;
  729                 }
  730         s = splbio();
  731 
  732         bp->b_resid = 0;
  733 
  734         /* stuff it onto our queue */
  735         BUFQ_PUT(&rs->buf_queue, bp);
  736 
  737         raidstart(raidPtrs[raidID]);
  738 
  739         splx(s);
  740 }
  741 /* ARGSUSED */
  742 int
  743 raidread(dev_t dev, struct uio *uio, int flags)
  744 {
  745         int     unit = raidunit(dev);
  746         struct raid_softc *rs;
  747 
  748         if (unit >= numraid)
  749                 return (ENXIO);
  750         rs = &raid_softc[unit];
  751 
  752         if ((rs->sc_flags & RAIDF_INITED) == 0)
  753                 return (ENXIO);
  754 
  755         return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
  756 
  757 }
  758 /* ARGSUSED */
  759 int
  760 raidwrite(dev_t dev, struct uio *uio, int flags)
  761 {
  762         int     unit = raidunit(dev);
  763         struct raid_softc *rs;
  764 
  765         if (unit >= numraid)
  766                 return (ENXIO);
  767         rs = &raid_softc[unit];
  768 
  769         if ((rs->sc_flags & RAIDF_INITED) == 0)
  770                 return (ENXIO);
  771 
  772         return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
  773 
  774 }
  775 
  776 int
  777 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
  778 {
  779         int     unit = raidunit(dev);
  780         int     error = 0;
  781         int     part, pmask;
  782         struct raid_softc *rs;
  783         RF_Config_t *k_cfg, *u_cfg;
  784         RF_Raid_t *raidPtr;
  785         RF_RaidDisk_t *diskPtr;
  786         RF_AccTotals_t *totals;
  787         RF_DeviceConfig_t *d_cfg, **ucfgp;
  788         u_char *specific_buf;
  789         int retcode = 0;
  790         int column;
  791         int raidid;
  792         struct rf_recon_req *rrcopy, *rr;
  793         RF_ComponentLabel_t *clabel;
  794         RF_ComponentLabel_t ci_label;
  795         RF_ComponentLabel_t **clabel_ptr;
  796         RF_SingleComponent_t *sparePtr,*componentPtr;
  797         RF_SingleComponent_t hot_spare;
  798         RF_SingleComponent_t component;
  799         RF_ProgressInfo_t progressInfo, **progressInfoPtr;
  800         int i, j, d;
  801 #ifdef __HAVE_OLD_DISKLABEL
  802         struct disklabel newlabel;
  803 #endif
  804 
  805         if (unit >= numraid)
  806                 return (ENXIO);
  807         rs = &raid_softc[unit];
  808         raidPtr = raidPtrs[unit];
  809 
  810         db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
  811                 (int) DISKPART(dev), (int) unit, (int) cmd));
  812 
  813         /* Must be open for writes for these commands... */
  814         switch (cmd) {
  815         case DIOCSDINFO:
  816         case DIOCWDINFO:
  817 #ifdef __HAVE_OLD_DISKLABEL
  818         case ODIOCWDINFO:
  819         case ODIOCSDINFO:
  820 #endif
  821         case DIOCWLABEL:
  822                 if ((flag & FWRITE) == 0)
  823                         return (EBADF);
  824         }
  825 
  826         /* Must be initialized for these... */
  827         switch (cmd) {
  828         case DIOCGDINFO:
  829         case DIOCSDINFO:
  830         case DIOCWDINFO:
  831 #ifdef __HAVE_OLD_DISKLABEL
  832         case ODIOCGDINFO:
  833         case ODIOCWDINFO:
  834         case ODIOCSDINFO:
  835         case ODIOCGDEFLABEL:
  836 #endif
  837         case DIOCGPART:
  838         case DIOCWLABEL:
  839         case DIOCGDEFLABEL:
  840         case RAIDFRAME_SHUTDOWN:
  841         case RAIDFRAME_REWRITEPARITY:
  842         case RAIDFRAME_GET_INFO:
  843         case RAIDFRAME_RESET_ACCTOTALS:
  844         case RAIDFRAME_GET_ACCTOTALS:
  845         case RAIDFRAME_KEEP_ACCTOTALS:
  846         case RAIDFRAME_GET_SIZE:
  847         case RAIDFRAME_FAIL_DISK:
  848         case RAIDFRAME_COPYBACK:
  849         case RAIDFRAME_CHECK_RECON_STATUS:
  850         case RAIDFRAME_CHECK_RECON_STATUS_EXT:
  851         case RAIDFRAME_GET_COMPONENT_LABEL:
  852         case RAIDFRAME_SET_COMPONENT_LABEL:
  853         case RAIDFRAME_ADD_HOT_SPARE:
  854         case RAIDFRAME_REMOVE_HOT_SPARE:
  855         case RAIDFRAME_INIT_LABELS:
  856         case RAIDFRAME_REBUILD_IN_PLACE:
  857         case RAIDFRAME_CHECK_PARITY:
  858         case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
  859         case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
  860         case RAIDFRAME_CHECK_COPYBACK_STATUS:
  861         case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
  862         case RAIDFRAME_SET_AUTOCONFIG:
  863         case RAIDFRAME_SET_ROOT:
  864         case RAIDFRAME_DELETE_COMPONENT:
  865         case RAIDFRAME_INCORPORATE_HOT_SPARE:
  866                 if ((rs->sc_flags & RAIDF_INITED) == 0)
  867                         return (ENXIO);
  868         }
  869 
  870         switch (cmd) {
  871 
  872                 /* configure the system */
  873         case RAIDFRAME_CONFIGURE:
  874 
  875                 if (raidPtr->valid) {
  876                         /* There is a valid RAID set running on this unit! */
  877                         printf("raid%d: Device already configured!\n",unit);
  878                         return(EINVAL);
  879                 }
  880 
  881                 /* copy-in the configuration information */
  882                 /* data points to a pointer to the configuration structure */
  883 
  884                 u_cfg = *((RF_Config_t **) data);
  885                 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
  886                 if (k_cfg == NULL) {
  887                         return (ENOMEM);
  888                 }
  889                 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
  890                 if (retcode) {
  891                         RF_Free(k_cfg, sizeof(RF_Config_t));
  892                         db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
  893                                 retcode));
  894                         return (retcode);
  895                 }
  896                 /* allocate a buffer for the layout-specific data, and copy it
  897                  * in */
  898                 if (k_cfg->layoutSpecificSize) {
  899                         if (k_cfg->layoutSpecificSize > 10000) {
  900                                 /* sanity check */
  901                                 RF_Free(k_cfg, sizeof(RF_Config_t));
  902                                 return (EINVAL);
  903                         }
  904                         RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
  905                             (u_char *));
  906                         if (specific_buf == NULL) {
  907                                 RF_Free(k_cfg, sizeof(RF_Config_t));
  908                                 return (ENOMEM);
  909                         }
  910                         retcode = copyin(k_cfg->layoutSpecific, specific_buf,
  911                             k_cfg->layoutSpecificSize);
  912                         if (retcode) {
  913                                 RF_Free(k_cfg, sizeof(RF_Config_t));
  914                                 RF_Free(specific_buf, 
  915                                         k_cfg->layoutSpecificSize);
  916                                 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
  917                                         retcode));
  918                                 return (retcode);
  919                         }
  920                 } else
  921                         specific_buf = NULL;
  922                 k_cfg->layoutSpecific = specific_buf;
  923 
  924                 /* should do some kind of sanity check on the configuration.
  925                  * Store the sum of all the bytes in the last byte? */
  926 
  927                 /* configure the system */
  928 
  929                 /*
  930                  * Clear the entire RAID descriptor, just to make sure
  931                  *  there is no stale data left in the case of a 
  932                  *  reconfiguration 
  933                  */
  934                 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
  935                 raidPtr->raidid = unit;
  936 
  937                 retcode = rf_Configure(raidPtr, k_cfg, NULL);
  938 
  939                 if (retcode == 0) {
  940 
  941                         /* allow this many simultaneous IO's to 
  942                            this RAID device */
  943                         raidPtr->openings = RAIDOUTSTANDING;
  944                                         
  945                         raidinit(raidPtr);
  946                         rf_markalldirty(raidPtr);
  947                 }
  948                 /* free the buffers.  No return code here. */
  949                 if (k_cfg->layoutSpecificSize) {
  950                         RF_Free(specific_buf, k_cfg->layoutSpecificSize);
  951                 }
  952                 RF_Free(k_cfg, sizeof(RF_Config_t));
  953 
  954                 return (retcode);
  955 
  956                 /* shutdown the system */
  957         case RAIDFRAME_SHUTDOWN:
  958 
  959                 if ((error = raidlock(rs)) != 0)
  960                         return (error);
  961 
  962                 /*
  963                  * If somebody has a partition mounted, we shouldn't
  964                  * shutdown.
  965                  */
  966 
  967                 part = DISKPART(dev);
  968                 pmask = (1 << part);
  969                 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
  970                     ((rs->sc_dkdev.dk_bopenmask & pmask) &&
  971                         (rs->sc_dkdev.dk_copenmask & pmask))) {
  972                         raidunlock(rs);
  973                         return (EBUSY);
  974                 }
  975 
  976                 retcode = rf_Shutdown(raidPtr);
  977 
  978                 /* It's no longer initialized... */
  979                 rs->sc_flags &= ~RAIDF_INITED;
  980 
  981                 /* Detach the disk. */
  982                 disk_detach(&rs->sc_dkdev);
  983 
  984                 raidunlock(rs);
  985 
  986                 return (retcode);
  987         case RAIDFRAME_GET_COMPONENT_LABEL:
  988                 clabel_ptr = (RF_ComponentLabel_t **) data;
  989                 /* need to read the component label for the disk indicated
  990                    by row,column in clabel */
  991 
  992                 /* For practice, let's get it directly from disk, rather
  993                    than from the in-core copy */
  994                 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
  995                            (RF_ComponentLabel_t *));
  996                 if (clabel == NULL)
  997                         return (ENOMEM);
  998 
  999                 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
 1000                 
 1001                 retcode = copyin( *clabel_ptr, clabel, 
 1002                                   sizeof(RF_ComponentLabel_t));
 1003 
 1004                 if (retcode) {
 1005                         RF_Free( clabel, sizeof(RF_ComponentLabel_t));
 1006                         return(retcode);
 1007                 }
 1008 
 1009                 clabel->row = 0; /* Don't allow looking at anything else.*/
 1010 
 1011                 column = clabel->column;
 1012 
 1013                 if ((column < 0) || (column >= raidPtr->numCol +
 1014                                      raidPtr->numSpare)) {
 1015                         RF_Free( clabel, sizeof(RF_ComponentLabel_t));
 1016                         return(EINVAL);
 1017                 }
 1018 
 1019                 raidread_component_label(raidPtr->Disks[column].dev, 
 1020                                 raidPtr->raid_cinfo[column].ci_vp, 
 1021                                 clabel );
 1022 
 1023                 retcode = copyout(clabel, *clabel_ptr,
 1024                                   sizeof(RF_ComponentLabel_t));
 1025                 RF_Free(clabel, sizeof(RF_ComponentLabel_t));
 1026                 return (retcode);
 1027 
 1028         case RAIDFRAME_SET_COMPONENT_LABEL:
 1029                 clabel = (RF_ComponentLabel_t *) data;
 1030 
 1031                 /* XXX check the label for valid stuff... */
 1032                 /* Note that some things *should not* get modified --
 1033                    the user should be re-initing the labels instead of 
 1034                    trying to patch things.
 1035                    */
 1036 
 1037                 raidid = raidPtr->raidid;
 1038 #if DEBUG
 1039                 printf("raid%d: Got component label:\n", raidid);
 1040                 printf("raid%d: Version: %d\n", raidid, clabel->version);
 1041                 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
 1042                 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
 1043                 printf("raid%d: Column: %d\n", raidid, clabel->column);
 1044                 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
 1045                 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
 1046                 printf("raid%d: Status: %d\n", raidid, clabel->status);
 1047 #endif
 1048                 clabel->row = 0;
 1049                 column = clabel->column;
 1050 
 1051                 if ((column < 0) || (column >= raidPtr->numCol)) {
 1052                         return(EINVAL);
 1053                 }
 1054 
 1055                 /* XXX this isn't allowed to do anything for now :-) */
 1056 
 1057                 /* XXX and before it is, we need to fill in the rest
 1058                    of the fields!?!?!?! */
 1059 #if 0
 1060                 raidwrite_component_label( 
 1061                             raidPtr->Disks[column].dev, 
 1062                             raidPtr->raid_cinfo[column].ci_vp, 
 1063                             clabel );
 1064 #endif
 1065                 return (0);
 1066 
 1067         case RAIDFRAME_INIT_LABELS:     
 1068                 clabel = (RF_ComponentLabel_t *) data;
 1069                 /* 
 1070                    we only want the serial number from
 1071                    the above.  We get all the rest of the information
 1072                    from the config that was used to create this RAID
 1073                    set. 
 1074                    */
 1075 
 1076                 raidPtr->serial_number = clabel->serial_number;
 1077                 
 1078                 raid_init_component_label(raidPtr, &ci_label);
 1079                 ci_label.serial_number = clabel->serial_number;
 1080                 ci_label.row = 0; /* we dont' pretend to support more */
 1081 
 1082                 for(column=0;column<raidPtr->numCol;column++) {
 1083                         diskPtr = &raidPtr->Disks[column];
 1084                         if (!RF_DEAD_DISK(diskPtr->status)) {
 1085                                 ci_label.partitionSize = diskPtr->partitionSize;
 1086                                 ci_label.column = column;
 1087                                 raidwrite_component_label( 
 1088                                                           raidPtr->Disks[column].dev, 
 1089                                                           raidPtr->raid_cinfo[column].ci_vp, 
 1090                                                           &ci_label );
 1091                         }
 1092                 }
 1093 
 1094                 return (retcode);
 1095         case RAIDFRAME_SET_AUTOCONFIG:
 1096                 d = rf_set_autoconfig(raidPtr, *(int *) data);
 1097                 printf("raid%d: New autoconfig value is: %d\n", 
 1098                        raidPtr->raidid, d);
 1099                 *(int *) data = d;
 1100                 return (retcode);
 1101 
 1102         case RAIDFRAME_SET_ROOT:
 1103                 d = rf_set_rootpartition(raidPtr, *(int *) data);
 1104                 printf("raid%d: New rootpartition value is: %d\n", 
 1105                        raidPtr->raidid, d);
 1106                 *(int *) data = d;
 1107                 return (retcode);
 1108 
 1109                 /* initialize all parity */
 1110         case RAIDFRAME_REWRITEPARITY:
 1111 
 1112                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1113                         /* Parity for RAID 0 is trivially correct */
 1114                         raidPtr->parity_good = RF_RAID_CLEAN;
 1115                         return(0);
 1116                 }
 1117                 
 1118                 if (raidPtr->parity_rewrite_in_progress == 1) {
 1119                         /* Re-write is already in progress! */
 1120                         return(EINVAL);
 1121                 }
 1122 
 1123                 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
 1124                                            rf_RewriteParityThread,
 1125                                            raidPtr,"raid_parity");
 1126                 return (retcode);
 1127 
 1128 
 1129         case RAIDFRAME_ADD_HOT_SPARE:
 1130                 sparePtr = (RF_SingleComponent_t *) data;
 1131                 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
 1132                 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
 1133                 return(retcode);
 1134 
 1135         case RAIDFRAME_REMOVE_HOT_SPARE:
 1136                 return(retcode);
 1137 
 1138         case RAIDFRAME_DELETE_COMPONENT:
 1139                 componentPtr = (RF_SingleComponent_t *)data;
 1140                 memcpy( &component, componentPtr, 
 1141                         sizeof(RF_SingleComponent_t));
 1142                 retcode = rf_delete_component(raidPtr, &component);
 1143                 return(retcode);
 1144 
 1145         case RAIDFRAME_INCORPORATE_HOT_SPARE:
 1146                 componentPtr = (RF_SingleComponent_t *)data;
 1147                 memcpy( &component, componentPtr, 
 1148                         sizeof(RF_SingleComponent_t));
 1149                 retcode = rf_incorporate_hot_spare(raidPtr, &component);
 1150                 return(retcode);
 1151 
 1152         case RAIDFRAME_REBUILD_IN_PLACE:
 1153 
 1154                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1155                         /* Can't do this on a RAID 0!! */
 1156                         return(EINVAL);
 1157                 }
 1158 
 1159                 if (raidPtr->recon_in_progress == 1) {
 1160                         /* a reconstruct is already in progress! */
 1161                         return(EINVAL);
 1162                 }
 1163 
 1164                 componentPtr = (RF_SingleComponent_t *) data;
 1165                 memcpy( &component, componentPtr, 
 1166                         sizeof(RF_SingleComponent_t));
 1167                 component.row = 0; /* we don't support any more */
 1168                 column = component.column;
 1169 
 1170                 if ((column < 0) || (column >= raidPtr->numCol)) {
 1171                         return(EINVAL);
 1172                 }
 1173 
 1174                 RF_LOCK_MUTEX(raidPtr->mutex);
 1175                 if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
 1176                     (raidPtr->numFailures > 0)) { 
 1177                         /* XXX 0 above shouldn't be constant!!! */
 1178                         /* some component other than this has failed.
 1179                            Let's not make things worse than they already
 1180                            are... */
 1181                         printf("raid%d: Unable to reconstruct to disk at:\n",
 1182                                raidPtr->raidid);
 1183                         printf("raid%d:     Col: %d   Too many failures.\n",
 1184                                raidPtr->raidid, column);
 1185                         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1186                         return (EINVAL);
 1187                 }
 1188                 if (raidPtr->Disks[column].status == 
 1189                     rf_ds_reconstructing) {
 1190                         printf("raid%d: Unable to reconstruct to disk at:\n",
 1191                                raidPtr->raidid);
 1192                         printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
 1193                         
 1194                         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1195                         return (EINVAL);
 1196                 }
 1197                 if (raidPtr->Disks[column].status == rf_ds_spared) {
 1198                         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1199                         return (EINVAL);
 1200                 }
 1201                 RF_UNLOCK_MUTEX(raidPtr->mutex);
 1202 
 1203                 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
 1204                 if (rrcopy == NULL)
 1205                         return(ENOMEM);
 1206 
 1207                 rrcopy->raidPtr = (void *) raidPtr;
 1208                 rrcopy->col = column;
 1209 
 1210                 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
 1211                                            rf_ReconstructInPlaceThread,
 1212                                            rrcopy,"raid_reconip");
 1213                 return(retcode);
 1214 
 1215         case RAIDFRAME_GET_INFO:
 1216                 if (!raidPtr->valid)
 1217                         return (ENODEV);
 1218                 ucfgp = (RF_DeviceConfig_t **) data;
 1219                 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
 1220                           (RF_DeviceConfig_t *));
 1221                 if (d_cfg == NULL)
 1222                         return (ENOMEM);
 1223                 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
 1224                 d_cfg->rows = 1; /* there is only 1 row now */
 1225                 d_cfg->cols = raidPtr->numCol;
 1226                 d_cfg->ndevs = raidPtr->numCol;
 1227                 if (d_cfg->ndevs >= RF_MAX_DISKS) {
 1228                         RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
 1229                         return (ENOMEM);
 1230                 }
 1231                 d_cfg->nspares = raidPtr->numSpare;
 1232                 if (d_cfg->nspares >= RF_MAX_DISKS) {
 1233                         RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
 1234                         return (ENOMEM);
 1235                 }
 1236                 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
 1237                 d = 0;
 1238                 for (j = 0; j < d_cfg->cols; j++) {
 1239                         d_cfg->devs[d] = raidPtr->Disks[j];
 1240                         d++;
 1241                 }
 1242                 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
 1243                         d_cfg->spares[i] = raidPtr->Disks[j];
 1244                 }
 1245                 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
 1246                 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
 1247 
 1248                 return (retcode);
 1249 
 1250         case RAIDFRAME_CHECK_PARITY:
 1251                 *(int *) data = raidPtr->parity_good;
 1252                 return (0);
 1253 
 1254         case RAIDFRAME_RESET_ACCTOTALS:
 1255                 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
 1256                 return (0);
 1257 
 1258         case RAIDFRAME_GET_ACCTOTALS:
 1259                 totals = (RF_AccTotals_t *) data;
 1260                 *totals = raidPtr->acc_totals;
 1261                 return (0);
 1262 
 1263         case RAIDFRAME_KEEP_ACCTOTALS:
 1264                 raidPtr->keep_acc_totals = *(int *)data;
 1265                 return (0);
 1266 
 1267         case RAIDFRAME_GET_SIZE:
 1268                 *(int *) data = raidPtr->totalSectors;
 1269                 return (0);
 1270 
 1271                 /* fail a disk & optionally start reconstruction */
 1272         case RAIDFRAME_FAIL_DISK:
 1273 
 1274                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1275                         /* Can't do this on a RAID 0!! */
 1276                         return(EINVAL);
 1277                 }
 1278 
 1279                 rr = (struct rf_recon_req *) data;
 1280                 rr->row = 0;
 1281                 if (rr->col < 0 || rr->col >= raidPtr->numCol)
 1282                         return (EINVAL);
 1283 
 1284 
 1285                 RF_LOCK_MUTEX(raidPtr->mutex);
 1286                 if ((raidPtr->Disks[rr->col].status == 
 1287                      rf_ds_optimal) && (raidPtr->numFailures > 0)) { 
 1288                         /* some other component has failed.  Let's not make
 1289                            things worse. XXX wrong for RAID6 */
 1290                         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1291                         return (EINVAL);
 1292                 }
 1293                 if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
 1294                         /* Can't fail a spared disk! */
 1295                         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1296                         return (EINVAL);
 1297                 }
 1298                 RF_UNLOCK_MUTEX(raidPtr->mutex);
 1299 
 1300                 /* make a copy of the recon request so that we don't rely on
 1301                  * the user's buffer */
 1302                 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
 1303                 if (rrcopy == NULL)
 1304                         return(ENOMEM);
 1305                 memcpy(rrcopy, rr, sizeof(*rr));
 1306                 rrcopy->raidPtr = (void *) raidPtr;
 1307 
 1308                 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
 1309                                            rf_ReconThread,
 1310                                            rrcopy,"raid_recon");
 1311                 return (0);
 1312 
 1313                 /* invoke a copyback operation after recon on whatever disk
 1314                  * needs it, if any */
 1315         case RAIDFRAME_COPYBACK:
 1316 
 1317                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1318                         /* This makes no sense on a RAID 0!! */
 1319                         return(EINVAL);
 1320                 }
 1321 
 1322                 if (raidPtr->copyback_in_progress == 1) {
 1323                         /* Copyback is already in progress! */
 1324                         return(EINVAL);
 1325                 }
 1326 
 1327                 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
 1328                                            rf_CopybackThread,
 1329                                            raidPtr,"raid_copyback");
 1330                 return (retcode);
 1331 
 1332                 /* return the percentage completion of reconstruction */
 1333         case RAIDFRAME_CHECK_RECON_STATUS:
 1334                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1335                         /* This makes no sense on a RAID 0, so tell the
 1336                            user it's done. */
 1337                         *(int *) data = 100;
 1338                         return(0);
 1339                 }
 1340                 if (raidPtr->status != rf_rs_reconstructing)
 1341                         *(int *) data = 100;
 1342                 else {
 1343                         if (raidPtr->reconControl->numRUsTotal > 0) {
 1344                                 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
 1345                         } else {
 1346                                 *(int *) data = 0;
 1347                         }
 1348                 }
 1349                 return (0);
 1350         case RAIDFRAME_CHECK_RECON_STATUS_EXT:
 1351                 progressInfoPtr = (RF_ProgressInfo_t **) data;
 1352                 if (raidPtr->status != rf_rs_reconstructing) {
 1353                         progressInfo.remaining = 0;
 1354                         progressInfo.completed = 100;
 1355                         progressInfo.total = 100;
 1356                 } else {
 1357                         progressInfo.total = 
 1358                                 raidPtr->reconControl->numRUsTotal;
 1359                         progressInfo.completed = 
 1360                                 raidPtr->reconControl->numRUsComplete;
 1361                         progressInfo.remaining = progressInfo.total -
 1362                                 progressInfo.completed;
 1363                 }
 1364                 retcode = copyout(&progressInfo, *progressInfoPtr,
 1365                                   sizeof(RF_ProgressInfo_t));
 1366                 return (retcode);
 1367 
 1368         case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
 1369                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1370                         /* This makes no sense on a RAID 0, so tell the
 1371                            user it's done. */
 1372                         *(int *) data = 100;
 1373                         return(0);
 1374                 }
 1375                 if (raidPtr->parity_rewrite_in_progress == 1) {
 1376                         *(int *) data = 100 * 
 1377                                 raidPtr->parity_rewrite_stripes_done / 
 1378                                 raidPtr->Layout.numStripe;
 1379                 } else {
 1380                         *(int *) data = 100;
 1381                 }
 1382                 return (0);
 1383 
 1384         case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
 1385                 progressInfoPtr = (RF_ProgressInfo_t **) data;
 1386                 if (raidPtr->parity_rewrite_in_progress == 1) {
 1387                         progressInfo.total = raidPtr->Layout.numStripe;
 1388                         progressInfo.completed = 
 1389                                 raidPtr->parity_rewrite_stripes_done;
 1390                         progressInfo.remaining = progressInfo.total -
 1391                                 progressInfo.completed;
 1392                 } else {
 1393                         progressInfo.remaining = 0;
 1394                         progressInfo.completed = 100;
 1395                         progressInfo.total = 100;
 1396                 }
 1397                 retcode = copyout(&progressInfo, *progressInfoPtr,
 1398                                   sizeof(RF_ProgressInfo_t));
 1399                 return (retcode);
 1400 
 1401         case RAIDFRAME_CHECK_COPYBACK_STATUS:
 1402                 if (raidPtr->Layout.map->faultsTolerated == 0) {
 1403                         /* This makes no sense on a RAID 0 */
 1404                         *(int *) data = 100;
 1405                         return(0);
 1406                 }
 1407                 if (raidPtr->copyback_in_progress == 1) {
 1408                         *(int *) data = 100 * raidPtr->copyback_stripes_done /
 1409                                 raidPtr->Layout.numStripe;
 1410                 } else {
 1411                         *(int *) data = 100;
 1412                 }
 1413                 return (0);
 1414 
 1415         case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
 1416                 progressInfoPtr = (RF_ProgressInfo_t **) data;
 1417                 if (raidPtr->copyback_in_progress == 1) {
 1418                         progressInfo.total = raidPtr->Layout.numStripe;
 1419                         progressInfo.completed = 
 1420                                 raidPtr->copyback_stripes_done;
 1421                         progressInfo.remaining = progressInfo.total -
 1422                                 progressInfo.completed;
 1423                 } else {
 1424                         progressInfo.remaining = 0;
 1425                         progressInfo.completed = 100;
 1426                         progressInfo.total = 100;
 1427                 }
 1428                 retcode = copyout(&progressInfo, *progressInfoPtr,
 1429                                   sizeof(RF_ProgressInfo_t));
 1430                 return (retcode);
 1431 
 1432                 /* the sparetable daemon calls this to wait for the kernel to
 1433                  * need a spare table. this ioctl does not return until a
 1434                  * spare table is needed. XXX -- calling mpsleep here in the
 1435                  * ioctl code is almost certainly wrong and evil. -- XXX XXX
 1436                  * -- I should either compute the spare table in the kernel,
 1437                  * or have a different -- XXX XXX -- interface (a different
 1438                  * character device) for delivering the table     -- XXX */
 1439 #if 0
 1440         case RAIDFRAME_SPARET_WAIT:
 1441                 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
 1442                 while (!rf_sparet_wait_queue)
 1443                         mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
 1444                 waitreq = rf_sparet_wait_queue;
 1445                 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
 1446                 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
 1447 
 1448                 /* structure assignment */
 1449                 *((RF_SparetWait_t *) data) = *waitreq; 
 1450 
 1451                 RF_Free(waitreq, sizeof(*waitreq));
 1452                 return (0);
 1453 
 1454                 /* wakes up a process waiting on SPARET_WAIT and puts an error
 1455                  * code in it that will cause the daemon to exit */
 1456         case RAIDFRAME_ABORT_SPARET_WAIT:
 1457                 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
 1458                 waitreq->fcol = -1;
 1459                 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
 1460                 waitreq->next = rf_sparet_wait_queue;
 1461                 rf_sparet_wait_queue = waitreq;
 1462                 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
 1463                 wakeup(&rf_sparet_wait_queue);
 1464                 return (0);
 1465 
 1466                 /* used by the spare table daemon to deliver a spare table
 1467                  * into the kernel */
 1468         case RAIDFRAME_SEND_SPARET:
 1469 
 1470                 /* install the spare table */
 1471                 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
 1472 
 1473                 /* respond to the requestor.  the return status of the spare
 1474                  * table installation is passed in the "fcol" field */
 1475                 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
 1476                 waitreq->fcol = retcode;
 1477                 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
 1478                 waitreq->next = rf_sparet_resp_queue;
 1479                 rf_sparet_resp_queue = waitreq;
 1480                 wakeup(&rf_sparet_resp_queue);
 1481                 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
 1482 
 1483                 return (retcode);
 1484 #endif
 1485 
 1486         default:
 1487                 break; /* fall through to the os-specific code below */
 1488 
 1489         }
 1490 
 1491         if (!raidPtr->valid)
 1492                 return (EINVAL);
 1493 
 1494         /*
 1495          * Add support for "regular" device ioctls here.
 1496          */
 1497 
 1498         switch (cmd) {
 1499         case DIOCGDINFO:
 1500                 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
 1501                 break;
 1502 #ifdef __HAVE_OLD_DISKLABEL
 1503         case ODIOCGDINFO:
 1504                 newlabel = *(rs->sc_dkdev.dk_label);
 1505                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1506                         return ENOTTY;
 1507                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1508                 break;
 1509 #endif
 1510 
 1511         case DIOCGPART:
 1512                 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
 1513                 ((struct partinfo *) data)->part =
 1514                     &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
 1515                 break;
 1516 
 1517         case DIOCWDINFO:
 1518         case DIOCSDINFO:
 1519 #ifdef __HAVE_OLD_DISKLABEL
 1520         case ODIOCWDINFO:
 1521         case ODIOCSDINFO:
 1522 #endif
 1523         {
 1524                 struct disklabel *lp;
 1525 #ifdef __HAVE_OLD_DISKLABEL
 1526                 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
 1527                         memset(&newlabel, 0, sizeof newlabel);
 1528                         memcpy(&newlabel, data, sizeof (struct olddisklabel));
 1529                         lp = &newlabel;
 1530                 } else
 1531 #endif
 1532                 lp = (struct disklabel *)data;
 1533 
 1534                 if ((error = raidlock(rs)) != 0)
 1535                         return (error);
 1536 
 1537                 rs->sc_flags |= RAIDF_LABELLING;
 1538 
 1539                 error = setdisklabel(rs->sc_dkdev.dk_label,
 1540                     lp, 0, rs->sc_dkdev.dk_cpulabel);
 1541                 if (error == 0) {
 1542                         if (cmd == DIOCWDINFO
 1543 #ifdef __HAVE_OLD_DISKLABEL
 1544                             || cmd == ODIOCWDINFO
 1545 #endif
 1546                            )
 1547                                 error = writedisklabel(RAIDLABELDEV(dev),
 1548                                     raidstrategy, rs->sc_dkdev.dk_label,
 1549                                     rs->sc_dkdev.dk_cpulabel);
 1550                 }
 1551                 rs->sc_flags &= ~RAIDF_LABELLING;
 1552 
 1553                 raidunlock(rs);
 1554 
 1555                 if (error)
 1556                         return (error);
 1557                 break;
 1558         }
 1559 
 1560         case DIOCWLABEL:
 1561                 if (*(int *) data != 0)
 1562                         rs->sc_flags |= RAIDF_WLABEL;
 1563                 else
 1564                         rs->sc_flags &= ~RAIDF_WLABEL;
 1565                 break;
 1566 
 1567         case DIOCGDEFLABEL:
 1568                 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
 1569                 break;
 1570 
 1571 #ifdef __HAVE_OLD_DISKLABEL
 1572         case ODIOCGDEFLABEL:
 1573                 raidgetdefaultlabel(raidPtr, rs, &newlabel);
 1574                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1575                         return ENOTTY;
 1576                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1577                 break;
 1578 #endif
 1579 
 1580         default:
 1581                 retcode = ENOTTY;
 1582         }
 1583         return (retcode);
 1584 
 1585 }
 1586 
 1587 
 1588 /* raidinit -- complete the rest of the initialization for the
 1589    RAIDframe device.  */
 1590 
 1591 
 1592 static void
 1593 raidinit(RF_Raid_t *raidPtr)
 1594 {
 1595         struct raid_softc *rs;
 1596         int     unit;
 1597 
 1598         unit = raidPtr->raidid;
 1599 
 1600         rs = &raid_softc[unit];
 1601 
 1602         /* XXX should check return code first... */
 1603         rs->sc_flags |= RAIDF_INITED;
 1604 
 1605         sprintf(rs->sc_xname, "raid%d", unit);  /* XXX doesn't check bounds. */
 1606 
 1607         rs->sc_dkdev.dk_name = rs->sc_xname;
 1608 
 1609         /* disk_attach actually creates space for the CPU disklabel, among
 1610          * other things, so it's critical to call this *BEFORE* we try putzing
 1611          * with disklabels. */
 1612 
 1613         disk_attach(&rs->sc_dkdev);
 1614 
 1615         /* XXX There may be a weird interaction here between this, and
 1616          * protectedSectors, as used in RAIDframe.  */
 1617 
 1618         rs->sc_size = raidPtr->totalSectors;
 1619 }
 1620 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
 1621 /* wake up the daemon & tell it to get us a spare table
 1622  * XXX
 1623  * the entries in the queues should be tagged with the raidPtr
 1624  * so that in the extremely rare case that two recons happen at once, 
 1625  * we know for which device were requesting a spare table
 1626  * XXX
 1627  * 
 1628  * XXX This code is not currently used. GO
 1629  */
 1630 int 
 1631 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
 1632 {
 1633         int     retcode;
 1634 
 1635         RF_LOCK_MUTEX(rf_sparet_wait_mutex);
 1636         req->next = rf_sparet_wait_queue;
 1637         rf_sparet_wait_queue = req;
 1638         wakeup(&rf_sparet_wait_queue);
 1639 
 1640         /* mpsleep unlocks the mutex */
 1641         while (!rf_sparet_resp_queue) {
 1642                 tsleep(&rf_sparet_resp_queue, PRIBIO,
 1643                     "raidframe getsparetable", 0);
 1644         }
 1645         req = rf_sparet_resp_queue;
 1646         rf_sparet_resp_queue = req->next;
 1647         RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
 1648 
 1649         retcode = req->fcol;
 1650         RF_Free(req, sizeof(*req));     /* this is not the same req as we
 1651                                          * alloc'd */
 1652         return (retcode);
 1653 }
 1654 #endif
 1655 
 1656 /* a wrapper around rf_DoAccess that extracts appropriate info from the 
 1657  * bp & passes it down.
 1658  * any calls originating in the kernel must use non-blocking I/O
 1659  * do some extra sanity checking to return "appropriate" error values for
 1660  * certain conditions (to make some standard utilities work)
 1661  * 
 1662  * Formerly known as: rf_DoAccessKernel
 1663  */
 1664 void
 1665 raidstart(RF_Raid_t *raidPtr)
 1666 {
 1667         RF_SectorCount_t num_blocks, pb, sum;
 1668         RF_RaidAddr_t raid_addr;
 1669         struct partition *pp;
 1670         daddr_t blocknum;
 1671         int     unit;
 1672         struct raid_softc *rs;
 1673         int     do_async;
 1674         struct buf *bp;
 1675         int rc;
 1676 
 1677         unit = raidPtr->raidid;
 1678         rs = &raid_softc[unit];
 1679         
 1680         /* quick check to see if anything has died recently */
 1681         RF_LOCK_MUTEX(raidPtr->mutex);
 1682         if (raidPtr->numNewFailures > 0) {
 1683                 RF_UNLOCK_MUTEX(raidPtr->mutex);
 1684                 rf_update_component_labels(raidPtr, 
 1685                                            RF_NORMAL_COMPONENT_UPDATE);
 1686                 RF_LOCK_MUTEX(raidPtr->mutex);
 1687                 raidPtr->numNewFailures--;
 1688         }
 1689 
 1690         /* Check to see if we're at the limit... */
 1691         while (raidPtr->openings > 0) {
 1692                 RF_UNLOCK_MUTEX(raidPtr->mutex);
 1693 
 1694                 /* get the next item, if any, from the queue */
 1695                 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
 1696                         /* nothing more to do */
 1697                         return;
 1698                 }
 1699 
 1700                 /* Ok, for the bp we have here, bp->b_blkno is relative to the
 1701                  * partition.. Need to make it absolute to the underlying 
 1702                  * device.. */
 1703 
 1704                 blocknum = bp->b_blkno;
 1705                 if (DISKPART(bp->b_dev) != RAW_PART) {
 1706                         pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
 1707                         blocknum += pp->p_offset;
 1708                 }
 1709 
 1710                 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 
 1711                             (int) blocknum));
 1712                 
 1713                 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
 1714                 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
 1715                 
 1716                 /* *THIS* is where we adjust what block we're going to... 
 1717                  * but DO NOT TOUCH bp->b_blkno!!! */
 1718                 raid_addr = blocknum;
 1719                 
 1720                 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
 1721                 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
 1722                 sum = raid_addr + num_blocks + pb;
 1723                 if (1 || rf_debugKernelAccess) {
 1724                         db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
 1725                                     (int) raid_addr, (int) sum, (int) num_blocks,
 1726                                     (int) pb, (int) bp->b_resid));
 1727                 }
 1728                 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
 1729                     || (sum < num_blocks) || (sum < pb)) {
 1730                         bp->b_error = ENOSPC;
 1731                         bp->b_flags |= B_ERROR;
 1732                         bp->b_resid = bp->b_bcount;
 1733                         biodone(bp);
 1734                         RF_LOCK_MUTEX(raidPtr->mutex);
 1735                         continue;
 1736                 }
 1737                 /*
 1738                  * XXX rf_DoAccess() should do this, not just DoAccessKernel()
 1739                  */
 1740                 
 1741                 if (bp->b_bcount & raidPtr->sectorMask) {
 1742                         bp->b_error = EINVAL;
 1743                         bp->b_flags |= B_ERROR;
 1744                         bp->b_resid = bp->b_bcount;
 1745                         biodone(bp);
 1746                         RF_LOCK_MUTEX(raidPtr->mutex);
 1747                         continue;
 1748                         
 1749                 }
 1750                 db1_printf(("Calling DoAccess..\n"));
 1751                 
 1752 
 1753                 RF_LOCK_MUTEX(raidPtr->mutex);
 1754                 raidPtr->openings--;
 1755                 RF_UNLOCK_MUTEX(raidPtr->mutex);
 1756 
 1757                 /*
 1758                  * Everything is async.
 1759                  */
 1760                 do_async = 1;
 1761                 
 1762                 disk_busy(&rs->sc_dkdev);
 1763 
 1764                 /* XXX we're still at splbio() here... do we *really* 
 1765                    need to be? */
 1766 
 1767                 /* don't ever condition on bp->b_flags & B_WRITE.  
 1768                  * always condition on B_READ instead */
 1769                 
 1770                 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
 1771                                  RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
 1772                                  do_async, raid_addr, num_blocks,
 1773                                  bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
 1774 
 1775                 if (rc) {
 1776                         bp->b_error = rc;
 1777                         bp->b_flags |= B_ERROR;
 1778                         bp->b_resid = bp->b_bcount;
 1779                         biodone(bp);
 1780                         /* continue loop */
 1781                 }       
 1782 
 1783                 RF_LOCK_MUTEX(raidPtr->mutex);
 1784         }
 1785         RF_UNLOCK_MUTEX(raidPtr->mutex);
 1786 }
 1787 
 1788 
 1789 
 1790 
 1791 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
 1792 
 1793 int 
 1794 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
 1795 {
 1796         int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
 1797         struct buf *bp;
 1798         struct raidbuf *raidbp = NULL;
 1799 
 1800         req->queue = queue;
 1801 
 1802 #if DIAGNOSTIC
 1803         if (queue->raidPtr->raidid >= numraid) {
 1804                 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
 1805                     numraid);
 1806                 panic("Invalid Unit number in rf_DispatchKernelIO");
 1807         }
 1808 #endif
 1809 
 1810         bp = req->bp;
 1811 #if 1
 1812         /* XXX when there is a physical disk failure, someone is passing us a
 1813          * buffer that contains old stuff!!  Attempt to deal with this problem
 1814          * without taking a performance hit... (not sure where the real bug
 1815          * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
 1816 
 1817         if (bp->b_flags & B_ERROR) {
 1818                 bp->b_flags &= ~B_ERROR;
 1819         }
 1820         if (bp->b_error != 0) {
 1821                 bp->b_error = 0;
 1822         }
 1823 #endif
 1824         raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
 1825         if (raidbp == NULL) {
 1826                 bp->b_flags |= B_ERROR;
 1827                 bp->b_error = ENOMEM;
 1828                 return (ENOMEM);
 1829         }
 1830         BUF_INIT(&raidbp->rf_buf);
 1831 
 1832         /*
 1833          * context for raidiodone
 1834          */
 1835         raidbp->rf_obp = bp;
 1836         raidbp->req = req;
 1837 
 1838         BIO_COPYPRIO(&raidbp->rf_buf, bp);
 1839 
 1840         switch (req->type) {
 1841         case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
 1842                 /* XXX need to do something extra here.. */
 1843                 /* I'm leaving this in, as I've never actually seen it used,
 1844                  * and I'd like folks to report it... GO */
 1845                 printf(("WAKEUP CALLED\n"));
 1846                 queue->numOutstanding++;
 1847 
 1848                 /* XXX need to glue the original buffer into this??  */
 1849 
 1850                 KernelWakeupFunc(&raidbp->rf_buf);
 1851                 break;
 1852 
 1853         case RF_IO_TYPE_READ:
 1854         case RF_IO_TYPE_WRITE:
 1855 #if RF_ACC_TRACE > 0
 1856                 if (req->tracerec) {
 1857                         RF_ETIMER_START(req->tracerec->timer);
 1858                 }
 1859 #endif
 1860                 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
 1861                     op | bp->b_flags, queue->rf_cinfo->ci_dev,
 1862                     req->sectorOffset, req->numSector,
 1863                     req->buf, KernelWakeupFunc, (void *) req,
 1864                     queue->raidPtr->logBytesPerSector, req->b_proc);
 1865 
 1866                 if (rf_debugKernelAccess) {
 1867                         db1_printf(("dispatch: bp->b_blkno = %ld\n",
 1868                                 (long) bp->b_blkno));
 1869                 }
 1870                 queue->numOutstanding++;
 1871                 queue->last_deq_sector = req->sectorOffset;
 1872                 /* acc wouldn't have been let in if there were any pending
 1873                  * reqs at any other priority */
 1874                 queue->curPriority = req->priority;
 1875 
 1876                 db1_printf(("Going for %c to unit %d col %d\n",
 1877                             req->type, queue->raidPtr->raidid, 
 1878                             queue->col));
 1879                 db1_printf(("sector %d count %d (%d bytes) %d\n",
 1880                         (int) req->sectorOffset, (int) req->numSector,
 1881                         (int) (req->numSector <<
 1882                             queue->raidPtr->logBytesPerSector),
 1883                         (int) queue->raidPtr->logBytesPerSector));
 1884                 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
 1885                         raidbp->rf_buf.b_vp->v_numoutput++;
 1886                 }
 1887                 VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);
 1888 
 1889                 break;
 1890 
 1891         default:
 1892                 panic("bad req->type in rf_DispatchKernelIO");
 1893         }
 1894         db1_printf(("Exiting from DispatchKernelIO\n"));
 1895 
 1896         return (0);
 1897 }
 1898 /* this is the callback function associated with a I/O invoked from
 1899    kernel code.
 1900  */
 1901 static void 
 1902 KernelWakeupFunc(struct buf *vbp)
 1903 {
 1904         RF_DiskQueueData_t *req = NULL;
 1905         RF_DiskQueue_t *queue;
 1906         struct raidbuf *raidbp = (struct raidbuf *) vbp;
 1907         struct buf *bp;
 1908         int s;
 1909 
 1910         s = splbio();
 1911         db1_printf(("recovering the request queue:\n"));
 1912         req = raidbp->req;
 1913 
 1914         bp = raidbp->rf_obp;
 1915 
 1916         queue = (RF_DiskQueue_t *) req->queue;
 1917 
 1918         if (raidbp->rf_buf.b_flags & B_ERROR) {
 1919                 bp->b_flags |= B_ERROR;
 1920                 bp->b_error = raidbp->rf_buf.b_error ?
 1921                     raidbp->rf_buf.b_error : EIO;
 1922         }
 1923 
 1924         /* XXX methinks this could be wrong... */
 1925 #if 1
 1926         bp->b_resid = raidbp->rf_buf.b_resid;
 1927 #endif
 1928 #if RF_ACC_TRACE > 0
 1929         if (req->tracerec) {
 1930                 RF_ETIMER_STOP(req->tracerec->timer);
 1931                 RF_ETIMER_EVAL(req->tracerec->timer);
 1932                 RF_LOCK_MUTEX(rf_tracing_mutex);
 1933                 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
 1934                 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
 1935                 req->tracerec->num_phys_ios++;
 1936                 RF_UNLOCK_MUTEX(rf_tracing_mutex);
 1937         }
 1938 #endif
 1939         bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
 1940 
 1941         /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
 1942          * ballistic, and mark the component as hosed... */
 1943 
 1944         if (bp->b_flags & B_ERROR) {
 1945                 /* Mark the disk as dead */
 1946                 /* but only mark it once... */
 1947                 if (queue->raidPtr->Disks[queue->col].status ==
 1948                     rf_ds_optimal) {
 1949                         printf("raid%d: IO Error.  Marking %s as failed.\n",
 1950                                queue->raidPtr->raidid,
 1951                                queue->raidPtr->Disks[queue->col].devname);
 1952                         queue->raidPtr->Disks[queue->col].status =
 1953                             rf_ds_failed;
 1954                         queue->raidPtr->status = rf_rs_degraded;
 1955                         queue->raidPtr->numFailures++;
 1956                         queue->raidPtr->numNewFailures++;
 1957                 } else {        /* Disk is already dead... */
 1958                         /* printf("Disk already marked as dead!\n"); */
 1959                 }
 1960 
 1961         }
 1962 
 1963         pool_put(&rf_pools.cbuf, raidbp);
 1964 
 1965         /* Fill in the error value */
 1966 
 1967         req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
 1968 
 1969         simple_lock(&queue->raidPtr->iodone_lock);
 1970 
 1971         /* Drop this one on the "finished" queue... */
 1972         TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
 1973 
 1974         /* Let the raidio thread know there is work to be done. */
 1975         wakeup(&(queue->raidPtr->iodone));
 1976 
 1977         simple_unlock(&queue->raidPtr->iodone_lock);
 1978 
 1979         splx(s);
 1980 }
 1981 
 1982 
 1983 
 1984 /*
 1985  * initialize a buf structure for doing an I/O in the kernel.
 1986  */
 1987 static void 
 1988 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
 1989        RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
 1990        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
 1991        struct proc *b_proc)
 1992 {
 1993         /* bp->b_flags       = B_PHYS | rw_flag; */
 1994         bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
 1995         bp->b_bcount = numSect << logBytesPerSector;
 1996         bp->b_bufsize = bp->b_bcount;
 1997         bp->b_error = 0;
 1998         bp->b_dev = dev;
 1999         bp->b_data = buf;
 2000         bp->b_blkno = startSect;
 2001         bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
 2002         if (bp->b_bcount == 0) {
 2003                 panic("bp->b_bcount is zero in InitBP!!");
 2004         }
 2005         bp->b_proc = b_proc;
 2006         bp->b_iodone = cbFunc;
 2007         bp->b_vp = b_vp;
 2008 
 2009 }
 2010 
 2011 static void
 2012 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 
 2013                     struct disklabel *lp)
 2014 {
 2015         memset(lp, 0, sizeof(*lp));
 2016 
 2017         /* fabricate a label... */
 2018         lp->d_secperunit = raidPtr->totalSectors;
 2019         lp->d_secsize = raidPtr->bytesPerSector;
 2020         lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
 2021         lp->d_ntracks = 4 * raidPtr->numCol;
 2022         lp->d_ncylinders = raidPtr->totalSectors / 
 2023                 (lp->d_nsectors * lp->d_ntracks);
 2024         lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
 2025 
 2026         strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
 2027         lp->d_type = DTYPE_RAID;
 2028         strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
 2029         lp->d_rpm = 3600;
 2030         lp->d_interleave = 1;
 2031         lp->d_flags = 0;
 2032 
 2033         lp->d_partitions[RAW_PART].p_offset = 0;
 2034         lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
 2035         lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
 2036         lp->d_npartitions = RAW_PART + 1;
 2037 
 2038         lp->d_magic = DISKMAGIC;
 2039         lp->d_magic2 = DISKMAGIC;
 2040         lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
 2041 
 2042 }
 2043 /*
 2044  * Read the disklabel from the raid device.  If one is not present, fake one
 2045  * up.
 2046  */
 2047 static void
 2048 raidgetdisklabel(dev_t dev)
 2049 {
 2050         int     unit = raidunit(dev);
 2051         struct raid_softc *rs = &raid_softc[unit];
 2052         const char   *errstring;
 2053         struct disklabel *lp = rs->sc_dkdev.dk_label;
 2054         struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
 2055         RF_Raid_t *raidPtr;
 2056 
 2057         db1_printf(("Getting the disklabel...\n"));
 2058 
 2059         memset(clp, 0, sizeof(*clp));
 2060 
 2061         raidPtr = raidPtrs[unit];
 2062 
 2063         raidgetdefaultlabel(raidPtr, rs, lp);
 2064 
 2065         /*
 2066          * Call the generic disklabel extraction routine.
 2067          */
 2068         errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
 2069             rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
 2070         if (errstring)
 2071                 raidmakedisklabel(rs);
 2072         else {
 2073                 int     i;
 2074                 struct partition *pp;
 2075 
 2076                 /*
 2077                  * Sanity check whether the found disklabel is valid.
 2078                  *
 2079                  * This is necessary since total size of the raid device
 2080                  * may vary when an interleave is changed even though exactly
 2081                  * same componets are used, and old disklabel may used
 2082                  * if that is found.
 2083                  */
 2084                 if (lp->d_secperunit != rs->sc_size)
 2085                         printf("raid%d: WARNING: %s: "
 2086                             "total sector size in disklabel (%d) != "
 2087                             "the size of raid (%ld)\n", unit, rs->sc_xname,
 2088                             lp->d_secperunit, (long) rs->sc_size);
 2089                 for (i = 0; i < lp->d_npartitions; i++) {
 2090                         pp = &lp->d_partitions[i];
 2091                         if (pp->p_offset + pp->p_size > rs->sc_size)
 2092                                 printf("raid%d: WARNING: %s: end of partition `%c' "
 2093                                        "exceeds the size of raid (%ld)\n", 
 2094                                        unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
 2095                 }
 2096         }
 2097 
 2098 }
 2099 /*
 2100  * Take care of things one might want to take care of in the event
 2101  * that a disklabel isn't present.
 2102  */
 2103 static void
 2104 raidmakedisklabel(struct raid_softc *rs)
 2105 {
 2106         struct disklabel *lp = rs->sc_dkdev.dk_label;
 2107         db1_printf(("Making a label..\n"));
 2108 
 2109         /*
 2110          * For historical reasons, if there's no disklabel present
 2111          * the raw partition must be marked FS_BSDFFS.
 2112          */
 2113 
 2114         lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
 2115 
 2116         strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
 2117 
 2118         lp->d_checksum = dkcksum(lp);
 2119 }
 2120 /*
 2121  * Lookup the provided name in the filesystem.  If the file exists,
 2122  * is a valid block device, and isn't being used by anyone else,
 2123  * set *vpp to the file's vnode.
 2124  * You'll find the original of this in ccd.c
 2125  */
 2126 int
 2127 raidlookup(char *path, struct proc *p, struct vnode **vpp)
 2128 {
 2129         struct nameidata nd;
 2130         struct vnode *vp;
 2131         struct vattr va;
 2132         int     error;
 2133 
 2134         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
 2135         if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
 2136                 return (error);
 2137         }
 2138         vp = nd.ni_vp;
 2139         if (vp->v_usecount > 1) {
 2140                 VOP_UNLOCK(vp, 0);
 2141                 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 2142                 return (EBUSY);
 2143         }
 2144         if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
 2145                 VOP_UNLOCK(vp, 0);
 2146                 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 2147                 return (error);
 2148         }
 2149         /* XXX: eventually we should handle VREG, too. */
 2150         if (va.va_type != VBLK) {
 2151                 VOP_UNLOCK(vp, 0);
 2152                 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 2153                 return (ENOTBLK);
 2154         }
 2155         VOP_UNLOCK(vp, 0);
 2156         *vpp = vp;
 2157         return (0);
 2158 }
 2159 /*
 2160  * Wait interruptibly for an exclusive lock.
 2161  *
 2162  * XXX
 2163  * Several drivers do this; it should be abstracted and made MP-safe.
 2164  * (Hmm... where have we seen this warning before :->  GO )
 2165  */
 2166 static int
 2167 raidlock(struct raid_softc *rs)
 2168 {
 2169         int     error;
 2170 
 2171         while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
 2172                 rs->sc_flags |= RAIDF_WANTED;
 2173                 if ((error =
 2174                         tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
 2175                         return (error);
 2176         }
 2177         rs->sc_flags |= RAIDF_LOCKED;
 2178         return (0);
 2179 }
 2180 /*
 2181  * Unlock and wake up any waiters.
 2182  */
 2183 static void
 2184 raidunlock(struct raid_softc *rs)
 2185 {
 2186 
 2187         rs->sc_flags &= ~RAIDF_LOCKED;
 2188         if ((rs->sc_flags & RAIDF_WANTED) != 0) {
 2189                 rs->sc_flags &= ~RAIDF_WANTED;
 2190                 wakeup(rs);
 2191         }
 2192 }
 2193  
 2194 
 2195 #define RF_COMPONENT_INFO_OFFSET  16384 /* byte offset on a component at which its component label is read/written */
 2196 #define RF_COMPONENT_INFO_SIZE     1024 /* size in bytes of the buffer transferred for a component label */
 2197 
 2198 int 
 2199 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
 2200 {
 2201         RF_ComponentLabel_t clabel;
 2202         raidread_component_label(dev, b_vp, &clabel);
 2203         clabel.mod_counter = mod_counter;
 2204         clabel.clean = RF_RAID_CLEAN;
 2205         raidwrite_component_label(dev, b_vp, &clabel);
 2206         return(0);
 2207 }
 2208 
 2209 
 2210 int 
 2211 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
 2212 {
 2213         RF_ComponentLabel_t clabel;
 2214         raidread_component_label(dev, b_vp, &clabel);
 2215         clabel.mod_counter = mod_counter;
 2216         clabel.clean = RF_RAID_DIRTY;
 2217         raidwrite_component_label(dev, b_vp, &clabel);
 2218         return(0);
 2219 }
 2220 
 2221 /* ARGSUSED */
 2222 int
 2223 raidread_component_label(dev_t dev, struct vnode *b_vp, 
 2224                          RF_ComponentLabel_t *clabel)
 2225 {
 2226         struct buf *bp;
 2227         const struct bdevsw *bdev;
 2228         int error;
 2229         
 2230         /* XXX should probably ensure that we don't try to do this if
 2231            someone has changed rf_protected_sectors. */ 
 2232 
 2233         if (b_vp == NULL) {
 2234                 /* For whatever reason, this component is not valid.
 2235                    Don't try to read a component label from it. */
 2236                 return(EINVAL);
 2237         }
 2238 
 2239         /* get a block of the appropriate size... */
 2240         bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
 2241         bp->b_dev = dev;
 2242 
 2243         /* get our ducks in a row for the read */
 2244         bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
 2245         bp->b_bcount = RF_COMPONENT_INFO_SIZE;
 2246         bp->b_flags |= B_READ;
 2247         bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
 2248 
 2249         bdev = bdevsw_lookup(bp->b_dev);
 2250         if (bdev == NULL)
 2251                 return (ENXIO);
 2252         (*bdev->d_strategy)(bp);
 2253 
 2254         error = biowait(bp); 
 2255 
 2256         if (!error) {
 2257                 memcpy(clabel, bp->b_data,
 2258                        sizeof(RF_ComponentLabel_t));
 2259         } 
 2260 
 2261         brelse(bp); 
 2262         return(error);
 2263 }
 2264 /* ARGSUSED */
 2265 int 
 2266 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 
 2267                           RF_ComponentLabel_t *clabel)
 2268 {
 2269         struct buf *bp;
 2270         const struct bdevsw *bdev;
 2271         int error;
 2272 
 2273         /* get a block of the appropriate size... */
 2274         bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
 2275         bp->b_dev = dev;
 2276 
 2277         /* get our ducks in a row for the write */
 2278         bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
 2279         bp->b_bcount = RF_COMPONENT_INFO_SIZE;
 2280         bp->b_flags |= B_WRITE;
 2281         bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
 2282 
 2283         memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
 2284 
 2285         memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
 2286 
 2287         bdev = bdevsw_lookup(bp->b_dev);
 2288         if (bdev == NULL)
 2289                 return (ENXIO);
 2290         (*bdev->d_strategy)(bp);
 2291         error = biowait(bp); 
 2292         brelse(bp);
 2293         if (error) {
 2294 #if 1
 2295                 printf("Failed to write RAID component info!\n");
 2296 #endif
 2297         }
 2298 
 2299         return(error);
 2300 }
 2301 
 2302 void 
 2303 rf_markalldirty(RF_Raid_t *raidPtr)
 2304 {
 2305         RF_ComponentLabel_t clabel;
 2306         int sparecol;
 2307         int c;
 2308         int j;
 2309         int scol = -1;
 2310 
 2311         raidPtr->mod_counter++;
 2312         for (c = 0; c < raidPtr->numCol; c++) {
 2313                 /* we don't want to touch (at all) a disk that has
 2314                    failed */
 2315                 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
 2316                         raidread_component_label(
 2317                                                  raidPtr->Disks[c].dev,
 2318                                                  raidPtr->raid_cinfo[c].ci_vp,
 2319                                                  &clabel);
 2320                         if (clabel.status == rf_ds_spared) {
 2321                                 /* XXX do something special... 
 2322                                    but whatever you do, don't 
 2323                                    try to access it!! */
 2324                         } else {
 2325                                 raidmarkdirty( 
 2326                                               raidPtr->Disks[c].dev,
 2327                                               raidPtr->raid_cinfo[c].ci_vp,
 2328                                               raidPtr->mod_counter);
 2329                         }
 2330                 }
 2331         } 
 2332 
 2333         for( c = 0; c < raidPtr->numSpare ; c++) {
 2334                 sparecol = raidPtr->numCol + c;
 2335                 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 2336                         /* 
 2337                            
 2338                            we claim this disk is "optimal" if it's 
 2339                            rf_ds_used_spare, as that means it should be 
 2340                            directly substitutable for the disk it replaced. 
 2341                            We note that too...
 2342 
 2343                          */
 2344 
 2345                         for(j=0;j<raidPtr->numCol;j++) {
 2346                                 if (raidPtr->Disks[j].spareCol == sparecol) {
 2347                                         scol = j;
 2348                                         break;
 2349                                 }
 2350                         }
 2351                                 
 2352                         raidread_component_label( 
 2353                                  raidPtr->Disks[sparecol].dev,
 2354                                  raidPtr->raid_cinfo[sparecol].ci_vp,
 2355                                  &clabel);
 2356                         /* make sure status is noted */
 2357 
 2358                         raid_init_component_label(raidPtr, &clabel);
 2359 
 2360                         clabel.row = 0;
 2361                         clabel.column = scol;
 2362                         /* Note: we *don't* change status from rf_ds_used_spare
 2363                            to rf_ds_optimal */
 2364                         /* clabel.status = rf_ds_optimal; */
 2365                         
 2366                         raidmarkdirty(raidPtr->Disks[sparecol].dev,
 2367                                       raidPtr->raid_cinfo[sparecol].ci_vp,
 2368                                       raidPtr->mod_counter);
 2369                 }
 2370         }
 2371 }
 2372 
 2373 
 2374 void
 2375 rf_update_component_labels(RF_Raid_t *raidPtr, int final)
 2376 {
 2377         RF_ComponentLabel_t clabel;
 2378         int sparecol;
 2379         int c;
 2380         int j;
 2381         int scol;
 2382 
 2383         scol = -1;
 2384 
 2385         /* XXX should do extra checks to make sure things really are clean, 
 2386            rather than blindly setting the clean bit... */
 2387 
 2388         raidPtr->mod_counter++;
 2389 
 2390         for (c = 0; c < raidPtr->numCol; c++) {
 2391                 if (raidPtr->Disks[c].status == rf_ds_optimal) {
 2392                         raidread_component_label(
 2393                                                  raidPtr->Disks[c].dev,
 2394                                                  raidPtr->raid_cinfo[c].ci_vp,
 2395                                                  &clabel);
 2396                                 /* make sure status is noted */
 2397                         clabel.status = rf_ds_optimal;
 2398                                 /* bump the counter */
 2399                         clabel.mod_counter = raidPtr->mod_counter;
 2400 
 2401                         raidwrite_component_label( 
 2402                                                   raidPtr->Disks[c].dev,
 2403                                                   raidPtr->raid_cinfo[c].ci_vp,
 2404                                                   &clabel);
 2405                         if (final == RF_FINAL_COMPONENT_UPDATE) {
 2406                                 if (raidPtr->parity_good == RF_RAID_CLEAN) {
 2407                                         raidmarkclean( 
 2408                                                       raidPtr->Disks[c].dev, 
 2409                                                       raidPtr->raid_cinfo[c].ci_vp,
 2410                                                       raidPtr->mod_counter);
 2411                                 }
 2412                         }
 2413                 } 
 2414                 /* else we don't touch it.. */
 2415         } 
 2416 
 2417         for( c = 0; c < raidPtr->numSpare ; c++) {
 2418                 sparecol = raidPtr->numCol + c;
 2419                 /* Need to ensure that the reconstruct actually completed! */
 2420                 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 2421                         /* 
 2422                            
 2423                            we claim this disk is "optimal" if it's 
 2424                            rf_ds_used_spare, as that means it should be 
 2425                            directly substitutable for the disk it replaced. 
 2426                            We note that too...
 2427 
 2428                          */
 2429 
 2430                         for(j=0;j<raidPtr->numCol;j++) {
 2431                                 if (raidPtr->Disks[j].spareCol == sparecol) {
 2432                                         scol = j;
 2433                                         break;
 2434                                 }
 2435                         }
 2436                         
 2437                         /* XXX shouldn't *really* need this... */
 2438                         raidread_component_label( 
 2439                                       raidPtr->Disks[sparecol].dev,
 2440                                       raidPtr->raid_cinfo[sparecol].ci_vp,
 2441                                       &clabel);
 2442                         /* make sure status is noted */
 2443 
 2444                         raid_init_component_label(raidPtr, &clabel);
 2445 
 2446                         clabel.mod_counter = raidPtr->mod_counter;
 2447                         clabel.column = scol;
 2448                         clabel.status = rf_ds_optimal;
 2449 
 2450                         raidwrite_component_label(
 2451                                       raidPtr->Disks[sparecol].dev,
 2452                                       raidPtr->raid_cinfo[sparecol].ci_vp,
 2453                                       &clabel);
 2454                         if (final == RF_FINAL_COMPONENT_UPDATE) {
 2455                                 if (raidPtr->parity_good == RF_RAID_CLEAN) {
 2456                                         raidmarkclean( raidPtr->Disks[sparecol].dev,
 2457                                                        raidPtr->raid_cinfo[sparecol].ci_vp,
 2458                                                        raidPtr->mod_counter);
 2459                                 }
 2460                         }
 2461                 }
 2462         }
 2463 }
 2464 
 2465 void
 2466 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
 2467 {
 2468         struct proc *p;
 2469 
 2470         p = raidPtr->engine_thread;
 2471 
 2472         if (vp != NULL) {
 2473                 if (auto_configured == 1) {
 2474                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2475                         VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
 2476                         vput(vp);
 2477                         
 2478                 } else {                                
 2479                         (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 2480                 }
 2481         } 
 2482 }
 2483 
 2484 
 2485 void
 2486 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
 2487 {
 2488         int r,c; 
 2489         struct vnode *vp;
 2490         int acd;
 2491 
 2492 
 2493         /* We take this opportunity to close the vnodes like we should.. */
 2494 
 2495         for (c = 0; c < raidPtr->numCol; c++) {
 2496                 vp = raidPtr->raid_cinfo[c].ci_vp;
 2497                 acd = raidPtr->Disks[c].auto_configured;
 2498                 rf_close_component(raidPtr, vp, acd);
 2499                 raidPtr->raid_cinfo[c].ci_vp = NULL;
 2500                 raidPtr->Disks[c].auto_configured = 0;
 2501         }
 2502 
 2503         for (r = 0; r < raidPtr->numSpare; r++) {
 2504                 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
 2505                 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
 2506                 rf_close_component(raidPtr, vp, acd);
 2507                 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
 2508                 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
 2509         }
 2510 }
 2511 
 2512 
 2513 void 
 2514 rf_ReconThread(struct rf_recon_req *req)
 2515 {
 2516         int     s;
 2517         RF_Raid_t *raidPtr;
 2518 
 2519         s = splbio();
 2520         raidPtr = (RF_Raid_t *) req->raidPtr;
 2521         raidPtr->recon_in_progress = 1;
 2522 
 2523         rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
 2524                     ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
 2525 
 2526         RF_Free(req, sizeof(*req));
 2527 
 2528         raidPtr->recon_in_progress = 0;
 2529         splx(s);
 2530 
 2531         /* That's all... */
 2532         kthread_exit(0);        /* does not return */
 2533 }
 2534 
 2535 void
 2536 rf_RewriteParityThread(RF_Raid_t *raidPtr)
 2537 {
 2538         int retcode;
 2539         int s;
 2540 
 2541         raidPtr->parity_rewrite_in_progress = 1;
 2542         s = splbio();
 2543         retcode = rf_RewriteParity(raidPtr);
 2544         splx(s);
 2545         if (retcode) {
 2546                 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
 2547         } else {
 2548                 /* set the clean bit!  If we shutdown correctly,
 2549                    the clean bit on each component label will get
 2550                    set */
 2551                 raidPtr->parity_good = RF_RAID_CLEAN;
 2552         }
 2553         raidPtr->parity_rewrite_in_progress = 0;
 2554 
 2555         /* Anyone waiting for us to stop?  If so, inform them... */
 2556         if (raidPtr->waitShutdown) {
 2557                 wakeup(&raidPtr->parity_rewrite_in_progress);
 2558         }
 2559 
 2560         /* That's all... */
 2561         kthread_exit(0);        /* does not return */
 2562 }
 2563 
 2564 
 2565 void
 2566 rf_CopybackThread(RF_Raid_t *raidPtr)
 2567 {
 2568         int s;
 2569 
 2570         raidPtr->copyback_in_progress = 1;
 2571         s = splbio();
 2572         rf_CopybackReconstructedData(raidPtr);
 2573         splx(s);
 2574         raidPtr->copyback_in_progress = 0;
 2575 
 2576         /* That's all... */
 2577         kthread_exit(0);        /* does not return */
 2578 }
 2579 
 2580 
 2581 void
 2582 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
 2583 {
 2584         int s;
 2585         RF_Raid_t *raidPtr;
 2586         
 2587         s = splbio();
 2588         raidPtr = req->raidPtr;
 2589         raidPtr->recon_in_progress = 1;
 2590         rf_ReconstructInPlace(raidPtr, req->col);
 2591         RF_Free(req, sizeof(*req));
 2592         raidPtr->recon_in_progress = 0;
 2593         splx(s);
 2594 
 2595         /* That's all... */
 2596         kthread_exit(0);        /* does not return */
 2597 }
 2598 
 2599 RF_AutoConfig_t *
 2600 rf_find_raid_components()
 2601 {
 2602         struct vnode *vp;
 2603         struct disklabel label;
 2604         struct device *dv;
 2605         dev_t dev;
 2606         int bmajor;
 2607         int error;
 2608         int i;
 2609         int good_one;
 2610         RF_ComponentLabel_t *clabel;
 2611         RF_AutoConfig_t *ac_list;
 2612         RF_AutoConfig_t *ac;
 2613 
 2614 
 2615         /* initialize the AutoConfig list */
 2616         ac_list = NULL;
 2617 
 2618         /* we begin by trolling through *all* the devices on the system */
 2619 
 2620         for (dv = alldevs.tqh_first; dv != NULL;
 2621              dv = dv->dv_list.tqe_next) {
 2622 
 2623                 /* we are only interested in disks... */
 2624                 if (dv->dv_class != DV_DISK)
 2625                         continue;
 2626 
 2627                 /* we don't care about floppies... */
 2628                 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
 2629                         continue;
 2630                 }
 2631 
 2632                 /* we don't care about CD's... */
 2633                 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
 2634                         continue;
 2635                 }
 2636 
 2637                 /* hdfd is the Atari/Hades floppy driver */
 2638                 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
 2639                         continue;
 2640                 }
 2641                 /* fdisa is the Atari/Milan floppy driver */
 2642                 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
 2643                         continue;
 2644                 }
 2645                 
 2646                 /* need to find the device_name_to_block_device_major stuff */
 2647                 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
 2648 
 2649                 /* get a vnode for the raw partition of this disk */
 2650 
 2651                 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
 2652                 if (bdevvp(dev, &vp))
 2653                         panic("RAID can't alloc vnode");
 2654 
 2655                 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
 2656 
 2657                 if (error) {
 2658                         /* "Who cares."  Continue looking 
 2659                            for something that exists*/
 2660                         vput(vp);
 2661                         continue;
 2662                 }
 2663 
 2664                 /* Ok, the disk exists.  Go get the disklabel. */
 2665                 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
 2666                 if (error) {
 2667                         /*
 2668                          * XXX can't happen - open() would
 2669                          * have errored out (or faked up one)
 2670                          */
 2671                         printf("can't get label for dev %s%c (%d)!?!?\n",
 2672                                dv->dv_xname, 'a' + RAW_PART, error);
 2673                 }
 2674 
 2675                 /* don't need this any more.  We'll allocate it again
 2676                    a little later if we really do... */
 2677                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2678                 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
 2679                 vput(vp);
 2680 
 2681                 for (i=0; i < label.d_npartitions; i++) {
 2682                         /* We only support partitions marked as RAID */
 2683                         if (label.d_partitions[i].p_fstype != FS_RAID)
 2684                                 continue;
 2685 
 2686                         dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
 2687                         if (bdevvp(dev, &vp))
 2688                                 panic("RAID can't alloc vnode");
 2689 
 2690                         error = VOP_OPEN(vp, FREAD, NOCRED, 0);
 2691                         if (error) {
 2692                                 /* Whatever... */
 2693                                 vput(vp);
 2694                                 continue;
 2695                         }
 2696 
 2697                         good_one = 0;
 2698 
 2699                         clabel = (RF_ComponentLabel_t *) 
 2700                                 malloc(sizeof(RF_ComponentLabel_t), 
 2701                                        M_RAIDFRAME, M_NOWAIT);
 2702                         if (clabel == NULL) {
 2703                                 /* XXX CLEANUP HERE */
 2704                                 printf("RAID auto config: out of memory!\n");
 2705                                 return(NULL); /* XXX probably should panic? */
 2706                         }
 2707 
 2708                         if (!raidread_component_label(dev, vp, clabel)) {
 2709                                 /* Got the label.  Does it look reasonable? */
 2710                                 if (rf_reasonable_label(clabel) &&
 2711                                     (clabel->partitionSize <= 
 2712                                      label.d_partitions[i].p_size)) {
 2713 #if DEBUG
 2714                                         printf("Component on: %s%c: %d\n", 
 2715                                                dv->dv_xname, 'a'+i,
 2716                                                label.d_partitions[i].p_size);
 2717                                         rf_print_component_label(clabel);
 2718 #endif
 2719                                         /* if it's reasonable, add it, 
 2720                                            else ignore it. */
 2721                                         ac = (RF_AutoConfig_t *)
 2722                                                 malloc(sizeof(RF_AutoConfig_t),
 2723                                                        M_RAIDFRAME,
 2724                                                        M_NOWAIT);
 2725                                         if (ac == NULL) {
 2726                                                 /* XXX should panic?? */
 2727                                                 return(NULL);
 2728                                         }
 2729                                         
 2730                                         sprintf(ac->devname, "%s%c",
 2731                                                 dv->dv_xname, 'a'+i);
 2732                                         ac->dev = dev;
 2733                                         ac->vp = vp;
 2734                                         ac->clabel = clabel;
 2735                                         ac->next = ac_list;
 2736                                         ac_list = ac;
 2737                                         good_one = 1;
 2738                                 } 
 2739                         }
 2740                         if (!good_one) {
 2741                                 /* cleanup */
 2742                                 free(clabel, M_RAIDFRAME);
 2743                                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2744                                 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
 2745                                 vput(vp);
 2746                         }
 2747                 }
 2748         }
 2749         return(ac_list);
 2750 }
 2751                         
 2752 static int
 2753 rf_reasonable_label(RF_ComponentLabel_t *clabel)
 2754 {
 2755         
 2756         if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
 2757              (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
 2758             ((clabel->clean == RF_RAID_CLEAN) ||
 2759              (clabel->clean == RF_RAID_DIRTY)) &&
 2760             clabel->row >=0 && 
 2761             clabel->column >= 0 && 
 2762             clabel->num_rows > 0 &&
 2763             clabel->num_columns > 0 &&
 2764             clabel->row < clabel->num_rows && 
 2765             clabel->column < clabel->num_columns &&
 2766             clabel->blockSize > 0 &&
 2767             clabel->numBlocks > 0) {
 2768                 /* label looks reasonable enough... */
 2769                 return(1);
 2770         }
 2771         return(0);
 2772 }
 2773 
 2774 
 2775 #if DEBUG
 2776 void
 2777 rf_print_component_label(RF_ComponentLabel_t *clabel)
 2778 {
 2779         printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
 2780                clabel->row, clabel->column, 
 2781                clabel->num_rows, clabel->num_columns);
 2782         printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
 2783                clabel->version, clabel->serial_number,
 2784                clabel->mod_counter);
 2785         printf("   Clean: %s Status: %d\n",
 2786                clabel->clean ? "Yes" : "No", clabel->status );
 2787         printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
 2788                clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
 2789         printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
 2790                (char) clabel->parityConfig, clabel->blockSize, 
 2791                clabel->numBlocks);
 2792         printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
 2793         printf("   Contains root partition: %s\n",  
 2794                clabel->root_partition ? "Yes" : "No" );
 2795         printf("   Last configured as: raid%d\n", clabel->last_unit );
 2796 #if 0
 2797            printf("   Config order: %d\n", clabel->config_order);
 2798 #endif
 2799                
 2800 }
 2801 #endif
 2802 
 2803 RF_ConfigSet_t *
 2804 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
 2805 {
 2806         RF_AutoConfig_t *ac;
 2807         RF_ConfigSet_t *config_sets;
 2808         RF_ConfigSet_t *cset;
 2809         RF_AutoConfig_t *ac_next;
 2810 
 2811 
 2812         config_sets = NULL;
 2813 
 2814         /* Go through the AutoConfig list, and figure out which components
 2815            belong to what sets.  */
 2816         ac = ac_list;
 2817         while(ac!=NULL) {
 2818                 /* we're going to putz with ac->next, so save it here
 2819                    for use at the end of the loop */
 2820                 ac_next = ac->next;
 2821 
 2822                 if (config_sets == NULL) {
 2823                         /* will need at least this one... */
 2824                         config_sets = (RF_ConfigSet_t *)
 2825                                 malloc(sizeof(RF_ConfigSet_t), 
 2826                                        M_RAIDFRAME, M_NOWAIT);
 2827                         if (config_sets == NULL) {
 2828                                 panic("rf_create_auto_sets: No memory!");
 2829                         }
 2830                         /* this one is easy :) */
 2831                         config_sets->ac = ac;
 2832                         config_sets->next = NULL;
 2833                         config_sets->rootable = 0;
 2834                         ac->next = NULL;
 2835                 } else {
 2836                         /* which set does this component fit into? */
 2837                         cset = config_sets;
 2838                         while(cset!=NULL) {
 2839                                 if (rf_does_it_fit(cset, ac)) {
 2840                                         /* looks like it matches... */
 2841                                         ac->next = cset->ac;
 2842                                         cset->ac = ac;
 2843                                         break;
 2844                                 }
 2845                                 cset = cset->next;
 2846                         }
 2847                         if (cset==NULL) {
 2848                                 /* didn't find a match above... new set..*/
 2849                                 cset = (RF_ConfigSet_t *)
 2850                                         malloc(sizeof(RF_ConfigSet_t), 
 2851                                                M_RAIDFRAME, M_NOWAIT);
 2852                                 if (cset == NULL) {
 2853                                         panic("rf_create_auto_sets: No memory!");
 2854                                 }
 2855                                 cset->ac = ac;
 2856                                 ac->next = NULL;
 2857                                 cset->next = config_sets;
 2858                                 cset->rootable = 0;
 2859                                 config_sets = cset;
 2860                         }
 2861                 }
 2862                 ac = ac_next;
 2863         }
 2864 
 2865 
 2866         return(config_sets);
 2867 }
 2868 
 2869 static int
 2870 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
 2871 {
 2872         RF_ComponentLabel_t *clabel1, *clabel2;
 2873 
 2874         /* If this one matches the *first* one in the set, that's good
 2875            enough, since the other members of the set would have been
 2876            through here too... */
 2877         /* note that we are not checking partitionSize here..
 2878 
 2879            Note that we are also not checking the mod_counters here.
 2880            If everything else matches execpt the mod_counter, that's 
 2881            good enough for this test.  We will deal with the mod_counters
 2882            a little later in the autoconfiguration process.  
 2883 
 2884             (clabel1->mod_counter == clabel2->mod_counter) &&
 2885 
 2886            The reason we don't check for this is that failed disks
 2887            will have lower modification counts.  If those disks are
 2888            not added to the set they used to belong to, then they will
 2889            form their own set, which may result in 2 different sets,
 2890            for example, competing to be configured at raid0, and
 2891            perhaps competing to be the root filesystem set.  If the
 2892            wrong ones get configured, or both attempt to become /,
 2893            weird behaviour and or serious lossage will occur.  Thus we
 2894            need to bring them into the fold here, and kick them out at
 2895            a later point.
 2896 
 2897         */
 2898 
 2899         clabel1 = cset->ac->clabel;
 2900         clabel2 = ac->clabel;
 2901         if ((clabel1->version == clabel2->version) &&
 2902             (clabel1->serial_number == clabel2->serial_number) &&
 2903             (clabel1->num_rows == clabel2->num_rows) &&
 2904             (clabel1->num_columns == clabel2->num_columns) &&
 2905             (clabel1->sectPerSU == clabel2->sectPerSU) &&
 2906             (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
 2907             (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
 2908             (clabel1->parityConfig == clabel2->parityConfig) &&
 2909             (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
 2910             (clabel1->blockSize == clabel2->blockSize) &&
 2911             (clabel1->numBlocks == clabel2->numBlocks) &&
 2912             (clabel1->autoconfigure == clabel2->autoconfigure) &&
 2913             (clabel1->root_partition == clabel2->root_partition) &&
 2914             (clabel1->last_unit == clabel2->last_unit) &&
 2915             (clabel1->config_order == clabel2->config_order)) {
 2916                 /* if it get's here, it almost *has* to be a match */
 2917         } else {
 2918                 /* it's not consistent with somebody in the set.. 
 2919                    punt */
 2920                 return(0);
 2921         }
 2922         /* all was fine.. it must fit... */
 2923         return(1);
 2924 }
 2925 
 2926 int
 2927 rf_have_enough_components(RF_ConfigSet_t *cset)
 2928 {
 2929         RF_AutoConfig_t *ac;
 2930         RF_AutoConfig_t *auto_config;
 2931         RF_ComponentLabel_t *clabel;
 2932         int c;
 2933         int num_cols;
 2934         int num_missing;
 2935         int mod_counter;
 2936         int mod_counter_found;
 2937         int even_pair_failed;
 2938         char parity_type;
 2939         
 2940 
 2941         /* check to see that we have enough 'live' components
 2942            of this set.  If so, we can configure it if necessary */
 2943 
 2944         num_cols = cset->ac->clabel->num_columns;
 2945         parity_type = cset->ac->clabel->parityConfig;
 2946 
 2947         /* XXX Check for duplicate components!?!?!? */
 2948 
 2949         /* Determine what the mod_counter is supposed to be for this set. */
 2950 
 2951         mod_counter_found = 0;
 2952         mod_counter = 0;
 2953         ac = cset->ac;
 2954         while(ac!=NULL) {
 2955                 if (mod_counter_found==0) {
 2956                         mod_counter = ac->clabel->mod_counter;
 2957                         mod_counter_found = 1;
 2958                 } else {
 2959                         if (ac->clabel->mod_counter > mod_counter) {
 2960                                 mod_counter = ac->clabel->mod_counter;
 2961                         }
 2962                 }
 2963                 ac = ac->next;
 2964         }
 2965 
 2966         num_missing = 0;
 2967         auto_config = cset->ac;
 2968 
 2969         even_pair_failed = 0;
 2970         for(c=0; c<num_cols; c++) {
 2971                 ac = auto_config;
 2972                 while(ac!=NULL) {
 2973                         if ((ac->clabel->column == c) && 
 2974                             (ac->clabel->mod_counter == mod_counter)) {
 2975                                 /* it's this one... */
 2976 #if DEBUG
 2977                                 printf("Found: %s at %d\n",
 2978                                        ac->devname,c);
 2979 #endif
 2980                                 break;
 2981                         }
 2982                         ac=ac->next;
 2983                 }
 2984                 if (ac==NULL) {
 2985                                 /* Didn't find one here! */
 2986                                 /* special case for RAID 1, especially
 2987                                    where there are more than 2
 2988                                    components (where RAIDframe treats
 2989                                    things a little differently :( ) */
 2990                         if (parity_type == '1') {
 2991                                 if (c%2 == 0) { /* even component */
 2992                                         even_pair_failed = 1;
 2993                                 } else { /* odd component.  If
 2994                                             we're failed, and
 2995                                             so is the even
 2996                                             component, it's
 2997                                             "Good Night, Charlie" */
 2998                                         if (even_pair_failed == 1) {
 2999                                                 return(0);
 3000                                         }
 3001                                 }
 3002                         } else {
 3003                                 /* normal accounting */
 3004                                 num_missing++;
 3005                         }
 3006                 }
 3007                 if ((parity_type == '1') && (c%2 == 1)) {
 3008                                 /* Just did an even component, and we didn't
 3009                                    bail.. reset the even_pair_failed flag, 
 3010                                    and go on to the next component.... */
 3011                         even_pair_failed = 0;
 3012                 }
 3013         }
 3014 
 3015         clabel = cset->ac->clabel;
 3016 
 3017         if (((clabel->parityConfig == '') && (num_missing > 0)) ||
 3018             ((clabel->parityConfig == '4') && (num_missing > 1)) ||
 3019             ((clabel->parityConfig == '5') && (num_missing > 1))) {
 3020                 /* XXX this needs to be made *much* more general */
 3021                 /* Too many failures */
 3022                 return(0);
 3023         }
 3024         /* otherwise, all is well, and we've got enough to take a kick
 3025            at autoconfiguring this set */
 3026         return(1);
 3027 }
 3028 
 3029 void
 3030 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
 3031                         RF_Raid_t *raidPtr)
 3032 {
 3033         RF_ComponentLabel_t *clabel;
 3034         int i;
 3035 
 3036         clabel = ac->clabel;
 3037 
 3038         /* 1. Fill in the common stuff */
 3039         config->numRow = clabel->num_rows = 1;
 3040         config->numCol = clabel->num_columns;
 3041         config->numSpare = 0; /* XXX should this be set here? */
 3042         config->sectPerSU = clabel->sectPerSU;
 3043         config->SUsPerPU = clabel->SUsPerPU;
 3044         config->SUsPerRU = clabel->SUsPerRU;
 3045         config->parityConfig = clabel->parityConfig;
 3046         /* XXX... */
 3047         strcpy(config->diskQueueType,"fifo");
 3048         config->maxOutstandingDiskReqs = clabel->maxOutstanding;
 3049         config->layoutSpecificSize = 0; /* XXX ?? */
 3050 
 3051         while(ac!=NULL) {
 3052                 /* row/col values will be in range due to the checks
 3053                    in reasonable_label() */
 3054                 strcpy(config->devnames[0][ac->clabel->column],
 3055                        ac->devname);
 3056                 ac = ac->next;
 3057         }
 3058 
 3059         for(i=0;i<RF_MAXDBGV;i++) {
 3060                 config->debugVars[i][0] = 0;
 3061         }
 3062 }
 3063 
 3064 int
 3065 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
 3066 {
 3067         RF_ComponentLabel_t clabel;
 3068         struct vnode *vp;
 3069         dev_t dev;
 3070         int column;
 3071         int sparecol;
 3072 
 3073         raidPtr->autoconfigure = new_value;
 3074 
 3075         for(column=0; column<raidPtr->numCol; column++) {
 3076                 if (raidPtr->Disks[column].status == rf_ds_optimal) {
 3077                         dev = raidPtr->Disks[column].dev;
 3078                         vp = raidPtr->raid_cinfo[column].ci_vp;
 3079                         raidread_component_label(dev, vp, &clabel);
 3080                         clabel.autoconfigure = new_value;
 3081                         raidwrite_component_label(dev, vp, &clabel);
 3082                 }
 3083         }
 3084         for(column = 0; column < raidPtr->numSpare ; column++) {
 3085                 sparecol = raidPtr->numCol + column;
 3086                 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 3087                         dev = raidPtr->Disks[sparecol].dev;
 3088                         vp = raidPtr->raid_cinfo[sparecol].ci_vp;
 3089                         raidread_component_label(dev, vp, &clabel);
 3090                         clabel.autoconfigure = new_value;
 3091                         raidwrite_component_label(dev, vp, &clabel);
 3092                 }
 3093         }
 3094         return(new_value);
 3095 }
 3096 
 3097 int
 3098 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
 3099 {
 3100         RF_ComponentLabel_t clabel;
 3101         struct vnode *vp;
 3102         dev_t dev;
 3103         int column;
 3104         int sparecol;
 3105 
 3106         raidPtr->root_partition = new_value;
 3107         for(column=0; column<raidPtr->numCol; column++) {
 3108                 if (raidPtr->Disks[column].status == rf_ds_optimal) {
 3109                         dev = raidPtr->Disks[column].dev;
 3110                         vp = raidPtr->raid_cinfo[column].ci_vp;
 3111                         raidread_component_label(dev, vp, &clabel);
 3112                         clabel.root_partition = new_value;
 3113                         raidwrite_component_label(dev, vp, &clabel);
 3114                 }
 3115         }
 3116         for(column = 0; column < raidPtr->numSpare ; column++) {
 3117                 sparecol = raidPtr->numCol + column;
 3118                 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 3119                         dev = raidPtr->Disks[sparecol].dev;
 3120                         vp = raidPtr->raid_cinfo[sparecol].ci_vp;
 3121                         raidread_component_label(dev, vp, &clabel);
 3122                         clabel.root_partition = new_value;
 3123                         raidwrite_component_label(dev, vp, &clabel);
 3124                 }
 3125         }
 3126         return(new_value);
 3127 }
 3128 
 3129 void
 3130 rf_release_all_vps(RF_ConfigSet_t *cset)
 3131 {
 3132         RF_AutoConfig_t *ac;
 3133         
 3134         ac = cset->ac;
 3135         while(ac!=NULL) {
 3136                 /* Close the vp, and give it back */
 3137                 if (ac->vp) {
 3138                         vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
 3139                         VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
 3140                         vput(ac->vp);
 3141                         ac->vp = NULL;
 3142                 }
 3143                 ac = ac->next;
 3144         }
 3145 }
 3146 
 3147 
 3148 void
 3149 rf_cleanup_config_set(RF_ConfigSet_t *cset)
 3150 {
 3151         RF_AutoConfig_t *ac;
 3152         RF_AutoConfig_t *next_ac;
 3153         
 3154         ac = cset->ac;
 3155         while(ac!=NULL) {
 3156                 next_ac = ac->next;
 3157                 /* nuke the label */
 3158                 free(ac->clabel, M_RAIDFRAME);
 3159                 /* cleanup the config structure */
 3160                 free(ac, M_RAIDFRAME);
 3161                 /* "next.." */
 3162                 ac = next_ac;
 3163         }
 3164         /* and, finally, nuke the config set */
 3165         free(cset, M_RAIDFRAME);
 3166 }
 3167 
 3168 
 3169 void
 3170 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
 3171 {
 3172         /* current version number */
 3173         clabel->version = RF_COMPONENT_LABEL_VERSION; 
 3174         clabel->serial_number = raidPtr->serial_number;
 3175         clabel->mod_counter = raidPtr->mod_counter;
 3176         clabel->num_rows = 1;
 3177         clabel->num_columns = raidPtr->numCol;
 3178         clabel->clean = RF_RAID_DIRTY; /* not clean */
 3179         clabel->status = rf_ds_optimal; /* "It's good!" */
 3180         
 3181         clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
 3182         clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
 3183         clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
 3184 
 3185         clabel->blockSize = raidPtr->bytesPerSector;
 3186         clabel->numBlocks = raidPtr->sectorsPerDisk;
 3187 
 3188         /* XXX not portable */
 3189         clabel->parityConfig = raidPtr->Layout.map->parityConfig;
 3190         clabel->maxOutstanding = raidPtr->maxOutstanding;
 3191         clabel->autoconfigure = raidPtr->autoconfigure;
 3192         clabel->root_partition = raidPtr->root_partition;
 3193         clabel->last_unit = raidPtr->raidid;
 3194         clabel->config_order = raidPtr->config_order;
 3195 }
 3196 
 3197 int
 3198 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
 3199 {
 3200         RF_Raid_t *raidPtr;
 3201         RF_Config_t *config;
 3202         int raidID;
 3203         int retcode;
 3204 
 3205 #if DEBUG
 3206         printf("RAID autoconfigure\n");
 3207 #endif
 3208 
 3209         retcode = 0;
 3210         *unit = -1;
 3211 
 3212         /* 1. Create a config structure */
 3213 
 3214         config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
 3215                                        M_RAIDFRAME,
 3216                                        M_NOWAIT);
 3217         if (config==NULL) {
 3218                 printf("Out of mem!?!?\n");
 3219                                 /* XXX do something more intelligent here. */
 3220                 return(1);
 3221         }
 3222 
 3223         memset(config, 0, sizeof(RF_Config_t));
 3224 
 3225         /* 
 3226            2. Figure out what RAID ID this one is supposed to live at 
 3227            See if we can get the same RAID dev that it was configured
 3228            on last time.. 
 3229         */
 3230 
 3231         raidID = cset->ac->clabel->last_unit;
 3232         if ((raidID < 0) || (raidID >= numraid)) {
 3233                 /* let's not wander off into lala land. */
 3234                 raidID = numraid - 1;
 3235         }
 3236         if (raidPtrs[raidID]->valid != 0) {
 3237 
 3238                 /* 
 3239                    Nope... Go looking for an alternative...  
 3240                    Start high so we don't immediately use raid0 if that's
 3241                    not taken. 
 3242                 */
 3243 
 3244                 for(raidID = numraid - 1; raidID >= 0; raidID--) {
 3245                         if (raidPtrs[raidID]->valid == 0) {
 3246                                 /* can use this one! */
 3247                                 break;
 3248                         }
 3249                 }
 3250         }
 3251 
 3252         if (raidID < 0) {
 3253                 /* punt... */
 3254                 printf("Unable to auto configure this set!\n");
 3255                 printf("(Out of RAID devs!)\n");
 3256                 return(1);
 3257         }
 3258 
 3259 #if DEBUG
 3260         printf("Configuring raid%d:\n",raidID);
 3261 #endif
 3262 
 3263         raidPtr = raidPtrs[raidID];
 3264 
 3265         /* XXX all this stuff should be done SOMEWHERE ELSE! */
 3266         raidPtr->raidid = raidID;
 3267         raidPtr->openings = RAIDOUTSTANDING;
 3268 
 3269         /* 3. Build the configuration structure */
 3270         rf_create_configuration(cset->ac, config, raidPtr);
 3271 
 3272         /* 4. Do the configuration */
 3273         retcode = rf_Configure(raidPtr, config, cset->ac);
 3274         
 3275         if (retcode == 0) {
 3276 
 3277                 raidinit(raidPtrs[raidID]);
 3278 
 3279                 rf_markalldirty(raidPtrs[raidID]);
 3280                 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
 3281                 if (cset->ac->clabel->root_partition==1) {
 3282                         /* everything configured just fine.  Make a note
 3283                            that this set is eligible to be root. */
 3284                         cset->rootable = 1;
 3285                         /* XXX do this here? */
 3286                         raidPtrs[raidID]->root_partition = 1; 
 3287                 }
 3288         }
 3289 
 3290         /* 5. Cleanup */
 3291         free(config, M_RAIDFRAME);
 3292         
 3293         *unit = raidID;
 3294         return(retcode);
 3295 }
 3296 
 3297 void
 3298 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
 3299 {
 3300         struct buf *bp;
 3301 
 3302         bp = (struct buf *)desc->bp;
 3303         disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 
 3304             (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
 3305 }
 3306 
 3307 void
 3308 rf_pool_init(struct pool *p, size_t size, char *w_chan, 
 3309              size_t min, size_t max)
 3310 {
 3311         pool_init(p, size, 0, 0, 0, w_chan, NULL);      
 3312         pool_sethiwat(p, max);
 3313         pool_prime(p, min);
 3314         pool_setlowat(p, min);
 3315 }

Cache object: bb06536df13b0a82b7850078d9e40722


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.