1 /* $NetBSD: rf_copyback.c,v 1.28 2004/03/04 02:49:58 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*****************************************************************************
30 *
31 * copyback.c -- code to copy reconstructed data back from spare space to
32 * the replaced disk.
33 *
34 * the code operates using callbacks on the I/Os to continue with the
35 * next unit to be copied back. We do this because a simple loop
36 * containing blocking I/Os will not work in the simulator.
37 *
38 ****************************************************************************/
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: rf_copyback.c,v 1.28 2004/03/04 02:49:58 oster Exp $");
42
43 #include <dev/raidframe/raidframevar.h>
44
45 #include <sys/time.h>
46 #include <sys/buf.h>
47 #include "rf_raid.h"
48 #include "rf_mcpair.h"
49 #include "rf_acctrace.h"
50 #include "rf_etimer.h"
51 #include "rf_general.h"
52 #include "rf_utils.h"
53 #include "rf_copyback.h"
54 #include "rf_decluster.h"
55 #include "rf_driver.h"
56 #include "rf_shutdown.h"
57 #include "rf_kintf.h"
58
59 #define RF_COPYBACK_DATA 0
60 #define RF_COPYBACK_PARITY 1
61
62 int rf_copyback_in_progress;
63
64 static int rf_CopybackReadDoneProc(RF_CopybackDesc_t * desc, int status);
65 static int rf_CopybackWriteDoneProc(RF_CopybackDesc_t * desc, int status);
66 static void rf_CopybackOne(RF_CopybackDesc_t * desc, int typ,
67 RF_RaidAddr_t addr, RF_RowCol_t testCol,
68 RF_SectorNum_t testOffs);
69 static void rf_CopybackComplete(RF_CopybackDesc_t * desc, int status);
70
71 int
72 rf_ConfigureCopyback(listp)
73 RF_ShutdownList_t **listp;
74 {
75 rf_copyback_in_progress = 0;
76 return (0);
77 }
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/ioctl.h>
83 #include <sys/fcntl.h>
84 #include <sys/vnode.h>
85
86 /* do a complete copyback */
87 void
88 rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
89 {
90 RF_ComponentLabel_t c_label;
91 int found, retcode;
92 RF_CopybackDesc_t *desc;
93 RF_RowCol_t fcol;
94 RF_RaidDisk_t *badDisk;
95 char *databuf;
96
97 struct partinfo dpart;
98 struct vnode *vp;
99 struct vattr va;
100 struct proc *proc;
101
102 int ac;
103
104 fcol = 0;
105 found = 0;
106 for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
107 if (raidPtr->Disks[fcol].status == rf_ds_dist_spared
108 || raidPtr->Disks[fcol].status == rf_ds_spared) {
109 found = 1;
110 break;
111 }
112 }
113
114 if (!found) {
115 printf("raid%d: no disks need copyback\n", raidPtr->raidid);
116 return;
117 }
118
119 badDisk = &raidPtr->Disks[fcol];
120
121 proc = raidPtr->engine_thread;
122
123 /* This device may have been opened successfully the first time. Close
124 * it before trying to open it again.. */
125
126 if (raidPtr->raid_cinfo[fcol].ci_vp != NULL) {
127 printf("Closed the open device: %s\n",
128 raidPtr->Disks[fcol].devname);
129 vp = raidPtr->raid_cinfo[fcol].ci_vp;
130 ac = raidPtr->Disks[fcol].auto_configured;
131 rf_close_component(raidPtr, vp, ac);
132 raidPtr->raid_cinfo[fcol].ci_vp = NULL;
133
134 }
135 /* note that this disk was *not* auto_configured (any longer) */
136 raidPtr->Disks[fcol].auto_configured = 0;
137
138 printf("About to (re-)open the device: %s\n",
139 raidPtr->Disks[fcol].devname);
140
141 retcode = raidlookup(raidPtr->Disks[fcol].devname, proc, &vp);
142
143 if (retcode) {
144 printf("raid%d: copyback: raidlookup on device: %s failed: %d!\n",
145 raidPtr->raidid, raidPtr->Disks[fcol].devname,
146 retcode);
147
148 /* XXX the component isn't responding properly... must be
149 * still dead :-( */
150 return;
151
152 } else {
153
154 /* Ok, so we can at least do a lookup... How about actually
155 * getting a vp for it? */
156
157 if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
158 return;
159 }
160 retcode = VOP_IOCTL(vp, DIOCGPART, &dpart,
161 FREAD, proc->p_ucred, proc);
162 if (retcode) {
163 return;
164 }
165 raidPtr->Disks[fcol].blockSize = dpart.disklab->d_secsize;
166
167 raidPtr->Disks[fcol].numBlocks = dpart.part->p_size -
168 rf_protectedSectors;
169
170 raidPtr->raid_cinfo[fcol].ci_vp = vp;
171 raidPtr->raid_cinfo[fcol].ci_dev = va.va_rdev;
172
173 raidPtr->Disks[fcol].dev = va.va_rdev; /* XXX or the above? */
174
175 /* we allow the user to specify that only a fraction of the
176 * disks should be used this is just for debug: it speeds up
177 * the parity scan */
178 raidPtr->Disks[fcol].numBlocks =
179 raidPtr->Disks[fcol].numBlocks *
180 rf_sizePercentage / 100;
181 }
182
183 if (retcode) {
184 printf("raid%d: copyback: target disk failed TUR\n",
185 raidPtr->raidid);
186 return;
187 }
188 /* get a buffer to hold one SU */
189 RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (char *));
190
191 /* create a descriptor */
192 RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
193 desc->raidPtr = raidPtr;
194 desc->status = 0;
195 desc->fcol = fcol;
196 desc->spCol = badDisk->spareCol;
197 desc->stripeAddr = 0;
198 desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
199 desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.numDataCol;
200 desc->databuf = databuf;
201 desc->mcpair = rf_AllocMCPair();
202
203 /* quiesce the array, since we don't want to code support for user
204 * accs here */
205 rf_SuspendNewRequestsAndWait(raidPtr);
206
207 /* adjust state of the array and of the disks */
208 RF_LOCK_MUTEX(raidPtr->mutex);
209 raidPtr->Disks[desc->fcol].status = rf_ds_optimal;
210 raidPtr->status = rf_rs_optimal;
211 rf_copyback_in_progress = 1; /* debug only */
212 RF_UNLOCK_MUTEX(raidPtr->mutex);
213
214 RF_GETTIME(desc->starttime);
215 rf_ContinueCopyback(desc);
216
217 /* Data has been restored. Fix up the component label. */
218 /* Don't actually need the read here.. */
219 raidread_component_label( raidPtr->raid_cinfo[fcol].ci_dev,
220 raidPtr->raid_cinfo[fcol].ci_vp,
221 &c_label);
222
223 raid_init_component_label( raidPtr, &c_label );
224
225 c_label.row = 0;
226 c_label.column = fcol;
227 c_label.partitionSize = raidPtr->Disks[fcol].partitionSize;
228
229 raidwrite_component_label( raidPtr->raid_cinfo[fcol].ci_dev,
230 raidPtr->raid_cinfo[fcol].ci_vp,
231 &c_label);
232 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
233 }
234
235
236 /*
237 * invoked via callback after a copyback I/O has completed to
238 * continue on with the next one
239 */
240 void
241 rf_ContinueCopyback(RF_CopybackDesc_t *desc)
242 {
243 RF_SectorNum_t testOffs, stripeAddr;
244 RF_Raid_t *raidPtr = desc->raidPtr;
245 RF_RaidAddr_t addr;
246 RF_RowCol_t testCol;
247 #if RF_DEBUG_RECON
248 int old_pctg, new_pctg;
249 struct timeval t, diff;
250 #endif
251 int done;
252
253 #if RF_DEBUG_RECON
254 old_pctg = (-1);
255 #endif
256 while (1) {
257 stripeAddr = desc->stripeAddr;
258 desc->raidPtr->copyback_stripes_done = stripeAddr
259 / desc->sectPerStripe;
260 #if RF_DEBUG_RECON
261 if (rf_prReconSched) {
262 old_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
263 }
264 #endif
265 desc->stripeAddr += desc->sectPerStripe;
266 #if RF_DEBUG_RECON
267 if (rf_prReconSched) {
268 new_pctg = 100 * desc->stripeAddr / raidPtr->totalSectors;
269 if (new_pctg != old_pctg) {
270 RF_GETTIME(t);
271 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
272 printf("%d %d.%06d\n", new_pctg, (int) diff.tv_sec, (int) diff.tv_usec);
273 }
274 }
275 #endif
276 if (stripeAddr >= raidPtr->totalSectors) {
277 rf_CopybackComplete(desc, 0);
278 return;
279 }
280 /* walk through the current stripe, su-by-su */
281 for (done = 0, addr = stripeAddr; addr < stripeAddr + desc->sectPerStripe; addr += desc->sectPerSU) {
282
283 /* map the SU, disallowing remap to spare space */
284 (raidPtr->Layout.map->MapSector) (raidPtr, addr, &testCol, &testOffs, RF_DONT_REMAP);
285
286 if (testCol == desc->fcol) {
287 rf_CopybackOne(desc, RF_COPYBACK_DATA, addr, testCol, testOffs);
288 done = 1;
289 break;
290 }
291 }
292
293 if (!done) {
294 /* we didn't find the failed disk in the data part.
295 * check parity. */
296
297 /* map the parity for this stripe, disallowing remap
298 * to spare space */
299 (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr, &testCol, &testOffs, RF_DONT_REMAP);
300
301 if (testCol == desc->fcol) {
302 rf_CopybackOne(desc, RF_COPYBACK_PARITY, stripeAddr, testCol, testOffs);
303 }
304 }
305 /* check to see if the last read/write pair failed */
306 if (desc->status) {
307 rf_CopybackComplete(desc, 1);
308 return;
309 }
310 /* we didn't find any units to copy back in this stripe.
311 * Continue with the next one */
312 }
313 }
314
315
316 /* copyback one unit */
317 static void
318 rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
319 RF_RowCol_t testCol, RF_SectorNum_t testOffs)
320 {
321 RF_SectorCount_t sectPerSU = desc->sectPerSU;
322 RF_Raid_t *raidPtr = desc->raidPtr;
323 RF_RowCol_t spCol = desc->spCol;
324 RF_SectorNum_t spOffs;
325
326 /* find the spare spare location for this SU */
327 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
328 if (typ == RF_COPYBACK_DATA)
329 raidPtr->Layout.map->MapSector(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
330 else
331 raidPtr->Layout.map->MapParity(raidPtr, addr, &spCol, &spOffs, RF_REMAP);
332 } else {
333 spOffs = testOffs;
334 }
335
336 /* create reqs to read the old location & write the new */
337 desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
338 sectPerSU, desc->databuf, 0L, 0,
339 (int (*) (void *, int)) rf_CopybackReadDoneProc, desc,
340 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
341 desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
342 sectPerSU, desc->databuf, 0L, 0,
343 (int (*) (void *, int)) rf_CopybackWriteDoneProc, desc,
344 NULL, NULL, (void *) raidPtr, RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
345 desc->fcol = testCol;
346
347 /* enqueue the read. the write will go out as part of the callback on
348 * the read. at user-level & in the kernel, wait for the read-write
349 * pair to complete. in the simulator, just return, since everything
350 * will happen as callbacks */
351
352 RF_LOCK_MUTEX(desc->mcpair->mutex);
353 desc->mcpair->flag = 0;
354 RF_UNLOCK_MUTEX(desc->mcpair->mutex);
355
356 rf_DiskIOEnqueue(&raidPtr->Queues[spCol], desc->readreq, RF_IO_NORMAL_PRIORITY);
357
358 RF_LOCK_MUTEX(desc->mcpair->mutex);
359 while (!desc->mcpair->flag) {
360 RF_WAIT_MCPAIR(desc->mcpair);
361 }
362 RF_UNLOCK_MUTEX(desc->mcpair->mutex);
363 rf_FreeDiskQueueData(desc->readreq);
364 rf_FreeDiskQueueData(desc->writereq);
365
366 }
367
368
369 /* called at interrupt context when the read has completed. just send out the write */
370 static int
371 rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
372 {
373 if (status) { /* invoke the callback with bad status */
374 printf("raid%d: copyback read failed. Aborting.\n",
375 desc->raidPtr->raidid);
376 (desc->writereq->CompleteFunc) (desc, -100);
377 } else {
378 rf_DiskIOEnqueue(&(desc->raidPtr->Queues[desc->fcol]), desc->writereq, RF_IO_NORMAL_PRIORITY);
379 }
380 return (0);
381 }
382 /* called at interrupt context when the write has completed.
383 * at user level & in the kernel, wake up the copyback thread.
384 * in the simulator, invoke the next copyback directly.
385 * can't free diskqueuedata structs in the kernel b/c we're at interrupt context.
386 */
387 static int
388 rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
389 {
390 if (status && status != -100) {
391 printf("raid%d: copyback write failed. Aborting.\n",
392 desc->raidPtr->raidid);
393 }
394 desc->status = status;
395 rf_MCPairWakeupFunc(desc->mcpair);
396 return (0);
397 }
398 /* invoked when the copyback has completed */
399 static void
400 rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
401 {
402 RF_Raid_t *raidPtr = desc->raidPtr;
403 struct timeval t, diff;
404
405 if (!status) {
406 RF_LOCK_MUTEX(raidPtr->mutex);
407 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
408 RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
409 rf_FreeSpareTable(raidPtr);
410 } else {
411 raidPtr->Disks[desc->spCol].status = rf_ds_spare;
412 }
413 RF_UNLOCK_MUTEX(raidPtr->mutex);
414
415 RF_GETTIME(t);
416 RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
417 #if 0
418 printf("Copyback time was %d.%06d seconds\n",
419 (int) diff.tv_sec, (int) diff.tv_usec);
420 #endif
421 } else
422 printf("raid%d: Copyback failure. Status: %d\n",
423 raidPtr->raidid, status);
424
425 RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
426 rf_FreeMCPair(desc->mcpair);
427 RF_Free(desc, sizeof(*desc));
428
429 rf_copyback_in_progress = 0;
430 rf_ResumeNewRequests(raidPtr);
431 }
Cache object: 69422fae3278e88e18b394580f43c76c
|