1 /* $NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $");
35
36 #include "rf_archs.h"
37
38 #if RF_INCLUDE_PARITYLOGGING > 0
39
40 /*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44 #include <dev/raidframe/raidframevar.h>
45
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
48 #include "rf_raid.h"
49 #include "rf_dag.h"
50 #include "rf_dagfuncs.h"
51 #include "rf_desc.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
57 #include "rf_map.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
60
61 static RF_CommonLogData_t *
62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
63 {
64 RF_CommonLogData_t *common = NULL;
65 int rc;
66
67 /* Return a struct for holding common parity log information from the
68 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
69 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
70
71 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72 if (raidPtr->parityLogDiskQueue.freeCommonList) {
73 common = raidPtr->parityLogDiskQueue.freeCommonList;
74 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
75 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76 } else {
77 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
78 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
79 rc = rf_mutex_init(&common->mutex);
80 if (rc) {
81 rf_print_unable_to_init_mutex(__FILE__, __LINE__, rc);
82 RF_Free(common, sizeof(RF_CommonLogData_t));
83 common = NULL;
84 }
85 }
86 common->next = NULL;
87 return (common);
88 }
89
90 static void
91 FreeParityLogCommonData(RF_CommonLogData_t * common)
92 {
93 RF_Raid_t *raidPtr;
94
95 /* Insert a single struct for holding parity log information (data)
96 * into the free list (rf_parityLogDiskQueue.freeCommonList).
97 * NON-BLOCKING */
98
99 raidPtr = common->raidPtr;
100 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
101 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
102 raidPtr->parityLogDiskQueue.freeCommonList = common;
103 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
104 }
105
106 static RF_ParityLogData_t *
107 AllocParityLogData(RF_Raid_t * raidPtr)
108 {
109 RF_ParityLogData_t *data = NULL;
110
111 /* Return a struct for holding parity log information from the free
112 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
113 * call RF_Malloc to create a new structure. NON-BLOCKING */
114
115 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
116 if (raidPtr->parityLogDiskQueue.freeDataList) {
117 data = raidPtr->parityLogDiskQueue.freeDataList;
118 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
119 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
120 } else {
121 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
122 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
123 }
124 data->next = NULL;
125 data->prev = NULL;
126 return (data);
127 }
128
129
130 static void
131 FreeParityLogData(RF_ParityLogData_t * data)
132 {
133 RF_ParityLogData_t *nextItem;
134 RF_Raid_t *raidPtr;
135
136 /* Insert a linked list of structs for holding parity log information
137 * (data) into the free list (parityLogDiskQueue.freeList).
138 * NON-BLOCKING */
139
140 raidPtr = data->common->raidPtr;
141 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
142 while (data) {
143 nextItem = data->next;
144 data->next = raidPtr->parityLogDiskQueue.freeDataList;
145 raidPtr->parityLogDiskQueue.freeDataList = data;
146 data = nextItem;
147 }
148 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
149 }
150
151
152 static void
153 EnqueueParityLogData(
154 RF_ParityLogData_t * data,
155 RF_ParityLogData_t ** head,
156 RF_ParityLogData_t ** tail)
157 {
158 RF_Raid_t *raidPtr;
159
160 /* Insert an in-core parity log (*data) into the head of a disk queue
161 * (*head, *tail). NON-BLOCKING */
162
163 raidPtr = data->common->raidPtr;
164 if (rf_parityLogDebug)
165 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
166 RF_ASSERT(data->prev == NULL);
167 RF_ASSERT(data->next == NULL);
168 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
169 if (*head) {
170 /* insert into head of queue */
171 RF_ASSERT((*head)->prev == NULL);
172 RF_ASSERT((*tail)->next == NULL);
173 data->next = *head;
174 (*head)->prev = data;
175 *head = data;
176 } else {
177 /* insert into empty list */
178 RF_ASSERT(*head == NULL);
179 RF_ASSERT(*tail == NULL);
180 *head = data;
181 *tail = data;
182 }
183 RF_ASSERT((*head)->prev == NULL);
184 RF_ASSERT((*tail)->next == NULL);
185 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
186 }
187
188 static RF_ParityLogData_t *
189 DequeueParityLogData(
190 RF_Raid_t * raidPtr,
191 RF_ParityLogData_t ** head,
192 RF_ParityLogData_t ** tail,
193 int ignoreLocks)
194 {
195 RF_ParityLogData_t *data;
196
197 /* Remove and return an in-core parity log from the tail of a disk
198 * queue (*head, *tail). NON-BLOCKING */
199
200 /* remove from tail, preserving FIFO order */
201 if (!ignoreLocks)
202 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
203 data = *tail;
204 if (data) {
205 if (*head == *tail) {
206 /* removing last item from queue */
207 *head = NULL;
208 *tail = NULL;
209 } else {
210 *tail = (*tail)->prev;
211 (*tail)->next = NULL;
212 RF_ASSERT((*head)->prev == NULL);
213 RF_ASSERT((*tail)->next == NULL);
214 }
215 data->next = NULL;
216 data->prev = NULL;
217 if (rf_parityLogDebug)
218 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
219 }
220 if (*head) {
221 RF_ASSERT((*head)->prev == NULL);
222 RF_ASSERT((*tail)->next == NULL);
223 }
224 if (!ignoreLocks)
225 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
226 return (data);
227 }
228
229
230 static void
231 RequeueParityLogData(
232 RF_ParityLogData_t * data,
233 RF_ParityLogData_t ** head,
234 RF_ParityLogData_t ** tail)
235 {
236 RF_Raid_t *raidPtr;
237
238 /* Insert an in-core parity log (*data) into the tail of a disk queue
239 * (*head, *tail). NON-BLOCKING */
240
241 raidPtr = data->common->raidPtr;
242 RF_ASSERT(data);
243 if (rf_parityLogDebug)
244 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
245 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
246 if (*tail) {
247 /* append to tail of list */
248 data->prev = *tail;
249 data->next = NULL;
250 (*tail)->next = data;
251 *tail = data;
252 } else {
253 /* inserting into an empty list */
254 *head = data;
255 *tail = data;
256 (*head)->prev = NULL;
257 (*tail)->next = NULL;
258 }
259 RF_ASSERT((*head)->prev == NULL);
260 RF_ASSERT((*tail)->next == NULL);
261 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
262 }
263
264 RF_ParityLogData_t *
265 rf_CreateParityLogData(
266 RF_ParityRecordType_t operation,
267 RF_PhysDiskAddr_t * pda,
268 caddr_t bufPtr,
269 RF_Raid_t * raidPtr,
270 int (*wakeFunc) (RF_DagNode_t * node, int status),
271 void *wakeArg,
272 RF_AccTraceEntry_t * tracerec,
273 RF_Etimer_t startTime)
274 {
275 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
276 RF_CommonLogData_t *common;
277 RF_PhysDiskAddr_t *diskAddress;
278 int boundary, offset = 0;
279
280 /* Return an initialized struct of info to be logged. Build one item
281 * per physical disk address, one item per region.
282 *
283 * NON-BLOCKING */
284
285 diskAddress = pda;
286 common = AllocParityLogCommonData(raidPtr);
287 RF_ASSERT(common);
288
289 common->operation = operation;
290 common->bufPtr = bufPtr;
291 common->raidPtr = raidPtr;
292 common->wakeFunc = wakeFunc;
293 common->wakeArg = wakeArg;
294 common->tracerec = tracerec;
295 common->startTime = startTime;
296 common->cnt = 0;
297
298 if (rf_parityLogDebug)
299 printf("[entering CreateParityLogData]\n");
300 while (diskAddress) {
301 common->cnt++;
302 data = AllocParityLogData(raidPtr);
303 RF_ASSERT(data);
304 data->common = common;
305 data->next = NULL;
306 data->prev = NULL;
307 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
308 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
309 /* disk address does not cross a region boundary */
310 data->diskAddress = *diskAddress;
311 data->bufOffset = offset;
312 offset = offset + diskAddress->numSector;
313 EnqueueParityLogData(data, &resultHead, &resultTail);
314 /* adjust disk address */
315 diskAddress = diskAddress->next;
316 } else {
317 /* disk address crosses a region boundary */
318 /* find address where region is crossed */
319 boundary = 0;
320 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
321 boundary++;
322
323 /* enter data before the boundary */
324 data->diskAddress = *diskAddress;
325 data->diskAddress.numSector = boundary;
326 data->bufOffset = offset;
327 offset += boundary;
328 EnqueueParityLogData(data, &resultHead, &resultTail);
329 /* adjust disk address */
330 diskAddress->startSector += boundary;
331 diskAddress->numSector -= boundary;
332 }
333 }
334 if (rf_parityLogDebug)
335 printf("[leaving CreateParityLogData]\n");
336 return (resultHead);
337 }
338
339
340 RF_ParityLogData_t *
341 rf_SearchAndDequeueParityLogData(
342 RF_Raid_t * raidPtr,
343 int regionID,
344 RF_ParityLogData_t ** head,
345 RF_ParityLogData_t ** tail,
346 int ignoreLocks)
347 {
348 RF_ParityLogData_t *w;
349
350 /* Remove and return an in-core parity log from a specified region
351 * (regionID). If a matching log is not found, return NULL.
352 *
353 * NON-BLOCKING. */
354
355 /* walk backward through a list, looking for an entry with a matching
356 * region ID */
357 if (!ignoreLocks)
358 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
359 w = (*tail);
360 while (w) {
361 if (w->regionID == regionID) {
362 /* remove an element from the list */
363 if (w == *tail) {
364 if (*head == *tail) {
365 /* removing only element in the list */
366 *head = NULL;
367 *tail = NULL;
368 } else {
369 /* removing last item in the list */
370 *tail = (*tail)->prev;
371 (*tail)->next = NULL;
372 RF_ASSERT((*head)->prev == NULL);
373 RF_ASSERT((*tail)->next == NULL);
374 }
375 } else {
376 if (w == *head) {
377 /* removing first item in the list */
378 *head = (*head)->next;
379 (*head)->prev = NULL;
380 RF_ASSERT((*head)->prev == NULL);
381 RF_ASSERT((*tail)->next == NULL);
382 } else {
383 /* removing an item from the middle of
384 * the list */
385 w->prev->next = w->next;
386 w->next->prev = w->prev;
387 RF_ASSERT((*head)->prev == NULL);
388 RF_ASSERT((*tail)->next == NULL);
389 }
390 }
391 w->prev = NULL;
392 w->next = NULL;
393 if (rf_parityLogDebug)
394 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
395 return (w);
396 } else
397 w = w->prev;
398 }
399 if (!ignoreLocks)
400 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
401 return (NULL);
402 }
403
404 static RF_ParityLogData_t *
405 DequeueMatchingLogData(
406 RF_Raid_t * raidPtr,
407 RF_ParityLogData_t ** head,
408 RF_ParityLogData_t ** tail)
409 {
410 RF_ParityLogData_t *logDataList, *logData;
411 int regionID;
412
413 /* Remove and return an in-core parity log from the tail of a disk
414 * queue (*head, *tail). Then remove all matching (identical
415 * regionIDs) logData and return as a linked list.
416 *
417 * NON-BLOCKING */
418
419 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
420 if (logDataList) {
421 regionID = logDataList->regionID;
422 logData = logDataList;
423 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
424 while (logData->next) {
425 logData = logData->next;
426 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
427 }
428 }
429 return (logDataList);
430 }
431
432
433 static RF_ParityLog_t *
434 AcquireParityLog(
435 RF_ParityLogData_t * logData,
436 int finish)
437 {
438 RF_ParityLog_t *log = NULL;
439 RF_Raid_t *raidPtr;
440
441 /* Grab a log buffer from the pool and return it. If no buffers are
442 * available, return NULL. NON-BLOCKING */
443 raidPtr = logData->common->raidPtr;
444 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
445 if (raidPtr->parityLogPool.parityLogs) {
446 log = raidPtr->parityLogPool.parityLogs;
447 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
448 log->regionID = logData->regionID;
449 log->numRecords = 0;
450 log->next = NULL;
451 raidPtr->logsInUse++;
452 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
453 } else {
454 /* no logs available, so place ourselves on the queue of work
455 * waiting on log buffers this is done while
456 * parityLogPool.mutex is held, to ensure synchronization with
457 * ReleaseParityLogs. */
458 if (rf_parityLogDebug)
459 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
460 if (finish)
461 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
462 else
463 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
464 }
465 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
466 return (log);
467 }
468
469 void
470 rf_ReleaseParityLogs(
471 RF_Raid_t * raidPtr,
472 RF_ParityLog_t * firstLog)
473 {
474 RF_ParityLogData_t *logDataList;
475 RF_ParityLog_t *log, *lastLog;
476 int cnt;
477
478 /* Insert a linked list of parity logs (firstLog) to the free list
479 * (parityLogPool.parityLogPool)
480 *
481 * NON-BLOCKING. */
482
483 RF_ASSERT(firstLog);
484
485 /* Before returning logs to global free list, service all requests
486 * which are blocked on logs. Holding mutexes for parityLogPool and
487 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
488 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
489 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
490 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
491 log = firstLog;
492 if (firstLog)
493 firstLog = firstLog->next;
494 log->numRecords = 0;
495 log->next = NULL;
496 while (logDataList && log) {
497 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
498 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
499 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
500 if (rf_parityLogDebug)
501 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
502 if (log == NULL) {
503 log = firstLog;
504 if (firstLog) {
505 firstLog = firstLog->next;
506 log->numRecords = 0;
507 log->next = NULL;
508 }
509 }
510 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
511 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
512 if (log)
513 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
514 }
515 /* return remaining logs to pool */
516 if (log) {
517 log->next = firstLog;
518 firstLog = log;
519 }
520 if (firstLog) {
521 lastLog = firstLog;
522 raidPtr->logsInUse--;
523 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
524 while (lastLog->next) {
525 lastLog = lastLog->next;
526 raidPtr->logsInUse--;
527 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
528 }
529 lastLog->next = raidPtr->parityLogPool.parityLogs;
530 raidPtr->parityLogPool.parityLogs = firstLog;
531 cnt = 0;
532 log = raidPtr->parityLogPool.parityLogs;
533 while (log) {
534 cnt++;
535 log = log->next;
536 }
537 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
538 }
539 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
540 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
541 }
542
543 static void
544 ReintLog(
545 RF_Raid_t * raidPtr,
546 int regionID,
547 RF_ParityLog_t * log)
548 {
549 RF_ASSERT(log);
550
551 /* Insert an in-core parity log (log) into the disk queue of
552 * reintegration work. Set the flag (reintInProgress) for the
553 * specified region (regionID) to indicate that reintegration is in
554 * progress for this region. NON-BLOCKING */
555
556 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
557 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
558 * complete */
559
560 if (rf_parityLogDebug)
561 printf("[requesting reintegration of region %d]\n", log->regionID);
562 /* move record to reintegration queue */
563 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
564 log->next = raidPtr->parityLogDiskQueue.reintQueue;
565 raidPtr->parityLogDiskQueue.reintQueue = log;
566 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
567 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
568 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
569 }
570
571 static void
572 FlushLog(
573 RF_Raid_t * raidPtr,
574 RF_ParityLog_t * log)
575 {
576 /* insert a core log (log) into a list of logs
577 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
578 * NON-BLOCKING */
579
580 RF_ASSERT(log);
581 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
582 RF_ASSERT(log->next == NULL);
583 /* move log to flush queue */
584 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585 log->next = raidPtr->parityLogDiskQueue.flushQueue;
586 raidPtr->parityLogDiskQueue.flushQueue = log;
587 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
588 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
589 }
590
591 static int
592 DumpParityLogToDisk(
593 int finish,
594 RF_ParityLogData_t * logData)
595 {
596 int i, diskCount, regionID = logData->regionID;
597 RF_ParityLog_t *log;
598 RF_Raid_t *raidPtr;
599
600 raidPtr = logData->common->raidPtr;
601
602 /* Move a core log to disk. If the log disk is full, initiate
603 * reintegration.
604 *
605 * Return (0) if we can enqueue the dump immediately, otherwise return
606 * (1) to indicate we are blocked on reintegration and control of the
607 * thread should be relinquished.
608 *
609 * Caller must hold regionInfo[regionID].mutex
610 *
611 * NON-BLOCKING */
612
613 if (rf_parityLogDebug)
614 printf("[dumping parity log to disk, region %d]\n", regionID);
615 log = raidPtr->regionInfo[regionID].coreLog;
616 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
617 RF_ASSERT(log->next == NULL);
618
619 /* if reintegration is in progress, must queue work */
620 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
621 if (raidPtr->regionInfo[regionID].reintInProgress) {
622 /* Can not proceed since this region is currently being
623 * reintegrated. We can not block, so queue remaining work and
624 * return */
625 if (rf_parityLogDebug)
626 printf("[region %d waiting on reintegration]\n", regionID);
627 /* XXX not sure about the use of finish - shouldn't this
628 * always be "Enqueue"? */
629 if (finish)
630 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
631 else
632 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
633 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
634 return (1); /* relenquish control of this thread */
635 }
636 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
637 raidPtr->regionInfo[regionID].coreLog = NULL;
638 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
639 /* IMPORTANT!! this loop bound assumes region disk holds an
640 * integral number of core logs */
641 {
642 /* update disk map for this region */
643 diskCount = raidPtr->regionInfo[regionID].diskCount;
644 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
645 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
646 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
647 }
648 log->diskOffset = diskCount;
649 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
650 FlushLog(raidPtr, log);
651 } else {
652 /* no room for log on disk, send it to disk manager and
653 * request reintegration */
654 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
655 ReintLog(raidPtr, regionID, log);
656 }
657 if (rf_parityLogDebug)
658 printf("[finished dumping parity log to disk, region %d]\n", regionID);
659 return (0);
660 }
661
662 int
663 rf_ParityLogAppend(
664 RF_ParityLogData_t * logData,
665 int finish,
666 RF_ParityLog_t ** incomingLog,
667 int clearReintFlag)
668 {
669 int regionID, logItem, itemDone;
670 RF_ParityLogData_t *item;
671 int punt, done = RF_FALSE;
672 RF_ParityLog_t *log;
673 RF_Raid_t *raidPtr;
674 RF_Etimer_t timer;
675 int (*wakeFunc) (RF_DagNode_t * node, int status);
676 void *wakeArg;
677
678 /* Add parity to the appropriate log, one sector at a time. This
679 * routine is called is called by dag functions ParityLogUpdateFunc
680 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
681 *
682 * Parity to be logged is contained in a linked-list (logData). When
683 * this routine returns, every sector in the list will be in one of
684 * three places: 1) entered into the parity log 2) queued, waiting on
685 * reintegration 3) queued, waiting on a core log
686 *
687 * Blocked work is passed to the ParityLoggingDiskManager for completion.
688 * Later, as conditions which required the block are removed, the work
689 * reenters this routine with the "finish" parameter set to "RF_TRUE."
690 *
691 * NON-BLOCKING */
692
693 raidPtr = logData->common->raidPtr;
694 /* lock the region for the first item in logData */
695 RF_ASSERT(logData != NULL);
696 regionID = logData->regionID;
697 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
698 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
699
700 if (clearReintFlag) {
701 /* Enable flushing for this region. Holding both locks
702 * provides a synchronization barrier with DumpParityLogToDisk */
703 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
704 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
705 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
706 raidPtr->regionInfo[regionID].diskCount = 0;
707 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
708 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
709 * enabled */
710 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
711 }
712 /* process each item in logData */
713 while (logData) {
714 /* remove an item from logData */
715 item = logData;
716 logData = logData->next;
717 item->next = NULL;
718 item->prev = NULL;
719
720 if (rf_parityLogDebug)
721 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
722
723 /* see if we moved to a new region */
724 if (regionID != item->regionID) {
725 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
726 regionID = item->regionID;
727 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
728 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
729 }
730 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
731 * can happen in one of two ways: 1) no core
732 * log (AcquireParityLog) 2) waiting on
733 * reintegration (DumpParityLogToDisk) If punt
734 * is RF_TRUE, the dataItem was queued, so
735 * skip to next item. */
736
737 /* process item, one sector at a time, until all sectors
738 * processed or we punt */
739 if (item->diskAddress.numSector > 0)
740 done = RF_FALSE;
741 else
742 RF_ASSERT(0);
743 while (!punt && !done) {
744 /* verify that a core log exists for this region */
745 if (!raidPtr->regionInfo[regionID].coreLog) {
746 /* Attempt to acquire a parity log. If
747 * acquisition fails, queue remaining work in
748 * data item and move to nextItem. */
749 if (incomingLog)
750 if (*incomingLog) {
751 RF_ASSERT((*incomingLog)->next == NULL);
752 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
753 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
754 *incomingLog = NULL;
755 } else
756 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
757 else
758 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
759 /* Note: AcquireParityLog either returns a log
760 * or enqueues currentItem */
761 }
762 if (!raidPtr->regionInfo[regionID].coreLog)
763 punt = RF_TRUE; /* failed to find a core log */
764 else {
765 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
766 /* verify that the log has room for new
767 * entries */
768 /* if log is full, dump it to disk and grab a
769 * new log */
770 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
771 /* log is full, dump it to disk */
772 if (DumpParityLogToDisk(finish, item))
773 punt = RF_TRUE; /* dump unsuccessful,
774 * blocked on
775 * reintegration */
776 else {
777 /* dump was successful */
778 if (incomingLog)
779 if (*incomingLog) {
780 RF_ASSERT((*incomingLog)->next == NULL);
781 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
782 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
783 *incomingLog = NULL;
784 } else
785 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
786 else
787 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
788 /* if a core log is not
789 * available, must queue work
790 * and return */
791 if (!raidPtr->regionInfo[regionID].coreLog)
792 punt = RF_TRUE; /* blocked on log
793 * availability */
794 }
795 }
796 }
797 /* if we didn't punt on this item, attempt to add a
798 * sector to the core log */
799 if (!punt) {
800 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
801 /* at this point, we have a core log with
802 * enough room for a sector */
803 /* copy a sector into the log */
804 log = raidPtr->regionInfo[regionID].coreLog;
805 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
806 logItem = log->numRecords++;
807 log->records[logItem].parityAddr = item->diskAddress;
808 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
809 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
810 log->records[logItem].parityAddr.numSector = 1;
811 log->records[logItem].operation = item->common->operation;
812 memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
813 item->diskAddress.numSector--;
814 item->diskAddress.startSector++;
815 if (item->diskAddress.numSector == 0)
816 done = RF_TRUE;
817 }
818 }
819
820 if (!punt) {
821 /* Processed this item completely, decrement count of
822 * items to be processed. */
823 RF_ASSERT(item->diskAddress.numSector == 0);
824 RF_LOCK_MUTEX(item->common->mutex);
825 item->common->cnt--;
826 if (item->common->cnt == 0)
827 itemDone = RF_TRUE;
828 else
829 itemDone = RF_FALSE;
830 RF_UNLOCK_MUTEX(item->common->mutex);
831 if (itemDone) {
832 /* Finished processing all log data for this
833 * IO Return structs to free list and invoke
834 * wakeup function. */
835 timer = item->common->startTime; /* grab initial value of
836 * timer */
837 RF_ETIMER_STOP(timer);
838 RF_ETIMER_EVAL(timer);
839 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
840 if (rf_parityLogDebug)
841 printf("[waking process for region %d]\n", item->regionID);
842 wakeFunc = item->common->wakeFunc;
843 wakeArg = item->common->wakeArg;
844 FreeParityLogCommonData(item->common);
845 FreeParityLogData(item);
846 (wakeFunc) (wakeArg, 0);
847 } else
848 FreeParityLogData(item);
849 }
850 }
851 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
852 if (rf_parityLogDebug)
853 printf("[exiting ParityLogAppend]\n");
854 return (0);
855 }
856
857
858 void
859 rf_EnableParityLogging(RF_Raid_t * raidPtr)
860 {
861 int regionID;
862
863 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
864 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
865 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
866 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
867 }
868 if (rf_parityLogDebug)
869 printf("[parity logging enabled]\n");
870 }
871 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
Cache object: b937a06a91beaa8e862033316c10138e
|