1 /*
2 * Copyright (c) International Business Machines Corp., 2000-2002
3 * Portions Copyright (c) Christoph Hellwig, 2001-2002
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 /*
21 * jfs_logmgr.c: log manager
22 *
23 * for related information, see transaction manager (jfs_txnmgr.c), and
24 * recovery manager (jfs_logredo.c).
25 *
26 * note: for detail, RTFS.
27 *
28 * log buffer manager:
29 * special purpose buffer manager supporting log i/o requirements.
30 * per log serial pageout of logpage
31 * queuing i/o requests and redrive i/o at iodone
32 * maintain current logpage buffer
33 * no caching since append only
34 * appropriate jfs buffer cache buffers as needed
35 *
36 * group commit:
37 * transactions which wrote COMMIT records in the same in-memory
38 * log page during the pageout of previous/current log page(s) are
39 * committed together by the pageout of the page.
40 *
41 * TBD lazy commit:
42 * transactions are committed asynchronously when the log page
43 * containing its COMMIT record is paged out when it becomes full;
44 *
45 * serialization:
46 * . a per log lock serialize log write.
47 * . a per log lock serialize group commit.
48 * . a per log lock serialize log open/close;
49 *
50 * TBD log integrity:
51 * careful-write (ping-pong) of last logpage to recover from crash
52 * in overwrite.
53 * detection of split (out-of-order) write of physical sectors
54 * of last logpage via timestamp at end of each sector
55 * (with its mirror data array at the trailer).
56 *
57 * alternatives:
58 * lsn - 64-bit monotonically increasing integer vs
59 * 32-bit lspn and page eor.
60 */
61
62 #include <linux/fs.h>
63 #include <linux/locks.h>
64 #include <linux/blkdev.h>
65 #include <linux/interrupt.h>
66 #include <linux/smp_lock.h>
67 #include <linux/completion.h>
68 #include "jfs_incore.h"
69 #include "jfs_filsys.h"
70 #include "jfs_metapage.h"
71 #include "jfs_txnmgr.h"
72 #include "jfs_debug.h"
73
74
75 /*
76 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
77 */
78 static struct lbuf *log_redrive_list;
79 static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
/* the jfsIO kernel thread sleeps here until lbuf's are queued for redrive */
80 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
81
82
83 /*
84 * log read/write serialization (per log)
85 */
86 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock)
87 #define LOG_LOCK(log) down(&((log)->loglock))
88 #define LOG_UNLOCK(log) up(&((log)->loglock))
89
90
91 /*
92 * log group commit serialization (per log)
93 */
94
/* gclock is also taken from I/O completion (lmPostGC), hence the _irq forms */
95 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
96 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
97 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
98 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
99
100 /*
101 * log sync serialization (per log)
102 */
103 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
104 #define LOGSYNC_BARRIER(logsize) ((logsize)/4)
105 /*
106 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
107 #define LOGSYNC_BARRIER(logsize) ((logsize)/2)
108 */
109
110
111 /*
112 * log buffer cache synchronization
113 */
114 static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
115
116 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
117 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
118
119 /*
120 * See __SLEEP_COND in jfs_locks.h
121 */
122 #define LCACHE_SLEEP_COND(wq, cond, flags) \
123 do { \
124 if (cond) \
125 break; \
126 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
127 } while (0)
128
129 #define LCACHE_WAKEUP(event) wake_up(event)
130
131
132 /*
133 * lbuf buffer cache (lCache) control
134 */
135 /* log buffer manager pageout control (cumulative, inclusive) */
136 #define lbmREAD 0x0001
137 #define lbmWRITE 0x0002 /* enqueue at tail of write queue;
138 * init pageout if at head of queue;
139 */
140 #define lbmRELEASE 0x0004 /* remove from write queue
141 * at completion of pageout;
142 * do not free/recycle it yet:
143 * caller will free it;
144 */
145 #define lbmSYNC 0x0008 /* do not return to freelist
146 * when removed from write queue;
147 */
148 #define lbmFREE 0x0010 /* return to freelist
149 * at completion of pageout;
150 * the buffer may be recycled;
151 */
152 #define lbmDONE 0x0020
153 #define lbmERROR 0x0040
154 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
155 * of log page
156 */
157 #define lbmDIRECT 0x0100
158
159 /*
160 * external references
161 */
162 extern void txLazyUnlock(struct tblock * tblk);
163 extern int jfs_stop_threads;
164 extern struct completion jfsIOwait;
165
166 /*
167 * forward references
168 */
169 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
170 struct lrd * lrd, struct tlock * tlck);
171
172 static int lmNextPage(struct jfs_log * log);
173 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);
174
175 static int lbmLogInit(struct jfs_log * log);
176 static void lbmLogShutdown(struct jfs_log * log);
177 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
178 static void lbmFree(struct lbuf * bp);
179 static void lbmfree(struct lbuf * bp);
180 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
181 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
182 int cant_block);
183 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
184 static int lbmIOWait(struct lbuf * bp, int flag);
185 static void lbmIODone(struct buffer_head *bh, int);
186 static void lbmStartIO(struct lbuf * bp);
187 static void lmGCwrite(struct jfs_log * log, int cant_block);
188
189
190 /*
191 * statistics
192 */
193 #ifdef CONFIG_JFS_STATISTICS
/* NOTE(review): counters are bumped via INCREMENT() with no dedicated lock -
 * presumably serialized by LOG_LOCK/LOGGC_LOCK at each call site; confirm. */
194 struct lmStat {
195 uint commit; /* # of commit */
196 uint pagedone; /* # of page written */
197 uint submitted; /* # of pages submitted */
198 uint full_page; /* # of full pages submitted */
199 uint partial_page; /* # of partial pages submitted */
200 } lmStat;
201 #endif
202
203
204 /*
205 * NAME: lmLog()
206 *
207 * FUNCTION: write a log record;
208 *
209 * PARAMETER:
 * log - log to append the record to
 * tblk - transaction of the record; NULL for (out-of-transaction)
 * writes issued by JFS itself
 * lrd - log record descriptor to append
 * tlck - tlock of the logged page; NULL when no page data
 * accompanies the record
210 *
211 * RETURN: lsn - offset to the next log record to write (end-of-log);
212 * -1 - error;
213 *
214 * note: todo: log error handler
215 */
216 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
217 struct tlock * tlck)
218 {
219 int lsn;
220 int diffp, difft;
221 struct metapage *mp = NULL;
222
223 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
224 log, tblk, lrd, tlck);
225
226 LOG_LOCK(log);
227
228 /* log by (out-of-transaction) JFS ? */
229 if (tblk == NULL)
230 goto writeRecord;
231
232 /* log from page ? */
233 if (tlck == NULL ||
234 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
235 goto writeRecord;
236
237 /*
238 * initialize/update page/transaction recovery lsn
239 */
240 lsn = log->lsn;
241
/* LOGSYNC lock protects log->synclist and log->count below */
242 LOGSYNC_LOCK(log);
243
244 /*
245 * initialize page lsn if first log write of the page
246 */
247 if (mp->lsn == 0) {
248 mp->log = log;
249 mp->lsn = lsn;
250 log->count++;
251
252 /* insert page at tail of logsynclist */
253 list_add_tail(&mp->synclist, &log->synclist);
254 }
255
256 /*
257 * initialize/update lsn of tblock of the page
258 *
259 * transaction inherits oldest lsn of pages associated
260 * with allocation/deallocation of resources (their
261 * log records are used to reconstruct allocation map
262 * at recovery time: inode for inode allocation map,
263 * B+-tree index of extent descriptors for block
264 * allocation map);
265 * allocation map pages inherit transaction lsn at
266 * commit time to allow forwarding log syncpt past log
267 * records associated with allocation/deallocation of
268 * resources only after persistent map of these map pages
269 * have been updated and propagated to home.
270 */
271 /*
272 * initialize transaction lsn:
273 */
274 if (tblk->lsn == 0) {
275 /* inherit lsn of its first page logged */
276 tblk->lsn = mp->lsn;
277 log->count++;
278
279 /* insert tblock after the page on logsynclist */
280 list_add(&tblk->synclist, &mp->synclist);
281 }
282 /*
283 * update transaction lsn:
284 */
285 else {
286 /* inherit oldest/smallest lsn of page */
287 logdiff(diffp, mp->lsn, log);
288 logdiff(difft, tblk->lsn, log);
289 if (diffp < difft) {
290 /* update tblock lsn with page lsn */
291 tblk->lsn = mp->lsn;
292
293 /* move tblock after page on logsynclist */
294 list_del(&tblk->synclist);
295 list_add(&tblk->synclist, &mp->synclist);
296 }
297 }
298
299 LOGSYNC_UNLOCK(log);
300
301 /*
302 * write the log record
303 */
304 writeRecord:
305 lsn = lmWriteRecord(log, tblk, lrd, tlck);
306
307 /*
308 * forward log syncpt if log reached next syncpt trigger
309 */
310 logdiff(diffp, lsn, log);
311 if (diffp >= log->nextsync)
312 lsn = lmLogSync(log, 0);
313
314 /* update end-of-log lsn */
315 log->lsn = lsn;
316
317 LOG_UNLOCK(log);
318
319 /* return end-of-log address */
320 return lsn;
321 }
322
323
324 /*
325 * NAME: lmWriteRecord()
326 *
327 * FUNCTION: move the log record to current log page
328 *
329 * PARAMETER: log - log to write to
 * tblk - transaction of the record (may be NULL)
 * lrd - log record descriptor; lrd->length is filled in here
 * tlck - tlock whose linelock vectors supply the record data
 * (NULL when the record carries no page data)
330 *
331 * RETURN: end-of-log address
332 *
333 * serialization: LOG_LOCK() held on entry/exit
334 */
335 static int
336 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
337 struct tlock * tlck)
338 {
339 int lsn = 0; /* end-of-log address */
340 struct lbuf *bp; /* dst log page buffer */
341 struct logpage *lp; /* dst log page */
342 caddr_t dst; /* destination address in log page */
343 int dstoffset; /* end-of-log offset in log page */
344 int freespace; /* free space in log page */
345 caddr_t p; /* src meta-data page */
346 caddr_t src;
347 int srclen;
348 int nbytes; /* number of bytes to move */
349 int i;
350 int len;
351 struct linelock *linelock;
352 struct lv *lv;
353 struct lvd *lvd;
354 int l2linesize;
355
356 len = 0;
357
358 /* retrieve destination log page to write */
359 bp = (struct lbuf *) log->bp;
360 lp = (struct logpage *) bp->l_ldata;
361 dstoffset = log->eor;
362
363 /* any log data to write ? */
364 if (tlck == NULL)
365 goto moveLrd;
366
367 /*
368 * move log record data
369 */
370 /* retrieve source meta-data page to log */
371 if (tlck->flag & tlckPAGELOCK) {
372 p = (caddr_t) (tlck->mp->data);
373 linelock = (struct linelock *) & tlck->lock;
374 }
375 /* retrieve source in-memory inode to log */
376 else if (tlck->flag & tlckINODELOCK) {
377 if (tlck->type & tlckDTREE)
378 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
379 else
380 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
381 linelock = (struct linelock *) & tlck->lock;
382 }
383 #ifdef _JFS_WIP
384 else if (tlck->flag & tlckINLINELOCK) {
385
386 inlinelock = (struct inlinelock *) & tlck;
387 p = (caddr_t) & inlinelock->pxd;
388 linelock = (struct linelock *) & tlck;
389 }
390 #endif /* _JFS_WIP */
391 else {
392 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
393 return 0; /* Probably should trap */
394 }
395 l2linesize = linelock->l2linesize;
396
/* copy each log vector of the (possibly chained) linelock into the log */
397 moveData:
398 ASSERT(linelock->index <= linelock->maxcnt);
399
400 lv = linelock->lv;
401 for (i = 0; i < linelock->index; i++, lv++) {
402 if (lv->length == 0)
403 continue;
404
405 /* is page full ? */
406 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
407 /* page become full: move on to next page */
408 lmNextPage(log);
409
/* lmNextPage() switched log->bp: re-fetch page buffer and offset */
410 bp = log->bp;
411 lp = (struct logpage *) bp->l_ldata;
412 dstoffset = LOGPHDRSIZE;
413 }
414
415 /*
416 * move log vector data
417 */
418 src = (u8 *) p + (lv->offset << l2linesize);
419 srclen = lv->length << l2linesize;
420 len += srclen;
421 while (srclen > 0) {
422 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
423 nbytes = min(freespace, srclen);
424 dst = (caddr_t) lp + dstoffset;
425 memcpy(dst, src, nbytes);
426 dstoffset += nbytes;
427
428 /* is page not full ? */
429 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
430 break;
431
432 /* page become full: move on to next page */
433 lmNextPage(log);
434
435 bp = (struct lbuf *) log->bp;
436 lp = (struct logpage *) bp->l_ldata;
437 dstoffset = LOGPHDRSIZE;
438
439 srclen -= nbytes;
440 src += nbytes;
441 }
442
443 /*
444 * move log vector descriptor
445 */
/* NOTE(review): assumes at least 4 bytes remain before the page
 * trailer here; the loop above only guarantees dstoffset is below
 * the trailer - verify a descriptor can never straddle a page. */
446 len += 4;
447 lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
448 lvd->offset = cpu_to_le16(lv->offset);
449 lvd->length = cpu_to_le16(lv->length);
450 dstoffset += 4;
451 jfs_info("lmWriteRecord: lv offset:%d length:%d",
452 lv->offset, lv->length);
453 }
454
/* follow the linelock chain (next is a tlock id, 0 terminates) */
455 if ((i = linelock->next)) {
456 linelock = (struct linelock *) lid_to_tlock(i);
457 goto moveData;
458 }
459
460 /*
461 * move log record descriptor
462 */
463 moveLrd:
464 lrd->length = cpu_to_le16(len);
465
466 src = (caddr_t) lrd;
467 srclen = LOGRDSIZE;
468
469 while (srclen > 0) {
470 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
471 nbytes = min(freespace, srclen);
472 dst = (caddr_t) lp + dstoffset;
473 memcpy(dst, src, nbytes);
474
475 dstoffset += nbytes;
476 srclen -= nbytes;
477
478 /* are there more to move than freespace of page ? */
479 if (srclen)
480 goto pageFull;
481
482 /*
483 * end of log record descriptor
484 */
485
486 /* update last log record eor */
487 log->eor = dstoffset;
488 bp->l_eor = dstoffset;
489 lsn = (log->page << L2LOGPSIZE) + dstoffset;
490
491 if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
492 tblk->clsn = lsn;
493 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
494 bp->l_eor);
495
496 INCREMENT(lmStat.commit); /* # of commit */
497
498 /*
499 * enqueue tblock for group commit:
500 *
501 * enqueue tblock of non-trivial/synchronous COMMIT
502 * at tail of group commit queue
503 * (trivial/asynchronous COMMITs are ignored by
504 * group commit.)
505 */
506 LOGGC_LOCK(log);
507
508 /* init tblock gc state */
509 tblk->flag = tblkGC_QUEUE;
510 tblk->bp = log->bp;
511 tblk->pn = log->page;
512 tblk->eor = log->eor;
513
514 /* enqueue transaction to commit queue */
515 tblk->cqnext = NULL;
516 if (log->cqueue.head) {
517 log->cqueue.tail->cqnext = tblk;
518 log->cqueue.tail = tblk;
519 } else
520 log->cqueue.head = log->cqueue.tail = tblk;
521
522 LOGGC_UNLOCK(log);
523 }
524
525 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
526 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
527
528 /* page not full ? */
529 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
530 return lsn;
531
532 pageFull:
533 /* page become full: move on to next page */
534 lmNextPage(log);
535
536 bp = (struct lbuf *) log->bp;
537 lp = (struct logpage *) bp->l_ldata;
538 dstoffset = LOGPHDRSIZE;
539 src += nbytes;
540 }
541
542 return lsn;
543 }
544
545
546 /*
547 * NAME: lmNextPage()
548 *
549 * FUNCTION: write current page and allocate next page.
550 *
551 * PARAMETER: log
552 *
553 * RETURN: 0
554 *
555 * serialization: LOG_LOCK() held on entry/exit
556 */
557 static int lmNextPage(struct jfs_log * log)
558 {
559 struct logpage *lp;
560 int lspn; /* log sequence page number */
561 int pn; /* current page number */
562 struct lbuf *bp;
563 struct lbuf *nextbp;
564 struct tblock *tblk;
565
566 /* get current log page number and log sequence page number */
567 pn = log->page;
568 bp = log->bp;
569 lp = (struct logpage *) bp->l_ldata;
570 lspn = le32_to_cpu(lp->h.page);
571
572 LOGGC_LOCK(log);
573
574 /*
575 * write or queue the full page at the tail of write queue
576 */
577 /* get the tail tblk on commit queue */
578 tblk = log->cqueue.tail;
579
580 /* every tblk who has COMMIT record on the current page,
581 * and has not been committed, must be on commit queue
582 * since tblk is queued at commit queue at the time
583 * of writing its COMMIT record on the page before
584 * page becomes full (even though the tblk thread
585 * who wrote COMMIT record may have been suspended
586 * currently);
587 */
588
589 /* is page bound with outstanding tail tblk ? */
590 if (tblk && tblk->pn == pn) {
591 /* mark tblk for end-of-page */
592 tblk->flag |= tblkGC_EOP;
593
594 if (log->cflag & logGC_PAGEOUT) {
595 /* if page is not already on write queue,
596 * just enqueue (no lbmWRITE to prevent redrive)
597 * buffer to wqueue to ensure correct serial order
598 * of the pages since log pages will be added
599 * continuously
600 */
/* l_wqnext != NULL means the buffer is already on the write queue */
601 if (bp->l_wqnext == NULL)
602 lbmWrite(log, bp, 0, 0);
603 } else {
604 /*
605 * No current GC leader, initiate group commit
606 */
607 log->cflag |= logGC_PAGEOUT;
608 lmGCwrite(log, 0);
609 }
610 }
611 /* page is not bound with outstanding tblk:
612 * init write or mark it to be redriven (lbmWRITE)
613 */
614 else {
615 /* finalize the page */
616 bp->l_ceor = bp->l_eor;
617 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
618 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
619 }
620 LOGGC_UNLOCK(log);
621
622 /*
623 * allocate/initialize next page
624 */
625 /* if log wraps, the first data page of log is 2
626 * (0 never used, 1 is superblock).
627 */
628 log->page = (pn == log->size - 1) ? 2 : pn + 1;
629 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
630
631 /* allocate/initialize next log page buffer */
632 nextbp = lbmAllocate(log, log->page);
633 nextbp->l_eor = log->eor;
634 log->bp = nextbp;
635
636 /* initialize next log page */
637 lp = (struct logpage *) nextbp->l_ldata;
638 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
639 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
640
641 return 0;
642 }
643
644
645 /*
646 * NAME: lmGroupCommit()
647 *
648 * FUNCTION: group commit
649 * initiate pageout of the pages with COMMIT in the order of
650 * page number - redrive pageout of the page at the head of
651 * pageout queue until full page has been written.
652 *
653 * RETURN:
 * 0 on success, positive EIO if the group write failed
 * (positive errno matches this file's error convention)
654 *
655 * NOTE:
656 * LOGGC_LOCK serializes log group commit queue, and
657 * transaction blocks on the commit queue.
658 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
659 */
660 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
661 {
662 int rc = 0;
663
664 LOGGC_LOCK(log);
665
666 /* group committed already ? */
667 if (tblk->flag & tblkGC_COMMITTED) {
668 if (tblk->flag & tblkGC_ERROR)
669 rc = EIO;
670
671 LOGGC_UNLOCK(log);
672 return rc;
673 }
674 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
675
676 if (tblk->xflag & COMMIT_LAZY)
677 tblk->flag |= tblkGC_LAZY;
678
679 if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
680 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
681 /*
682 * No pageout in progress
683 *
684 * start group commit as its group leader.
685 */
686 log->cflag |= logGC_PAGEOUT;
687
688 lmGCwrite(log, 0);
689 }
690
691 if (tblk->xflag & COMMIT_LAZY) {
692 /*
693 * Lazy transactions can leave now
694 */
695 LOGGC_UNLOCK(log);
696 return 0;
697 }
698
699 /* lmGCwrite gives up LOGGC_LOCK, check again */
700
701 if (tblk->flag & tblkGC_COMMITTED) {
702 if (tblk->flag & tblkGC_ERROR)
703 rc = EIO;
704
705 LOGGC_UNLOCK(log);
706 return rc;
707 }
708
709 /* upcount transaction waiting for completion
710 */
711 log->gcrtc++;
712 tblk->flag |= tblkGC_READY;
713
/* sleep on tblk->gcwait; __SLEEP_COND drops LOGGC_LOCK while asleep
 * and re-acquires it before re-checking the condition */
714 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
715 LOGGC_LOCK(log), LOGGC_UNLOCK(log));
716
717 /* removed from commit queue */
718 if (tblk->flag & tblkGC_ERROR)
719 rc = EIO;
720
721 LOGGC_UNLOCK(log);
722 return rc;
723 }
724
725 /*
726 * NAME: lmGCwrite()
727 *
728 * FUNCTION: group commit write
729 * initiate write of log page, building a group of all transactions
730 * with commit records on that page.
731 *
732 * RETURN: None
733 *
734 * NOTE:
735 * LOGGC_LOCK must be held by caller.
 * Caller also guarantees the commit queue is non-empty:
 * log->cqueue.head is dereferenced without a NULL check.
736 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
737 */
738 static void lmGCwrite(struct jfs_log * log, int cant_write)
739 {
740 struct lbuf *bp;
741 struct logpage *lp;
742 int gcpn; /* group commit page number */
743 struct tblock *tblk;
744 struct tblock *xtblk;
745
746 /*
747 * build the commit group of a log page
748 *
749 * scan commit queue and make a commit group of all
750 * transactions with COMMIT records on the same log page.
751 */
752 /* get the head tblk on the commit queue */
753 tblk = xtblk = log->cqueue.head;
754 gcpn = tblk->pn;
755
756 while (tblk && tblk->pn == gcpn) {
757 xtblk = tblk;
758
759 /* state transition: (QUEUE, READY) -> COMMIT */
760 tblk->flag |= tblkGC_COMMIT;
761 tblk = tblk->cqnext;
762 }
763 tblk = xtblk; /* last tblk of the page */
764
765 /*
766 * pageout to commit transactions on the log page.
767 */
768 bp = (struct lbuf *) tblk->bp;
769 lp = (struct logpage *) bp->l_ldata;
770 /* is page already full ? */
771 if (tblk->flag & tblkGC_EOP) {
772 /* mark page to free at end of group commit of the page */
773 tblk->flag &= ~tblkGC_EOP;
774 tblk->flag |= tblkGC_FREE;
775 bp->l_ceor = bp->l_eor;
776 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
777 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
778 cant_write);
779 INCREMENT(lmStat.full_page);
780 }
781 /* page is not yet full */
782 else {
783 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
784 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
785 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
786 INCREMENT(lmStat.partial_page);
787 }
788 }
789
790 /*
791 * NAME: lmPostGC()
792 *
793 * FUNCTION: group commit post-processing
794 * Processes transactions after their commit records have been written
795 * to disk, redriving log I/O if necessary.
796 *
 * PARAMETER: bp - log page buffer whose group-commit write completed
 *
797 * RETURN: None
798 *
799 * NOTE:
800 * This routine is called at interrupt time by lbmIODone
801 */
802 void lmPostGC(struct lbuf * bp)
803 {
804 unsigned long flags;
805 struct jfs_log *log = bp->l_log;
806 struct logpage *lp;
807 struct tblock *tblk;
808
809 //LOGGC_LOCK(log);
/* open-coded LOGGC_LOCK: irqsave form needed since we may run in irq context */
810 spin_lock_irqsave(&log->gclock, flags);
811 /*
812 * current pageout of group commit completed.
813 *
814 * remove/wakeup transactions from commit queue who were
815 * group committed with the current log page
816 */
817 while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
818 /* if transaction was marked GC_COMMIT then
819 * it has been shipped in the current pageout
820 * and made it to disk - it is committed.
821 */
822
823 if (bp->l_flag & lbmERROR)
824 tblk->flag |= tblkGC_ERROR;
825
826 /* remove it from the commit queue */
827 log->cqueue.head = tblk->cqnext;
828 if (log->cqueue.head == NULL)
829 log->cqueue.tail = NULL;
830 tblk->flag &= ~tblkGC_QUEUE;
/* use NULL, not 0, for the pointer */
831 tblk->cqnext = NULL;
832
833 if (tblk == log->flush_tblk) {
834 /* we can stop flushing the log now */
835 clear_bit(log_FLUSH, &log->flag);
836 log->flush_tblk = NULL;
837 }
838
839 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
840 tblk->flag);
841
842 if (!(tblk->xflag & COMMIT_FORCE))
843 /*
844 * Hand tblk over to lazy commit thread
845 */
846 txLazyUnlock(tblk);
847 else {
848 /* state transition: COMMIT -> COMMITTED */
849 tblk->flag |= tblkGC_COMMITTED;
850
851 if (tblk->flag & tblkGC_READY)
852 log->gcrtc--;
853
854 LOGGC_WAKEUP(tblk);
855 }
856
857 /* was page full before pageout ?
858 * (and this is the last tblk bound with the page)
859 */
860 if (tblk->flag & tblkGC_FREE)
861 lbmFree(bp);
862 /* did page become full after pageout ?
863 * (and this is the last tblk bound with the page)
864 */
865 else if (tblk->flag & tblkGC_EOP) {
866 /* finalize the page */
867 lp = (struct logpage *) bp->l_ldata;
868 bp->l_ceor = bp->l_eor;
869 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
870 jfs_info("lmPostGC: calling lbmWrite");
871 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
872 1);
873 }
874
875 }
876
877 /* are there any transactions who have entered lmGroupCommit()
878 * (whose COMMITs are after that of the last log page written.
879 * They are waiting for new group commit (above at (SLEEP 1))
880 * or lazy transactions are on a full (queued) log page,
881 * select the latest ready transaction as new group leader and
882 * wake her up to lead her group.
883 */
884 if ((tblk = log->cqueue.head) &&
885 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
886 test_bit(log_FLUSH, &log->flag)))
887 /*
888 * Call lmGCwrite with new group leader
889 */
890 lmGCwrite(log, 1);
891
892 /* no transaction are ready yet (transactions are only just
893 * queued (GC_QUEUE) and not entered for group commit yet).
894 * the first transaction entering group commit
895 * will elect herself as new group leader.
896 */
897 else
898 log->cflag &= ~logGC_PAGEOUT;
899
900 //LOGGC_UNLOCK(log);
901 spin_unlock_irqrestore(&log->gclock, flags);
902 return;
903 }
904
905 /*
906 * NAME: lmLogSync()
907 *
908 * FUNCTION: write log SYNCPT record for specified log
909 * if new sync address is available
910 * (normally the case if sync() is executed by back-ground
911 * process).
912 * if not, explicitly run jfs_blogsync() to initiate
913 * getting of new sync address.
914 * calculate new value of i_nextsync which determines when
915 * this code is called again.
916 *
917 * this is called only from lmLog().
918 *
919 * PARAMETER: ip - pointer to logs inode.
920 *
921 * RETURN: 0
922 *
923 * serialization: LOG_LOCK() held on entry/exit
924 */
925 int lmLogSync(struct jfs_log * log, int nosyncwait)
926 {
927 int logsize;
928 int written; /* written since last syncpt */
929 int free; /* free space left available */
930 int delta; /* additional delta to write normally */
931 int more; /* additional write granted */
932 struct lrd lrd;
933 int lsn;
/* N.B. lp here is a logsyncblk, not a logpage as elsewhere in this file */
934 struct logsyncblk *lp;
935
936 /*
937 * forward syncpt
938 */
939 /* if last sync is same as last syncpt,
940 * invoke sync point forward processing to update sync.
941 */
942
943 if (log->sync == log->syncpt) {
944 LOGSYNC_LOCK(log);
945 /* ToDo: push dirty metapages out to disk */
946 // bmLogSync(log);
947
948 if (list_empty(&log->synclist))
949 log->sync = log->lsn;
950 else {
/* oldest outstanding lsn is at the head of the logsynclist */
951 lp = list_entry(log->synclist.next,
952 struct logsyncblk, synclist);
953 log->sync = lp->lsn;
954 }
955 LOGSYNC_UNLOCK(log);
956
957 }
958
959 /* if sync is different from last syncpt,
960 * write a SYNCPT record with syncpt = sync.
961 * reset syncpt = sync
962 */
963 if (log->sync != log->syncpt) {
964 struct super_block *sb = log->sb;
965 struct jfs_sb_info *sbi = JFS_SBI(sb);
966
967 /*
968 * We need to make sure all of the "written" metapages
969 * actually make it to disk
970 */
971 fsync_inode_data_buffers(sbi->ipbmap);
972 fsync_inode_data_buffers(sbi->ipimap);
973 fsync_inode_data_buffers(sb->s_bdev->bd_inode);
974
975 lrd.logtid = 0;
976 lrd.backchain = 0;
977 lrd.type = cpu_to_le16(LOG_SYNCPT);
978 lrd.length = 0;
979 lrd.log.syncpt.sync = cpu_to_le32(log->sync);
980 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
981
982 log->syncpt = log->sync;
983 } else
984 lsn = log->lsn;
985
986 /*
987 * setup next syncpt trigger (SWAG)
988 */
989 logsize = log->logsize;
990
991 logdiff(written, lsn, log);
992 free = logsize - written;
993 delta = LOGSYNC_DELTA(logsize);
994 more = min(free / 2, delta);
995 if (more < 2 * LOGPSIZE) {
996 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
997 /*
998 * log wrapping
999 *
1000 * option 1 - panic ? No.!
1001 * option 2 - shutdown file systems
1002 * associated with log ?
1003 * option 3 - extend log ?
1004 */
1005 /*
1006 * option 4 - second chance
1007 *
1008 * mark log wrapped, and continue.
1009 * when all active transactions are completed,
1010 * mark log valid for recovery.
1011 * if crashed during invalid state, log state
1012 * implies invalid log, forcing fsck().
1013 */
1014 /* mark log state log wrap in log superblock */
1015 /* log->state = LOGWRAP; */
1016
1017 /* reset sync point computation */
1018 log->syncpt = log->sync = lsn;
1019 log->nextsync = delta;
1020 } else
1021 /* next syncpt trigger = written + more */
1022 log->nextsync = written + more;
1023
1024 /* return if lmLogSync() from outside of transaction, e.g., sync() */
1025 if (nosyncwait)
1026 return lsn;
1027
1028 /* if number of bytes written from last sync point is more
1029 * than 1/4 of the log size, stop new transactions from
1030 * starting until all current transactions are completed
1031 * by setting syncbarrier flag.
1032 */
1033 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
1034 set_bit(log_SYNCBARRIER, &log->flag);
1035 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1036 log->syncpt);
1037 /*
1038 * We may have to initiate group commit
1039 */
1040 jfs_flush_journal(log, 0);
1041 }
1042
1043 return lsn;
1044 }
1045
1046
1047 /*
1048 * NAME: lmLogOpen()
1049 *
1050 * FUNCTION: open the log on first open;
1051 * insert filesystem in the active list of the log.
1052 *
1053 * PARAMETER: ipmnt - file system mount inode
1054 * iplog - log inode (out)
1055 *
1056 * RETURN:
 * 0 on success; positive errno (ENOMEM/ENODEV/EINVAL/...) on
 * failure, per this file's error convention.
1057 *
1058 * serialization:
1059 */
1060 int lmLogOpen(struct super_block *sb, struct jfs_log ** logptr)
1061 {
1062 int rc;
1063 struct block_device *bdev;
1064 struct jfs_log *log;
1065
1066 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1067 return ENOMEM;
1068 memset(log, 0, sizeof(struct jfs_log));
1069 init_waitqueue_head(&log->syncwait);
1070
1071 log->sb = sb; /* This should be a list */
1072
1073 if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
1074 goto externalLog;
1075
1076 /*
1077 * in-line log in host file system
1078 *
1079 * file system to log have 1-to-1 relationship;
1080 */
1081
1082 set_bit(log_INLINELOG, &log->flag);
1083 log->bdev = sb->s_bdev;
1084 log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1085 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1086 (L2LOGPSIZE - sb->s_blocksize_bits);
1087 log->l2bsize = sb->s_blocksize_bits;
1088 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1089
1090 /*
1091 * initialize log.
1092 */
1093 if ((rc = lmLogInit(log)))
1094 goto free;
1095 goto out;
1096
1097 /*
1098 * external log as separate logical volume
1099 *
1100 * file systems to log may have n-to-1 relationship;
1101 */
1102 externalLog:
1103
1104 /*
1105 * TODO: Check for already opened log devices
1106 */
1107
1108 if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) {
1109 rc = ENODEV;
1110 goto free;
1111 }
1112
/* NOTE(review): on blkdev_get failure we jump to 'free' without
 * releasing the reference taken by bdget() - verify whether this
 * kernel's blkdev_get drops the bdev reference itself on failure. */
1113 if ((rc = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS))) {
1114 rc = -rc;
1115 goto free;
1116 }
1117
1118 log->bdev = bdev;
1119 memcpy(log->uuid, JFS_SBI(sb)->loguuid, sizeof(log->uuid));
1120
1121 /*
1122 * initialize log:
1123 */
1124 if ((rc = lmLogInit(log)))
1125 goto close;
1126
1127 /*
1128 * add file system to log active file system list
1129 */
1130 if ((rc = lmLogFileSystem(log, JFS_SBI(sb)->uuid, 1)))
1131 goto shutdown;
1132
1133 out:
1134 *logptr = log;
1135 return 0;
1136
1137 /*
1138 * unwind on error
1139 */
1140 shutdown: /* unwind lbmLogInit() */
1141 lbmLogShutdown(log);
1142
1143 close: /* close external log device */
1144 blkdev_put(bdev, BDEV_FS);
1145
1146 free: /* free log descriptor */
1147 kfree(log);
1148
1149 jfs_warn("lmLogOpen: exit(%d)", rc);
1150 return rc;
1151 }
1152
1153
1154 /*
1155 * NAME: lmLogInit()
1156 *
1157 * FUNCTION: log initialization at first log open.
1158 *
1159 * logredo() (or logformat()) should have been run previously.
1160 * initialize the log inode from log superblock.
1161 * set the log state in the superblock to LOGMOUNT and
1162 * write SYNCPT log record.
1163 *
1164 * PARAMETER: log - log structure
1165 *
1166 * RETURN: 0 - if ok
1167 * EINVAL - bad log magic number or superblock dirty
1168 * error returned from logwait()
1169 *
1170 * serialization: single first open thread
1171 */
1172 int lmLogInit(struct jfs_log * log)
1173 {
1174 int rc = 0;
1175 struct lrd lrd;
1176 struct logsuper *logsuper;
1177 struct lbuf *bpsuper;
1178 struct lbuf *bp;
1179 struct logpage *lp;
1180 int lsn;
1181
1182 jfs_info("lmLogInit: log:0x%p", log);
1183
1184 /*
1185 * log inode is overlaid on generic inode where
1186 * dinode have been zeroed out by iRead();
1187 */
1188
1189 /*
1190 * initialize log i/o
1191 */
1192 if ((rc = lbmLogInit(log)))
1193 return rc;
1194
1195 /*
1196 * validate log superblock
1197 */
1198
1199
1200 if (!test_bit(log_INLINELOG, &log->flag))
1201 log->l2bsize = 12; /* XXX kludge alert XXX */
1202 if ((rc = lbmRead(log, 1, &bpsuper)))
1203 goto errout10;
1204
1205 logsuper = (struct logsuper *) bpsuper->l_ldata;
1206
1207 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1208 jfs_warn("*** Log Format Error ! ***");
1209 rc = EINVAL;
1210 goto errout20;
1211 }
1212
1213 /* logredo() should have been run successfully. */
1214 if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1215 jfs_warn("*** Log Is Dirty ! ***");
1216 rc = EINVAL;
1217 goto errout20;
1218 }
1219
1220 /* initialize log inode from log superblock */
1221 if (test_bit(log_INLINELOG,&log->flag)) {
1222 if (log->size != le32_to_cpu(logsuper->size)) {
1223 rc = EINVAL;
1224 goto errout20;
1225 }
1226 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1227 log, (unsigned long long) log->base, log->size);
1228 } else {
1229 if (memcmp(logsuper->uuid, log->uuid, 16)) {
1230 jfs_warn("wrong uuid on JFS log device");
1231 goto errout20;
1232 }
1233 log->size = le32_to_cpu(logsuper->size);
1234 log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1235 jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1236 log, (unsigned long long) log->base, log->size);
1237 }
1238
1239 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1240 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1241
1242 /*
1243 * initialize for log append write mode
1244 */
1245 /* establish current/end-of-log page/buffer */
1246 if ((rc = lbmRead(log, log->page, &bp)))
1247 goto errout20;
1248
1249 lp = (struct logpage *) bp->l_ldata;
1250
1251 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1252 le32_to_cpu(logsuper->end), log->page, log->eor,
1253 le16_to_cpu(lp->h.eor));
1254
1255 // ASSERT(log->eor == lp->h.eor);
1256
1257 log->bp = bp;
1258 bp->l_pn = log->page;
1259 bp->l_eor = log->eor;
1260
1261 /* initialize the group commit serialization lock */
1262 LOGGC_LOCK_INIT(log);
1263
1264 /* if current page is full, move on to next page */
1265 if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1266 lmNextPage(log);
1267
1268 /* allocate/initialize the log write serialization lock */
1269 LOG_LOCK_INIT(log);
1270
1271 /*
1272 * initialize log syncpoint
1273 */
1274 /*
1275 * write the first SYNCPT record with syncpoint = 0
1276 * (i.e., log redo up to HERE !);
1277 * remove current page from lbm write queue at end of pageout
1278 * (to write log superblock update), but do not release to freelist;
1279 */
1280 lrd.logtid = 0;
1281 lrd.backchain = 0;
1282 lrd.type = cpu_to_le16(LOG_SYNCPT);
1283 lrd.length = 0;
1284 lrd.log.syncpt.sync = 0;
1285 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1286 bp = log->bp;
1287 bp->l_ceor = bp->l_eor;
1288 lp = (struct logpage *) bp->l_ldata;
1289 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1290 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1291 if ((rc = lbmIOWait(bp, 0)))
1292 goto errout30;
1293
1294 /* initialize logsync parameters */
1295 log->logsize = (log->size - 2) << L2LOGPSIZE;
1296 log->lsn = lsn;
1297 log->syncpt = lsn;
1298 log->sync = log->syncpt;
1299 log->nextsync = LOGSYNC_DELTA(log->logsize);
1300
1301 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1302 log->lsn, log->syncpt, log->sync);
1303
1304 LOGSYNC_LOCK_INIT(log);
1305
1306 INIT_LIST_HEAD(&log->synclist);
1307
1308 log->cqueue.head = log->cqueue.tail = NULL;
1309 log->flush_tblk = NULL;
1310
1311 log->count = 0;
1312
1313 /*
1314 * initialize for lazy/group commit
1315 */
1316 log->clsn = lsn;
1317
1318 /*
1319 * update/write superblock
1320 */
1321 logsuper->state = cpu_to_le32(LOGMOUNT);
1322 log->serial = le32_to_cpu(logsuper->serial) + 1;
1323 logsuper->serial = cpu_to_le32(log->serial);
1324 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1325 if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1326 goto errout30;
1327
1328 return 0;
1329
1330 /*
1331 * unwind on error
1332 */
1333 errout30: /* release log page */
1334 lbmFree(bp);
1335
1336 errout20: /* release log superblock */
1337 lbmFree(bpsuper);
1338
1339 errout10: /* unwind lbmLogInit() */
1340 lbmLogShutdown(log);
1341
1342 jfs_warn("lmLogInit: exit(%d)", rc);
1343 return rc;
1344 }
1345
1346
1347 /*
1348 * NAME: lmLogClose()
1349 *
1350 * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
1351 * and close it on last close.
1352 *
1353 * PARAMETER: sb - superblock
1354 * log - log inode
1355 *
1356 * RETURN: errors from subroutines
1357 *
1358 * serialization:
1359 */
1360 int lmLogClose(struct super_block *sb, struct jfs_log * log)
1361 {
1362 int rc;
1363
1364 jfs_info("lmLogClose: log:0x%p", log);
1365
1366 if (!test_bit(log_INLINELOG, &log->flag))
1367 goto externalLog;
1368
1369 /*
1370 * in-line log in host file system
1371 */
1372 rc = lmLogShutdown(log);
1373 goto out;
1374
1375 /*
1376 * external log as separate logical volume
1377 */
1378 externalLog:
1379 lmLogFileSystem(log, JFS_SBI(sb)->uuid, 0);
1380 rc = lmLogShutdown(log);
1381 blkdev_put(log->bdev, BDEV_FS);
1382
1383 out:
1384 jfs_info("lmLogClose: exit(%d)", rc);
1385 return rc;
1386 }
1387
1388
1389 /*
1390 * NAME: jfs_flush_journal()
1391 *
1392 * FUNCTION: initiate write of any outstanding transactions to the journal
1393 * and optionally wait until they are all written to disk
1394 *
1395 * wait == 0 flush until latest txn is committed, don't wait
1396 * wait == 1 flush until latest txn is committed, wait
1397 * wait > 1 flush until all txn's are complete, wait
1398 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	/* latest transaction on the commit queue; NULL if queue empty */
	target = log->cqueue.head;

	if (target) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	/*
	 * wait for the target transaction's group commit to complete;
	 * the LOGGC lock is dropped around the sleep and retaken so
	 * remove_wait_queue races safely with the gcwait wakeup
	 */
	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		current->state = TASK_RUNNING;
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	if (wait < 2)
		return;

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if (log->cqueue.head || !list_empty(&log->synclist)) {
		/* poll every quarter second, up to ~200 seconds total */
		for (i = 0; i < 800; i++) {	/* Too much? */
			current->state = TASK_INTERRUPTIBLE;
			schedule_timeout(HZ / 4);
			if ((log->cqueue.head == NULL) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(log->cqueue.head == NULL);
	assert(list_empty(&log->synclist));
	clear_bit(log_FLUSH, &log->flag);
}
1478
1479 /*
1480 * NAME: lmLogShutdown()
1481 *
1482 * FUNCTION: log shutdown at last LogClose().
1483 *
1484 * write log syncpt record.
1485 * update super block to set redone flag to 0.
1486 *
1487 * PARAMETER: log - log inode
1488 *
1489 * RETURN: 0 - success
1490 *
1491 * serialization: single last close thread
1492 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* push all outstanding transactions to disk and wait (wait == 2) */
	jfs_flush_journal(log, 2);

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	fsync_no_super(log->sb->s_dev);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;
	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* seal the page trailer/header eor before the final pageout */
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}
1557
1558
1559 /*
1560 * NAME: lmLogFileSystem()
1561 *
1562 * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
1563 * file system into/from log active file system list.
1564 *
 * PARAMETER: log - pointer to log structure.
 * uuid - uuid of the file system.
 * activate - insert (true)/remove (false) file system
 * from the active list.
1569 *
1570 * RETURN: 0 - success
1571 * errors returned by vms_iowait().
1572 */
1573 static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate)
1574 {
1575 int rc = 0;
1576 int i;
1577 struct logsuper *logsuper;
1578 struct lbuf *bpsuper;
1579
1580 /*
1581 * insert/remove file system device to log active file system list.
1582 */
1583 if ((rc = lbmRead(log, 1, &bpsuper)))
1584 return rc;
1585
1586 logsuper = (struct logsuper *) bpsuper->l_ldata;
1587 if (activate) {
1588 for (i = 0; i < MAX_ACTIVE; i++)
1589 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1590 memcpy(logsuper->active[i].uuid, uuid, 16);
1591 break;
1592 }
1593 if (i == MAX_ACTIVE) {
1594 jfs_warn("Too many file systems sharing journal!");
1595 lbmFree(bpsuper);
1596 return EMFILE; /* Is there a better rc? */
1597 }
1598 } else {
1599 for (i = 0; i < MAX_ACTIVE; i++)
1600 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1601 memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1602 break;
1603 }
1604 if (i == MAX_ACTIVE) {
1605 jfs_warn("Somebody stomped on the journal!");
1606 lbmFree(bpsuper);
1607 return EIO;
1608 }
1609
1610 }
1611
1612 /*
1613 * synchronous write log superblock:
1614 *
1615 * write sidestream bypassing write queue:
1616 * at file system mount, log super block is updated for
1617 * activation of the file system before any log record
1618 * (MOUNT record) of the file system, and at file system
1619 * unmount, all meta data for the file system has been
1620 * flushed before log super block is updated for deactivation
1621 * of the file system.
1622 */
1623 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1624 rc = lbmIOWait(bpsuper, lbmFREE);
1625
1626 return rc;
1627 }
1628
1629 /*
1630 * log buffer manager (lbm)
1631 * ------------------------
1632 *
1633 * special purpose buffer manager supporting log i/o requirements.
1634 *
1635 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wrqueue points to the tail, and buffers are linked via
 * bp->wrqueue field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
1642 */
1643
1644 /*
1645 * lbmLogInit()
1646 *
1647 * initialize per log I/O setup at lmLogInit()
1648 */
1649 static int lbmLogInit(struct jfs_log * log)
1650 { /* log inode */
1651 int i;
1652 struct lbuf *lbuf;
1653
1654 jfs_info("lbmLogInit: log:0x%p", log);
1655
1656 /* initialize current buffer cursor */
1657 log->bp = NULL;
1658
1659 /* initialize log device write queue */
1660 log->wqueue = NULL;
1661
1662 /*
1663 * Each log has its own buffer pages allocated to it. These are
1664 * not managed by the page cache. This ensures that a transaction
1665 * writing to the log does not block trying to allocate a page from
1666 * the page cache (for the log). This would be bad, since page
1667 * allocation waits on the kswapd thread that may be committing inodes
1668 * which would cause log activity. Was that clear? I'm trying to
1669 * avoid deadlock here.
1670 */
1671 init_waitqueue_head(&log->free_wait);
1672
1673 log->lbuf_free = NULL;
1674
1675 for (i = 0; i < LOGPAGES; i++) {
1676 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1677 if (lbuf == 0)
1678 goto error;
1679 lbuf->l_bh.b_data = lbuf->l_ldata =
1680 (char *) __get_free_page(GFP_KERNEL);
1681 if (lbuf->l_ldata == 0) {
1682 kfree(lbuf);
1683 goto error;
1684 }
1685 lbuf->l_log = log;
1686 init_waitqueue_head(&lbuf->l_ioevent);
1687
1688 lbuf->l_bh.b_size = LOGPSIZE;
1689 lbuf->l_bh.b_dev = to_kdev_t(log->bdev->bd_dev);
1690 lbuf->l_bh.b_end_io = lbmIODone;
1691 lbuf->l_bh.b_private = lbuf;
1692 lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata);
1693 lbuf->l_bh.b_state = 0;
1694 init_waitqueue_head(&lbuf->l_bh.b_wait);
1695
1696 lbuf->l_freelist = log->lbuf_free;
1697 log->lbuf_free = lbuf;
1698 }
1699
1700 return (0);
1701
1702 error:
1703 lbmLogShutdown(log);
1704 return (ENOMEM);
1705 }
1706
1707
1708 /*
1709 * lbmLogShutdown()
1710 *
1711 * finalize per log I/O setup at lmLogShutdown()
1712 */
1713 static void lbmLogShutdown(struct jfs_log * log)
1714 {
1715 struct lbuf *lbuf;
1716
1717 jfs_info("lbmLogShutdown: log:0x%p", log);
1718
1719 lbuf = log->lbuf_free;
1720 while (lbuf) {
1721 struct lbuf *next = lbuf->l_freelist;
1722 free_page((unsigned long) lbuf->l_ldata);
1723 kfree(lbuf);
1724 lbuf = next;
1725 }
1726
1727 log->bp = NULL;
1728 }
1729
1730
1731 /*
1732 * lbmAllocate()
1733 *
1734 * allocate an empty log buffer
1735 */
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
{
	struct lbuf *bp;
	unsigned long flags;

	/*
	 * recycle from log buffer freelist if any;
	 * sleeps (dropping LCACHE lock) until a buffer is available
	 */
	LCACHE_LOCK(flags);
	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
	log->lbuf_free = bp->l_freelist;
	LCACHE_UNLOCK(flags);

	bp->l_flag = 0;

	bp->l_wqnext = NULL;
	bp->l_freelist = NULL;

	/* bind buffer to log page pn and its on-device block address */
	bp->l_pn = pn;
	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
	bp->l_bh.b_blocknr = bp->l_blkno;
	bp->l_ceor = 0;

	return bp;
}
1761
1762
1763 /*
1764 * lbmFree()
1765 *
1766 * release a log buffer to freelist
1767 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	/* locked wrapper around lbmfree() for non-LCACHE-holding callers */
	LCACHE_LOCK(flags);

	lbmfree(bp);

	LCACHE_UNLOCK(flags);
}
1778
1779 static void lbmfree(struct lbuf * bp)
1780 {
1781 struct jfs_log *log = bp->l_log;
1782
1783 assert(bp->l_wqnext == NULL);
1784
1785 /*
1786 * return the buffer to head of freelist
1787 */
1788 bp->l_freelist = log->lbuf_free;
1789 log->lbuf_free = bp;
1790
1791 wake_up(&log->free_wait);
1792 return;
1793 }
1794
1795
1796 /*
1797 * NAME: lbmRedrive
1798 *
 * FUNCTION: add a log buffer to the log redrive list
1800 *
1801 * PARAMETER:
1802 * bp - log buffer
1803 *
1804 * NOTES:
1805 * Takes log_redrive_lock.
1806 */
static inline void lbmRedrive(struct lbuf *bp)
{
	unsigned long flags;

	/* push bp onto the global redrive list under log_redrive_lock */
	spin_lock_irqsave(&log_redrive_lock, flags);
	bp->l_redrive_next = log_redrive_list;
	log_redrive_list = bp;
	spin_unlock_irqrestore(&log_redrive_lock, flags);

	/* let the jfsIO thread issue the write from process context */
	wake_up(&jfs_IO_thread_wait);
}
1818
1819
1820 /*
1821 * lbmRead()
1822 */
/*
 * Synchronously read log page pn into a freshly allocated log buffer;
 * *bpp receives the buffer.  Always returns 0 (i/o errors are flagged
 * in bp->l_flag by lbmIODone — NOTE(review): not checked here; verify).
 */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct lbuf *bp;

	/*
	 * allocate a log buffer
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	/* prepare the embedded buffer_head and submit the READ */
	bp->l_flag |= lbmREAD;
	bp->l_bh.b_reqnext = NULL;
	clear_bit(BH_Uptodate, &bp->l_bh.b_state);
	lock_buffer(&bp->l_bh);
	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9);
	generic_make_request(READ, &bp->l_bh);
	run_task_queue(&tq_disk);

	/* lbmIODone clears lbmREAD on completion */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
1848
1849
1850 /*
1851 * lbmWrite()
1852 *
1853 * buffer at head of pageout queue stays after completion of
1854 * partial-page pageout and redriven by explicit initiation of
1855 * pageout by caller until full-page pageout is completed and
1856 * released.
1857 *
1858 * device driver i/o done redrives pageout of new buffer at
1859 * head of pageout queue when current buffer at head of pageout
1860 * queue is released at the completion of its full-page pageout.
1861 *
1862 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
1863 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
1864 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);	/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 * insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp becomes a one-element circle */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			/* circular singly-linked list; log->wqueue is tail */
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		/* not head of queue, or not a write: i/o will be
		 * (re)driven later by lbmIODone/lmGroupCommit */
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* caller may be at interrupt level: defer to jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop LOGGC lock across the (possibly blocking) submit */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
1925
1926
1927 /*
1928 * lbmDirectWrite()
1929 *
1930 * initiate pageout bypassing write queue for sidestream
1931 * (e.g., log superblock) write;
1932 */
1933 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
1934 {
1935 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
1936 bp, flag, bp->l_pn);
1937
1938 /*
1939 * initialize buffer for device driver
1940 */
1941 bp->l_flag = flag | lbmDIRECT;
1942
1943 /* map the logical block address to physical block address */
1944 bp->l_blkno =
1945 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1946
1947 /*
1948 * initiate pageout of the page
1949 */
1950 lbmStartIO(bp);
1951 }
1952
1953
1954 /*
1955 * NAME: lbmStartIO()
1956 *
1957 * FUNCTION: Interface to DD strategy routine
1958 *
1959 * RETURN: none
1960 *
1961 * serialization: LCACHE_LOCK() is NOT held during log i/o;
1962 */
static void lbmStartIO(struct lbuf * bp)
{
	jfs_info("lbmStartIO");

	/* prepare the embedded buffer_head for a WRITE submission */
	bp->l_bh.b_reqnext = NULL;
	set_bit(BH_Dirty, &bp->l_bh.b_state);
//      lock_buffer(&bp->l_bh);
	/* open-coded lock_buffer: the buffer must never be contended here */
	assert(!test_bit(BH_Lock, &bp->l_bh.b_state));
	set_bit(BH_Lock, &bp->l_bh.b_state);

	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9);
	generic_make_request(WRITE, &bp->l_bh);

	INCREMENT(lmStat.submitted);
	run_task_queue(&tq_disk);
}
1982
1983
1984 /*
1985 * lbmIOWait()
1986 */
/*
 * Wait for i/o completion on bp (lbmDONE set by lbmIODone), optionally
 * returning it to the freelist (flag & lbmFREE).  Returns EIO if the
 * i/o failed, 0 otherwise.
 */
static int lbmIOWait(struct lbuf * bp, int flag)
{
	unsigned long flags;
	int rc = 0;

	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);

	LCACHE_LOCK(flags);	/* disable+lock */

	/* sleep (releasing the lock) until lbmIODone marks completion */
	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);

	rc = (bp->l_flag & lbmERROR) ? EIO : 0;

	/* note: buffer is freed even when the i/o reported an error */
	if (flag & lbmFREE)
		lbmfree(bp);

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
	return rc;
}
2008
2009 /*
2010 * lbmIODone()
2011 *
2012 * executed at INTIODONE level
2013 */
static void lbmIODone(struct buffer_head *bh, int uptodate)
{
	struct lbuf *bp = bh->b_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);	/* disable+lock */

	unlock_buffer(&bp->l_bh);
	bp->l_flag |= lbmDONE;

	if (!uptodate) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/*
	 * pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 * pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/* sidestream (superblock) write: never on the write queue */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 * synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 * Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 * asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2153
2154 int jfsIOWait(void *arg)
2155 {
2156 struct lbuf *bp;
2157
2158 lock_kernel();
2159
2160 daemonize();
2161 current->tty = NULL;
2162 strcpy(current->comm, "jfsIO");
2163
2164 unlock_kernel();
2165
2166 spin_lock_irq(¤t->sigmask_lock);
2167 sigfillset(¤t->blocked);
2168 recalc_sigpending(current);
2169 spin_unlock_irq(¤t->sigmask_lock);
2170
2171 complete(&jfsIOwait);
2172
2173 do {
2174 DECLARE_WAITQUEUE(wq, current);
2175
2176 spin_lock_irq(&log_redrive_lock);
2177 while ((bp = log_redrive_list)) {
2178 log_redrive_list = bp->l_redrive_next;
2179 bp->l_redrive_next = NULL;
2180 spin_unlock_irq(&log_redrive_lock);
2181 lbmStartIO(bp);
2182 spin_lock_irq(&log_redrive_lock);
2183 }
2184 add_wait_queue(&jfs_IO_thread_wait, &wq);
2185 set_current_state(TASK_INTERRUPTIBLE);
2186 spin_unlock_irq(&log_redrive_lock);
2187 schedule();
2188 current->state = TASK_RUNNING;
2189 remove_wait_queue(&jfs_IO_thread_wait, &wq);
2190 } while (!jfs_stop_threads);
2191
2192 jfs_info("jfsIOWait being killed!");
2193 complete(&jfsIOwait);
2194 return 0;
2195 }
2196
2197 /*
2198 * NAME: lmLogFormat()/jfs_logform()
2199 *
2200 * FUNCTION: format file system log
2201 *
2202 * PARAMETERS:
2203 * log - volume log
2204 * logAddress - start address of log space in FS block
2205 * logSize - length of log space in FS block;
2206 *
2207 * RETURN: 0 - success
2208 * -EIO - i/o error
2209 *
2210 * XXX: We're synchronously writing one page at a time. This needs to
2211 * be improved by writing multiple pages at once.
2212 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi = JFS_SBI(log->sb);
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	/* allocate a log buffer */
	bp = lbmAllocate(log, 1);

	npages = logSize >> sbi->l2nbperpage;

	/*
	 * log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *          into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 * init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	/* end points just past the SYNCPT record written in page 2 */
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* synchronous sidestream write of the superblock page */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 * init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 * (same buffer rewritten page by page, synchronously)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 * finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2328
2329 #ifdef CONFIG_JFS_STATISTICS
/*
 * /proc read handler reporting log manager counters from lmStat.
 * Follows the standard 2.4 proc_read contract (start/offset/eof).
 */
int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
		     int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Logmgr stats\n"
		       "================\n"
		       "commits = %d\n"
		       "writes submitted = %d\n"
		       "writes completed = %d\n"
		       "full pages submitted = %d\n"
		       "partial pages submitted = %d\n",
		       lmStat.commit,
		       lmStat.submitted,
		       lmStat.pagedone,
		       lmStat.full_page,
		       lmStat.partial_page);

	/* window the formatted text to the requested offset/length */
	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
2364 #endif /* CONFIG_JFS_STATISTICS */
Cache object: dfd1fd4c1bcd32ebf45ea9bab3b526ca
|