/*-
 * Copyright (c) 1993
 *        The Regents of the University of California.  All rights reserved.
 * Modifications/enhancements:
 *        Copyright (c) 1995 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the University of
 *        California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *        @(#)vfs_cluster.c        8.7 (Berkeley) 2/13/94
 * $FreeBSD: src/sys/kern/vfs_cluster.c,v 1.38.2.3 1999/09/05 08:15:40 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#ifdef notyet_block_reallocation_enabled
#ifdef DEBUG
#include <sys/sysctl.h>
#include <sys/kernel.h>

static int doreallocblks = 0;
SYSCTL_INT(_debug, 13, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
#else
#define doreallocblks 0
#endif
#endif /* notyet_block_reallocation_enabled */

#ifdef notyet_block_reallocation_enabled
static struct cluster_save *
        cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp));
#endif
static struct buf *
        cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn,
            daddr_t blkno, long size, int run));

static int totreads;
static int totreadblocks;
extern vm_page_t bogus_page;

#ifdef DIAGNOSTIC
/*
 * Set to 1 if reads of block zero should cause readahead to be done.
 * Set to 0 to treat a read of block zero as a non-sequential read.
 *
 * Setting to one assumes that most reads of block zero of files are due to
 * sequential passes over the files (e.g. cat, sum) where additional blocks
 * will soon be needed.  Setting to zero assumes that the majority are
 * surgical strikes to get particular info (e.g. size, file) where readahead
 * blocks will not be used and, in fact, push out other potentially useful
 * blocks from the cache.  The former seems intuitive, but some quick tests
 * showed that the latter performed better from a system-wide point of view.
 */
int doclusterraz = 0;

#define ISSEQREAD(vp, blk) \
        (((blk) != 0 || doclusterraz) && \
         ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#else
#define ISSEQREAD(vp, blk) \
        (/* (blk) != 0 && */ ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#endif

/*
 * allow for three entire read-aheads...  The system will
 * adjust downwards rapidly if needed...
 */
#define RA_MULTIPLE_FAST        2
#define RA_MULTIPLE_SLOW        3
#define RA_SHIFTDOWN        1        /* approx lg2(RA_MULTIPLE) */
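
/*
 * v_ralen is the current read-ahead length in logical blocks.  As used
 * below, it may climb to RA_MULTIPLE_FAST * (MAXPHYS / size) blocks while
 * the requested block keeps being found in the cache during a sequential
 * run, and to RA_MULTIPLE_SLOW * (MAXPHYS / size) blocks when new
 * read-ahead is actually issued.  A non-sequential read backs it off by
 * RA_SHIFTDOWN (roughly halving it) or, on a cache miss, resets it to
 * zero; a previously read-ahead block that has been evicted also backs
 * it off.
 */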
/*
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *        bp is the block requested.
 *        rbp is the read-ahead block.
 *        If either is NULL, then you don't have to do the I/O.
 */
int
cluster_read(vp, filesize, lblkno, size, cred, bpp)
        struct vnode *vp;
        u_quad_t filesize;
        daddr_t lblkno;
        long size;
        struct ucred *cred;
        struct buf **bpp;
{
        struct buf *bp, *rbp;
        daddr_t blkno, rablkno, origlblkno;
        int error, num_ra, alreadyincore;
        int i;
        int seq;

        error = 0;
        /*
         * get the requested block
         */
        origlblkno = lblkno;
        *bpp = bp = getblk(vp, lblkno, size, 0, 0);

        seq = ISSEQREAD(vp, lblkno);
        /*
         * if it is in the cache, then check to see if the reads have been
         * sequential.  If they have, then try some read-ahead, otherwise
         * back off on prospective read-aheads.
         */
        if (bp->b_flags & B_CACHE) {
                if (!seq) {
                        vp->v_maxra = bp->b_lblkno + bp->b_bcount / size;
                        vp->v_ralen >>= RA_SHIFTDOWN;
                        return 0;
                } else if (vp->v_maxra > lblkno) {
                        if ((vp->v_ralen + 1) < RA_MULTIPLE_FAST * (MAXPHYS / size))
                                ++vp->v_ralen;
                        if (vp->v_maxra > lblkno + vp->v_ralen) {
                                return 0;
                        }
                        lblkno = vp->v_maxra;
                } else {
                        lblkno += 1;
                }
                bp = NULL;
        } else {
                /*
                 * if it isn't in the cache, then get a chunk from disk if
                 * sequential, otherwise just get the block.
                 */
                bp->b_flags |= B_READ;
                lblkno += 1;
                curproc->p_stats->p_ru.ru_inblock++;        /* XXX */
                vp->v_ralen = 0;
        }
        /*
         * assume no read-ahead
         */
        alreadyincore = 1;
        rablkno = lblkno;

        /*
         * if we have been doing sequential I/O, then do some read-ahead
         */
        if (seq) {
                alreadyincore = 0;

                /*
                 * bump ralen a bit...
                 */
                if ((vp->v_ralen + 1) < RA_MULTIPLE_SLOW * (MAXPHYS / size))
                        ++vp->v_ralen;
                /*
                 * this code makes sure that the stuff that we have read ahead
                 * is still in the cache.  If it isn't, we have been reading
                 * ahead too much, and we need to back off, otherwise we might
                 * try to read more.
                 */
                for (i = 0; i < vp->v_maxra - lblkno; i++) {
                        rablkno = lblkno + i;
                        alreadyincore = (int) incore(vp, rablkno);
                        if (!alreadyincore) {
                                vp->v_maxra = rablkno;
                                vp->v_ralen >>= RA_SHIFTDOWN;
                                alreadyincore = 1;
                        }
                }
        }
        }
        /*
         * we now build the read-ahead buffer if it is desirable.
         */
        rbp = NULL;
        if (!alreadyincore &&
            ((u_quad_t)(rablkno + 1) * size) <= filesize &&
            !(error = VOP_BMAP(vp, rablkno, NULL, &blkno, &num_ra, NULL)) &&
            blkno != -1) {
                if (num_ra > vp->v_ralen)
                        num_ra = vp->v_ralen;

                if (num_ra) {
                        rbp = cluster_rbuild(vp, filesize, rablkno, blkno, size,
                            num_ra + 1);
                } else {
                        rbp = getblk(vp, rablkno, size, 0, 0);
                        rbp->b_flags |= B_READ | B_ASYNC;
                        rbp->b_blkno = blkno;
                }
        }

        /*
         * handle the synchronous read
         */
        if (bp) {
                if (bp->b_flags & (B_DONE | B_DELWRI))
                        panic("cluster_read: DONE bp");
                else {
                        vfs_busy_pages(bp, 0);
                        error = VOP_STRATEGY(bp);
                        vp->v_maxra = bp->b_lblkno + bp->b_bcount / size;
                        totreads++;
                        totreadblocks += bp->b_bcount / size;
                        curproc->p_stats->p_ru.ru_inblock++;
                }
        }
        /*
         * and if we have read-aheads, do them too
         */
        if (rbp) {
                vp->v_maxra = rbp->b_lblkno + rbp->b_bcount / size;
                if (error) {
                        rbp->b_flags &= ~(B_ASYNC | B_READ);
                        brelse(rbp);
                } else if (rbp->b_flags & B_CACHE) {
                        rbp->b_flags &= ~(B_ASYNC | B_READ);
                        bqrelse(rbp);
                } else {
                        if ((rbp->b_flags & B_CLUSTER) == 0)
                                vfs_busy_pages(rbp, 0);
                        (void) VOP_STRATEGY(rbp);
                        totreads++;
                        totreadblocks += rbp->b_bcount / size;
                        curproc->p_stats->p_ru.ru_inblock++;
                }
        }
        if (bp && ((bp->b_flags & B_ASYNC) == 0))
                return (biowait(bp));
        return (error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
static struct buf *
cluster_rbuild(vp, filesize, lbn, blkno, size, run)
        struct vnode *vp;
        u_quad_t filesize;
        daddr_t lbn;
        daddr_t blkno;
        long size;
        int run;
{
        struct buf *bp, *tbp;
        daddr_t bn;
        int i, inc, j;

#ifdef DIAGNOSTIC
        if (size != vp->v_mount->mnt_stat.f_iosize)
                panic("cluster_rbuild: size %d != f_iosize %d\n",
                    size, vp->v_mount->mnt_stat.f_iosize);
#endif
        /*
         * avoid a division
         */
        while ((u_quad_t) size * (lbn + run) > filesize) {
                --run;
        }

        tbp = getblk(vp, lbn, size, 0, 0);
        if (tbp->b_flags & B_CACHE)
                return tbp;

        tbp->b_blkno = blkno;
        tbp->b_flags |= B_ASYNC | B_READ;
        if ((tbp->b_flags & B_MALLOC) ||
            ((tbp->b_flags & B_VMIO) == 0) || (run <= 1))
                return tbp;

        bp = trypbuf();
        if (bp == 0)
                return tbp;

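        /*
         * Give the pbuf's data pointer the same offset within the page as
         * the first component buffer's data, so that when the component
         * pages are entered into the cluster buffer's KVA (pmap_qenter
         * below) b_data points at the same bytes as tbp->b_data.
         */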
        (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
        bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO;
        bp->b_iodone = cluster_callback;
        bp->b_blkno = blkno;
        bp->b_lblkno = lbn;
        pbgetvp(vp, bp);

        TAILQ_INIT(&bp->b_cluster.cluster_head);

        bp->b_bcount = 0;
        bp->b_bufsize = 0;
        bp->b_npages = 0;

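        /*
         * Pull in additional logical blocks one at a time; the run is cut
         * short at the first block that would exceed MAXPHYS, is already
         * incore, is not a VMIO buffer, has partially valid pages, or is
         * not physically contiguous with the rest of the cluster.
         */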
        inc = btodb(size);
        for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
                if (i != 0) {
                        if ((bp->b_npages * PAGE_SIZE) +
                            round_page(size) > MAXPHYS)
                                break;

                        if (incore(vp, lbn + i))
                                break;

                        tbp = getblk(vp, lbn + i, size, 0, 0);

                        if ((tbp->b_flags & B_CACHE) ||
                            (tbp->b_flags & B_VMIO) == 0) {
                                bqrelse(tbp);
                                break;
                        }

                        for (j = 0; j < tbp->b_npages; j++) {
                                if (tbp->b_pages[j]->valid) {
                                        break;
                                }
                        }

                        if (j != tbp->b_npages) {
                                /*
                                 * force buffer to be re-constituted later
                                 */
                                tbp->b_flags |= B_RELBUF;
                                brelse(tbp);
                                break;
                        }

                        tbp->b_flags |= B_READ | B_ASYNC;
                        if (tbp->b_blkno == tbp->b_lblkno) {
                                tbp->b_blkno = bn;
                        } else if (tbp->b_blkno != bn) {
                                brelse(tbp);
                                break;
                        }
                }
                TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
                    tbp, b_cluster.cluster_entry);
                for (j = 0; j < tbp->b_npages; j += 1) {
                        vm_page_t m;
                        m = tbp->b_pages[j];
                        ++m->busy;
                        ++m->object->paging_in_progress;
                        if ((bp->b_npages == 0) ||
                            (bp->b_pages[bp->b_npages - 1] != m)) {
                                bp->b_pages[bp->b_npages] = m;
                                bp->b_npages++;
                        }
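                        /*
                         * If the page is already completely valid, point the
                         * component buffer at bogus_page instead so the disk
                         * read cannot overwrite the valid data; the real page
                         * is put back when the buffer's I/O completes.
                         */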
                        if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
                                tbp->b_pages[j] = bogus_page;
                }
                bp->b_bcount += tbp->b_bcount;
                bp->b_bufsize += tbp->b_bufsize;
        }

        for (j = 0; j < bp->b_npages; j++) {
                if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
                    VM_PAGE_BITS_ALL)
                        bp->b_pages[j] = bogus_page;
        }
        if (bp->b_bufsize > bp->b_kvasize)
                panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n",
                    bp->b_bufsize, bp->b_kvasize);
        bp->b_kvasize = bp->b_bufsize;

        pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
            (vm_page_t *)bp->b_pages, bp->b_npages);
        return (bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
void
cluster_callback(bp)
        struct buf *bp;
{
        struct buf *nbp, *tbp;
        int error = 0;

        /*
         * Must propagate errors to all the components.
         */
        if (bp->b_flags & B_ERROR)
                error = bp->b_error;

        pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
        /*
         * Move memory from the large cluster buffer into the component
         * buffers and mark IO as done on these.
         */
        for (tbp = bp->b_cluster.cluster_head.tqh_first;
            tbp; tbp = nbp) {
                nbp = tbp->b_cluster.cluster_entry.tqe_next;
                if (error) {
                        tbp->b_flags |= B_ERROR;
                        tbp->b_error = error;
                } else
                        tbp->b_dirtyoff = tbp->b_dirtyend = 0;
                biodone(tbp);
        }
        relpbuf(bp);
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *        1. Write is not sequential (write asynchronously)
 *        Write is sequential:
 *        2.        beginning of cluster - begin cluster
 *        3.        middle of a cluster - add to cluster
 *        4.        end of a cluster - asynchronously write cluster
 */
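/*
 * The vnode tracks the write cluster being built: v_cstart is the first
 * logical block of the cluster, v_clen the number of further blocks that
 * may be accumulated beyond v_cstart before the cluster is pushed,
 * v_lastw the last logical block written through this path, and v_lasta
 * its physical block number; these are what the sequentiality tests
 * below are made against.
 */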
void
cluster_write(bp, filesize)
        struct buf *bp;
        u_quad_t filesize;
{
        struct vnode *vp;
        daddr_t lbn;
        int maxclen, cursize;
        int lblocksize;
        int async;

        vp = bp->b_vp;
        async = (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC));
        lblocksize = vp->v_mount->mnt_stat.f_iosize;
        lbn = bp->b_lblkno;

        /* Initialize vnode to beginning of file. */
        if (lbn == 0)
                vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

        if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
            (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) {
                maxclen = MAXPHYS / lblocksize - 1;
                if (vp->v_clen != 0) {
                        /*
                         * Next block is not sequential.
                         *
                         * If we are not writing at end of file, the process
                         * seeked to another point in the file since its last
                         * write, or we have reached our maximum cluster size,
                         * then push the previous cluster.  Otherwise try
                         * reallocating to make it sequential.
                         */
                        cursize = vp->v_lastw - vp->v_cstart + 1;
#ifndef notyet_block_reallocation_enabled
                        if (((u_quad_t)(lbn + 1) * lblocksize) != filesize ||
                            lbn != vp->v_lastw + 1 ||
                            vp->v_clen <= cursize) {
                                if (!async)
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                        }
#else
                        if (!doreallocblks ||
                            (lbn + 1) * lblocksize != filesize ||
                            lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
                                if (!async)
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                        } else {
                                struct buf **bpp, **endbp;
                                struct cluster_save *buflist;

                                buflist = cluster_collectbufs(vp, bp);
                                endbp = &buflist->bs_children
                                    [buflist->bs_nchildren - 1];
                                if (VOP_REALLOCBLKS(vp, buflist)) {
                                        /*
                                         * Failed, push the previous cluster.
                                         */
                                        for (bpp = buflist->bs_children;
                                            bpp < endbp; bpp++)
                                                brelse(*bpp);
                                        free(buflist, M_SEGMENT);
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                                } else {
                                        /*
                                         * Succeeded, keep building cluster.
                                         */
                                        for (bpp = buflist->bs_children;
                                            bpp <= endbp; bpp++)
                                                bdwrite(*bpp);
                                        free(buflist, M_SEGMENT);
                                        vp->v_lastw = lbn;
                                        vp->v_lasta = bp->b_blkno;
                                        return;
                                }
                        }
#endif /* notyet_block_reallocation_enabled */
                }
                /*
                 * Consider beginning a cluster.  If at end of file, make
                 * cluster as large as possible, otherwise find size of
                 * existing cluster.
                 */
                if (((u_quad_t) (lbn + 1) * lblocksize) != filesize &&
                    (bp->b_blkno == bp->b_lblkno) &&
                    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) ||
                     bp->b_blkno == -1)) {
                        bawrite(bp);
                        vp->v_clen = 0;
                        vp->v_lasta = bp->b_blkno;
                        vp->v_cstart = lbn + 1;
                        vp->v_lastw = lbn;
                        return;
                }
                vp->v_clen = maxclen;
                if (!async && maxclen == 0) {        /* I/O not contiguous */
                        vp->v_cstart = lbn + 1;
                        bawrite(bp);
                } else {        /* Wait for rest of cluster */
                        vp->v_cstart = lbn;
                        bdwrite(bp);
                }
        } else if (lbn == vp->v_cstart + vp->v_clen) {
                /*
                 * At end of cluster, write it out.
                 */
                bdwrite(bp);
                cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
                vp->v_clen = 0;
                vp->v_cstart = lbn + 1;
        } else
                /*
                 * In the middle of a cluster, so just delay the I/O for now.
                 */
                bdwrite(bp);
        vp->v_lastw = lbn;
        vp->v_lasta = bp->b_blkno;
}


/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * Write out "len" delayed-write buffers on the vnode, starting at logical
 * block start_lbn, gathering physically contiguous, clusterable buffers
 * into larger pbuf-based writes where possible.  Returns the number of
 * bytes queued for writing.
 */
int
cluster_wbuild(vp, size, start_lbn, len)
        struct vnode *vp;
        long size;
        daddr_t start_lbn;
        int len;
{
        struct buf *bp, *tbp;
        int i, j, s;
        int totalwritten = 0;
        int dbsize = btodb(size);
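
        /*
         * Walk the requested range one logical block at a time.  Buffers
         * that are missing, busy, invalid, or not delayed-write are
         * skipped; buffers that cannot be clustered are pushed out by
         * themselves with bawrite().
         */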
        while (len > 0) {
                s = splbio();
                if (((tbp = gbincore(vp, start_lbn)) == NULL) ||
                    ((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) {
                        ++start_lbn;
                        --len;
                        splx(s);
                        continue;
                }
                bremfree(tbp);
                tbp->b_flags |= B_BUSY;
                tbp->b_flags &= ~B_DONE;
                splx(s);

                /*
                 * Extra memory in the buffer, punt on this buffer.  XXX we could
                 * handle this in most cases, but we would have to push the extra
                 * memory down to after our max possible cluster size and then
                 * potentially pull it back up if the cluster was terminated
                 * prematurely--too much hassle.
                 */
                if (((tbp->b_flags & (B_CLUSTEROK|B_MALLOC)) != B_CLUSTEROK) ||
                    (tbp->b_bcount != tbp->b_bufsize) ||
                    (tbp->b_bcount != size) ||
                    len == 1) {
                        totalwritten += tbp->b_bufsize;
                        bawrite(tbp);
                        ++start_lbn;
                        --len;
                        continue;
                }

                bp = trypbuf();
                if (bp == NULL) {
                        totalwritten += tbp->b_bufsize;
                        bawrite(tbp);
                        ++start_lbn;
                        --len;
                        continue;
                }

                TAILQ_INIT(&bp->b_cluster.cluster_head);
                bp->b_bcount = 0;
                bp->b_bufsize = 0;
                bp->b_npages = 0;
                if (tbp->b_wcred != NOCRED) {
                        bp->b_wcred = tbp->b_wcred;
                        crhold(bp->b_wcred);
                }

                bp->b_blkno = tbp->b_blkno;
                bp->b_lblkno = tbp->b_lblkno;
                (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
                bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER |
                    (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT));
                bp->b_iodone = cluster_callback;
                pbgetvp(vp, bp);

                for (i = 0; i < len; ++i, ++start_lbn) {
                        if (i != 0) {
                                s = splbio();
                                if ((tbp = gbincore(vp, start_lbn)) == NULL) {
                                        splx(s);
                                        break;
                                }

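                                /*
                                 * To join the cluster the buffer must be a
                                 * clusterable delayed write whose
                                 * VMIO/NEEDCOMMIT state and write credential
                                 * match the cluster, must be the right size,
                                 * physically contiguous, and must not push
                                 * the cluster past MAXPHYS worth of pages.
                                 */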
                                if ((tbp->b_flags & (B_VMIO|B_CLUSTEROK|B_INVAL|
                                    B_BUSY|B_DELWRI|B_NEEDCOMMIT)) !=
                                    (B_DELWRI|B_CLUSTEROK|
                                     (bp->b_flags & (B_VMIO|B_NEEDCOMMIT)))) {
                                        splx(s);
                                        break;
                                }

                                if (tbp->b_wcred != bp->b_wcred) {
                                        splx(s);
                                        break;
                                }

                                if ((tbp->b_bcount != size) ||
                                    ((bp->b_blkno + dbsize * i) != tbp->b_blkno) ||
                                    ((tbp->b_npages + bp->b_npages) > (MAXPHYS / PAGE_SIZE))) {
                                        splx(s);
                                        break;
                                }
                                bremfree(tbp);
                                tbp->b_flags |= B_BUSY;
                                tbp->b_flags &= ~B_DONE;
                                splx(s);
                        }
                        if (tbp->b_flags & B_VMIO) {
                                for (j = 0; j < tbp->b_npages; j += 1) {
                                        vm_page_t m;
                                        m = tbp->b_pages[j];
                                        ++m->busy;
                                        ++m->object->paging_in_progress;
                                        if ((bp->b_npages == 0) ||
                                            (bp->b_pages[bp->b_npages - 1] != m)) {
                                                bp->b_pages[bp->b_npages] = m;
                                                bp->b_npages++;
                                        }
                                }
                        }
                        bp->b_bcount += size;
                        bp->b_bufsize += size;

                        tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
                        tbp->b_flags |= B_ASYNC;
                        s = splbio();
                        reassignbuf(tbp, tbp->b_vp);        /* put on clean list */
                        ++tbp->b_vp->v_numoutput;
                        splx(s);
                        TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
                            tbp, b_cluster.cluster_entry);
                }
                pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
                    (vm_page_t *) bp->b_pages, bp->b_npages);
                if (bp->b_bufsize > bp->b_kvasize)
                        panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n",
                            bp->b_bufsize, bp->b_kvasize);
                bp->b_kvasize = bp->b_bufsize;
                totalwritten += bp->b_bufsize;
                bp->b_dirtyoff = 0;
                bp->b_dirtyend = bp->b_bufsize;
                bawrite(bp);

                len -= i;
        }
        return totalwritten;
}

#ifdef notyet_block_reallocation_enabled
/*
 * Collect together all the buffers in a cluster, plus add one
 * additional buffer.
 */
static struct cluster_save *
cluster_collectbufs(vp, last_bp)
        struct vnode *vp;
        struct buf *last_bp;
{
        struct cluster_save *buflist;
        daddr_t lbn;
        int i, len;

        len = vp->v_lastw - vp->v_cstart + 1;
        buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
            M_SEGMENT, M_WAITOK);
        buflist->bs_nchildren = 0;
        buflist->bs_children = (struct buf **) (buflist + 1);
        for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
                (void) bread(vp, lbn, last_bp->b_bcount, NOCRED,
                    &buflist->bs_children[i]);
        buflist->bs_children[i] = last_bp;
        buflist->bs_nchildren = i + 1;
        return (buflist);
}
#endif /* notyet_block_reallocation_enabled */