1 /*
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Copyright (c) 1982, 1986, 1989, 1993
12 * The Regents of the University of California. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
43 */
44
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD: releng/5.2/sys/ufs/ffs/ffs_balloc.c 118969 2003-08-15 20:03:19Z phk $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/lock.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
55
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/inode.h>
58 #include <ufs/ufs/ufs_extern.h>
59 #include <ufs/ufs/extattr.h>
60 #include <ufs/ufs/ufsmount.h>
61
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * On success, a locked buffer for the requested data block (or, with
 * BA_METAONLY, for the final indirect block) is returned in *bpp.
 * On failure, every block allocated along the way is freed again and
 * an errno value is returned; *bpp is set to NULL up front so callers
 * always see NULL on the error paths.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/*
	 * allociblk[] records every block allocated in this call so the
	 * "fail:" path can free them; allocib/unwindidx remember which
	 * on-disk pointer must be cleared to detach the allocated chain.
	 */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int unwindidx = -1;
	struct thread *td = curthread;	/* XXX */

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	lbn = lblkno(fs, startoffset);
	/* Bytes needed within the target block: in-block offset + request. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	/* UFS1 inodes have no external attribute area. */
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			/* The old fragment is now a full block; grow i_size. */
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by i_size. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough; reuse it. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move on disk). */
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last block of the file, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Only now publish the pointer in the inode. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/*
		 * i == num: bp is the last-level indirect block and nb is
		 * the data block pointer (possibly 0, handled below).
		 */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		/*
		 * Read-before-write: cluster-read when the access pattern
		 * is sequential and clustering is not disabled on the mount.
		 */
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*bpp = nbp;
	return (0);
fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Detach the chain at the inode's indirect pointer. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): the brelse() below is unreachable
			 * after panic(); panic does not return.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	return (error);
}
421
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Unlike UFS1, a UFS2 inode also carries an external attribute area;
 * with IO_EXT set this routine allocates in di_extb[] (negative buffer
 * lbns, BX_ALTDATA buffers) instead of the regular data blocks.
 * On success, a locked buffer for the requested block (or, with
 * BA_METAONLY, for the final indirect block) is returned in *bpp;
 * on failure all blocks allocated here are freed and errno returned.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	/*
	 * allociblk[] records every block allocated in this call so the
	 * "fail:" path can free them; allocib/unwindidx remember which
	 * on-disk pointer must be cleared to detach the allocated chain.
	 */
	ufs2_daddr_t nb, newb, *bap, pref;
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	struct thread *td = curthread;	/* XXX */

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	lbn = lblkno(fs, startoffset);
	/* Bytes needed within the target block: in-block offset + request. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		/* The extattr area has only NXADDR direct blocks. */
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				/* Mark as extattr (alternate) data. */
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		/* Block already allocated and fully covered by di_extsize. */
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough; reuse it. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment (may move on disk). */
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: fragment if this is the last extattr
			 * block, otherwise a full block.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			/* The old fragment is now a full block; grow i_size. */
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by i_size. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough; reuse it. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move on disk). */
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: fragment if this is the last block
			 * of the file, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Only now publish the pointer in the inode. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/*
		 * i == num: bp is the last-level indirect block and nb is
		 * the data block pointer (possibly 0, handled below).
		 */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*bpp = nbp;
	return (0);
fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Detach the chain at the inode's indirect pointer. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): the brelse() below is unreachable
			 * after panic(); panic does not return.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	return (error);
}
Cache object: fe2eefaad2cb103a278b542942fbb19b
|