1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/6.4/sys/ufs/ffs/ffs_balloc.c 176545 2008-02-25 10:00:19Z obrien $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
82 /*
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
 *
 * Arguments, as used by the code below:
 *   vp          - vnode of the file; its inode is obtained with VTOI().
 *   startoffset - byte offset in the file; lblkno() turns it into the
 *                 logical block number and blkoff() into the offset
 *                 within that block.
 *   size        - byte count starting at startoffset. It is added to the
 *                 in-block offset, and the function panics if the sum
 *                 exceeds fs_bsize (a request may not cross a block).
 *   cred        - credential handed to ffs_alloc() / ffs_realloccg()
 *                 (and to chkdq() when QUOTA is defined).
 *   flags       - the bits tested here are IO_EXT, IO_SYNC, BA_METAONLY,
 *                 BA_CLRBUF and the BA_SEQMASK field.
 *   bpp         - set to NULL on entry; on success receives the buffer
 *                 for the data block, or, with BA_METAONLY, the buffer of
 *                 the last indirect block visited.
 *
 * Returns 0 on success, EOPNOTSUPP when IO_EXT is set, EFBIG when the
 * logical block number is negative, or the error reported by
 * ufs_getlbns(), ffs_alloc(), ffs_realloccg(), bread(), cluster_read()
 * or bwrite().
88 */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
92 {
93 struct inode *ip;
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
96 struct fs *fs;
97 ufs1_daddr_t nb;
98 struct buf *bp, *nbp;
99 struct ufsmount *ump;
100 struct indir indirs[NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
102 ufs2_daddr_t newb;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
/*
 * Index into indirs[] of an already existing indirect block whose
 * slot was filled in by this call; -1 while there is none. Used
 * only by the fail path.
 */
106 int unwindidx = -1;
107
108 ip = VTOI(vp);
109 dp = ip->i_din1;
110 fs = ip->i_fs;
111 ump = ip->i_ump;
/*
 * Split the request into a logical block number and the number of
 * bytes from the start of that block to the end of the request.
 */
112 lbn = lblkno(fs, startoffset);
113 size = blkoff(fs, startoffset) + size;
114 if (size > fs->fs_bsize)
115 panic("ffs_balloc_ufs1: blk too big");
116 *bpp = NULL;
/* IO_EXT requests are refused on this (UFS1) path. */
117 if (flags & IO_EXT)
118 return (EOPNOTSUPP);
119 if (lbn < 0)
120 return (EFBIG);
121
122 /*
123 * If the next write will extend the file into a new block,
124 * and the file is currently composed of a fragment
125 * this fragment has to be extended to be a full block.
126 */
127 lastlbn = lblkno(fs, ip->i_size);
128 if (lastlbn < NDADDR && lastlbn < lbn) {
129 nb = lastlbn;
130 osize = blksize(fs, ip, nb);
131 if (osize < fs->fs_bsize && osize > 0) {
/*
 * NOTE(review): no UFS_UNLOCK appears anywhere in this
 * function; presumably ffs_realloccg() and ffs_alloc()
 * release the lock before returning, on success and on
 * error alike. Confirm against ffs_alloc.c.
 */
132 UFS_LOCK(ump);
133 error = ffs_realloccg(ip, nb, dp->di_db[nb],
134 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
135 &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
136 if (error)
137 return (error);
138 if (DOINGSOFTDEP(vp))
139 softdep_setup_allocdirect(ip, nb,
140 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
141 fs->fs_bsize, osize, bp);
/*
 * The in-core and on-disk sizes are advanced to
 * smalllblktosize(fs, nb + 1), presumably the byte offset
 * at which block nb ends, and the direct pointer is updated
 * from the buffer that ffs_realloccg() returned.
 */
142 ip->i_size = smalllblktosize(fs, nb + 1);
143 dp->di_size = ip->i_size;
144 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
145 ip->i_flag |= IN_CHANGE | IN_UPDATE;
146 if (flags & IO_SYNC)
147 bwrite(bp);
148 else
149 bawrite(bp);
150 }
151 }
152 /*
153 * The first NDADDR blocks are direct blocks
154 */
155 if (lbn < NDADDR) {
156 if (flags & BA_METAONLY)
157 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
158 nb = dp->di_db[lbn];
/*
 * A block is assigned and i_size is not below
 * smalllblktosize(fs, lbn + 1): read the full block and
 * return it without allocating anything.
 */
159 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
160 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
161 if (error) {
162 brelse(bp);
163 return (error);
164 }
165 bp->b_blkno = fsbtodb(fs, nb);
166 *bpp = bp;
167 return (0);
168 }
169 if (nb != 0) {
170 /*
171 * Consider need to reallocate a fragment.
 *
 * osize and nsize are the fragroundup() results for the
 * current in-block file size and for the requested extent.
172 */
173 osize = fragroundup(fs, blkoff(fs, ip->i_size));
174 nsize = fragroundup(fs, size);
175 if (nsize <= osize) {
/* What is already allocated is large enough: read it. */
176 error = bread(vp, lbn, osize, NOCRED, &bp);
177 if (error) {
178 brelse(bp);
179 return (error);
180 }
181 bp->b_blkno = fsbtodb(fs, nb);
182 } else {
/* Grow the existing allocation from osize to nsize. */
183 UFS_LOCK(ump);
184 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
185 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
186 &dp->di_db[0]), osize, nsize, cred, &bp);
187 if (error)
188 return (error);
189 if (DOINGSOFTDEP(vp))
190 softdep_setup_allocdirect(ip, lbn,
191 dbtofsb(fs, bp->b_blkno), nb,
192 nsize, osize, bp);
193 }
194 } else {
/*
 * Nothing is assigned to this slot yet. Allocate only
 * fragroundup(size) bytes when i_size is below
 * smalllblktosize(fs, lbn + 1), a full block otherwise.
 */
195 if (ip->i_size < smalllblktosize(fs, lbn + 1))
196 nsize = fragroundup(fs, size);
197 else
198 nsize = fs->fs_bsize;
199 UFS_LOCK(ump);
200 error = ffs_alloc(ip, lbn,
201 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
202 nsize, cred, &newb);
203 if (error)
204 return (error);
205 bp = getblk(vp, lbn, nsize, 0, 0, 0);
206 bp->b_blkno = fsbtodb(fs, newb);
207 if (flags & BA_CLRBUF)
208 vfs_bio_clrbuf(bp);
209 if (DOINGSOFTDEP(vp))
210 softdep_setup_allocdirect(ip, lbn, newb, 0,
211 nsize, 0, bp);
212 }
/*
 * Common tail for the two allocating cases and the in-place
 * read: store the address taken from the buffer in the direct
 * pointer and mark the inode changed.
 */
213 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
214 ip->i_flag |= IN_CHANGE | IN_UPDATE;
215 *bpp = bp;
216 return (0);
217 }
218 /*
219 * Determine the number of levels of indirection.
220 */
221 pref = 0;
222 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
223 return(error);
224 #ifdef INVARIANTS
225 if (num < 1)
226 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
227 #endif
228 /*
229 * Fetch the first indirect block allocating if necessary.
 *
 * After the decrement, num is the value of i at which the walk
 * through indirs[] further down stops.
 *
 * allociblk[] (filled through allocblk) and lbns[] (filled through
 * lbns_remfree) record the disk address and the logical block number
 * of every block allocated by this call, so that the fail path can
 * invalidate the buffers and free the blocks. allocib stays NULL
 * unless the inode indirect pointer itself is set by this call.
230 */
231 --num;
232 nb = dp->di_ib[indirs[0].in_off];
233 allocib = NULL;
234 allocblk = allociblk;
235 lbns_remfree = lbns;
236 if (nb == 0) {
237 UFS_LOCK(ump);
238 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
239 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
240 cred, &newb)) != 0)
241 return (error);
242 nb = newb;
243 *allocblk++ = nb;
244 *lbns_remfree++ = indirs[1].in_lbn;
245 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
246 bp->b_blkno = fsbtodb(fs, nb);
247 vfs_bio_clrbuf(bp);
248 if (DOINGSOFTDEP(vp)) {
249 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
250 newb, 0, fs->fs_bsize, 0, bp);
251 bdwrite(bp);
252 } else {
253 /*
254 * Write synchronously so that indirect blocks
255 * never point at garbage.
 *
 * (When DOINGASYNC(vp) is true a delayed write is
 * used instead.)
256 */
257 if (DOINGASYNC(vp))
258 bdwrite(bp);
259 else if ((error = bwrite(bp)) != 0)
260 goto fail;
261 }
/*
 * Only now is the new block hooked into the inode; allocib
 * remembers the slot so the fail path can zero it again.
 */
262 allocib = &dp->di_ib[indirs[0].in_off];
263 *allocib = nb;
264 ip->i_flag |= IN_CHANGE | IN_UPDATE;
265 }
266 /*
267 * Fetch through the indirect blocks, allocating as necessary.
268 */
269 for (i = 1;;) {
270 error = bread(vp,
271 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
272 if (error) {
273 brelse(bp);
274 goto fail;
275 }
276 bap = (ufs1_daddr_t *)bp->b_data;
277 nb = bap[indirs[i].in_off];
/*
 * At i == num, bp is the last indirect block to visit and nb is
 * the pointer read from it; the loop exits with bp still held.
 */
278 if (i == num)
279 break;
/*
 * i is advanced before nb is tested, so from here on indirs[i]
 * names the next-level block and indirs[i - 1] the entry that
 * was just read from bp.
 */
280 i += 1;
281 if (nb != 0) {
/* Next level already exists: release this one and descend. */
282 bqrelse(bp);
283 continue;
284 }
285 UFS_LOCK(ump);
286 if (pref == 0)
287 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
288 if ((error =
289 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
290 brelse(bp);
291 goto fail;
292 }
293 nb = newb;
294 *allocblk++ = nb;
295 *lbns_remfree++ = indirs[i].in_lbn;
296 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
297 nbp->b_blkno = fsbtodb(fs, nb);
298 vfs_bio_clrbuf(nbp);
299 if (DOINGSOFTDEP(vp)) {
300 softdep_setup_allocindir_meta(nbp, ip, bp,
301 indirs[i - 1].in_off, nb);
302 bdwrite(nbp);
303 } else {
304 /*
305 * Write synchronously so that indirect blocks
306 * never point at garbage.
307 */
308 if ((error = bwrite(nbp)) != 0) {
309 brelse(bp);
310 goto fail;
311 }
312 }
/* Link the new indirect block into its parent (bp). */
313 bap[indirs[i - 1].in_off] = nb;
/*
 * If the inode pointer was not set by this call, remember the
 * first existing indirect block that was modified so the fail
 * path can clear the slot written above.
 */
314 if (allocib == NULL && unwindidx < 0)
315 unwindidx = i - 1;
316 /*
317 * If required, write synchronously, otherwise use
318 * delayed write.
319 */
320 if (flags & IO_SYNC) {
321 bwrite(bp);
322 } else {
323 if (bp->b_bufsize == fs->fs_bsize)
324 bp->b_flags |= B_CLUSTEROK;
325 bdwrite(bp);
326 }
327 }
328 /*
329 * If asked only for the indirect block, then return it.
 * The buffer is handed back still held; it is not released here.
330 */
331 if (flags & BA_METAONLY) {
332 *bpp = bp;
333 return (0);
334 }
335 /*
336 * Get the data block, allocating if necessary.
337 */
338 if (nb == 0) {
339 UFS_LOCK(ump);
340 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
341 error = ffs_alloc(ip,
342 lbn, pref, (int)fs->fs_bsize, cred, &newb);
343 if (error) {
344 brelse(bp);
345 goto fail;
346 }
347 nb = newb;
348 *allocblk++ = nb;
349 *lbns_remfree++ = lbn;
350 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
351 nbp->b_blkno = fsbtodb(fs, nb);
352 if (flags & BA_CLRBUF)
353 vfs_bio_clrbuf(nbp);
354 if (DOINGSOFTDEP(vp))
355 softdep_setup_allocindir_page(ip, lbn, bp,
356 indirs[i].in_off, nb, 0, nbp);
/* Record the new data block in the indirect block. */
357 bap[indirs[i].in_off] = nb;
358 /*
359 * If required, write synchronously, otherwise use
360 * delayed write.
361 */
362 if (flags & IO_SYNC) {
363 bwrite(bp);
364 } else {
365 if (bp->b_bufsize == fs->fs_bsize)
366 bp->b_flags |= B_CLUSTEROK;
367 bdwrite(bp);
368 }
369 *bpp = nbp;
370 return (0);
371 }
/*
 * The data block already exists, so the indirect block is no longer
 * needed. With BA_CLRBUF the block contents are read in, through
 * cluster_read() when the BA_SEQMASK field of flags is non-zero and
 * the mount does not have MNT_NOCLUSTERR set, through bread()
 * otherwise. Without BA_CLRBUF the buffer is only obtained with
 * getblk() and given its device address.
 */
372 brelse(bp);
373 if (flags & BA_CLRBUF) {
374 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
375 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
376 error = cluster_read(vp, ip->i_size, lbn,
377 (int)fs->fs_bsize, NOCRED,
378 MAXBSIZE, seqcount, &nbp);
379 } else {
380 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
381 }
382 if (error) {
383 brelse(nbp);
384 goto fail;
385 }
386 } else {
387 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
388 nbp->b_blkno = fsbtodb(fs, nb);
389 }
390 *bpp = nbp;
391 return (0);
392 fail:
393 /*
394 * If we have failed to allocate any blocks, simply return the error.
395 * This is the usual case and avoids the need to fsync the file.
396 */
397 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
398 return (error);
399 /*
400 * If we have failed part way through block allocation, we
401 * have to deallocate any indirect blocks that we have allocated.
402 * We have to fsync the file before we start to get rid of all
403 * of its dependencies so that we do not leave them dangling.
404 * We have to sync it at the end so that the soft updates code
405 * does not find any untracked changes. Although this is really
406 * slow, running out of disk space is not expected to be a common
407 * occurrence. The error return from fsync is ignored as we already
408 * have an error to return to the user.
409 */
410 (void) ffs_syncvnode(vp, MNT_WAIT);
/*
 * Walk the blocks recorded in allociblk[] / lbns[]; deallocated
 * accumulates fs_bsize bytes for each of them.
 */
411 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
412 blkp < allocblk; blkp++, lbns_remfree++) {
413 /*
414 * We shall not leave the freed blocks on the vnode
415 * buffer object lists.
416 */
417 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
418 if (bp != NULL) {
419 bp->b_flags |= (B_INVAL | B_RELBUF);
420 bp->b_flags &= ~B_ASYNC;
421 brelse(bp);
422 }
423 deallocated += fs->fs_bsize;
424 }
/*
 * Clear the pointer through which the newly allocated chain was
 * reachable: the inode slot when this call set it, otherwise the
 * slot in the existing indirect block noted in unwindidx.
 */
425 if (allocib != NULL) {
426 *allocib = 0;
427 } else if (unwindidx >= 0) {
428 int r;
429
430 r = bread(vp, indirs[unwindidx].in_lbn,
431 (int)fs->fs_bsize, NOCRED, &bp);
432 if (r) {
/*
 * NOTE(review): panic() is not expected to return, so the
 * brelse() that follows looks unreachable - confirm.
 */
433 panic("Could not unwind indirect block, error %d", r);
434 brelse(bp);
435 } else {
436 bap = (ufs1_daddr_t *)bp->b_data;
437 bap[indirs[unwindidx].in_off] = 0;
438 if (flags & IO_SYNC) {
439 bwrite(bp);
440 } else {
441 if (bp->b_bufsize == fs->fs_bsize)
442 bp->b_flags |= B_CLUSTEROK;
443 bdwrite(bp);
444 }
445 }
446 }
447 if (deallocated) {
448 #ifdef QUOTA
449 /*
450 * Restore user's disk quota because allocation failed.
451 */
452 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
453 #endif
/* Take the released space back out of the inode block count. */
454 dp->di_blocks -= btodb(deallocated);
455 ip->i_flag |= IN_CHANGE | IN_UPDATE;
456 }
457 (void) ffs_syncvnode(vp, MNT_WAIT);
458 /*
459 * After the buffers are invalidated and on-disk pointers are
460 * cleared, free the blocks.
461 */
462 for (blkp = allociblk; blkp < allocblk; blkp++) {
463 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
464 ip->i_number);
465 }
466 return (error);
467 }
468
469 /*
470 * Balloc defines the structure of file system storage
471 * by allocating the physical blocks on a device given
472 * the inode and the logical block number in a file.
473 * This is the allocation strategy for UFS2. Above is
474 * the allocation strategy for UFS1.
 *
 * Arguments, as used by the code below:
 *   vp          - vnode of the file; its inode is obtained with VTOI().
 *   startoffset - byte offset; lblkno() turns it into the logical block
 *                 number and blkoff() into the offset within that block.
 *                 With IO_EXT it is an offset into the external data
 *                 area described by di_extsize / di_extb[].
 *   size        - byte count starting at startoffset. It is added to the
 *                 in-block offset, and the function panics if the sum
 *                 exceeds fs_bsize (a request may not cross a block).
 *   cred        - credential handed to ffs_alloc() / ffs_realloccg()
 *                 (and to chkdq() when QUOTA is defined).
 *   flags       - the bits tested here are IO_EXT, IO_SYNC, BA_METAONLY,
 *                 BA_CLRBUF and the BA_SEQMASK field.
 *   bpp         - set to NULL on entry; on success receives the buffer
 *                 for the data block, or, with BA_METAONLY, the buffer of
 *                 the last indirect block visited.
 *
 * Returns 0 on success, EFBIG when the logical block number is negative
 * (or, with IO_EXT, not below NXADDR), or the error reported by
 * ufs_getlbns(), ffs_alloc(), ffs_realloccg(), bread(), cluster_read()
 * or bwrite().
475 */
476 int
477 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
478 struct ucred *cred, int flags, struct buf **bpp)
479 {
480 struct inode *ip;
481 struct ufs2_dinode *dp;
482 ufs_lbn_t lbn, lastlbn;
483 struct fs *fs;
484 struct buf *bp, *nbp;
485 struct ufsmount *ump;
486 struct indir indirs[NIADDR + 2];
487 ufs2_daddr_t nb, newb, *bap, pref;
488 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
489 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
490 int deallocated, osize, nsize, num, i, error;
/*
 * Index into indirs[] of an already existing indirect block whose
 * slot was filled in by this call; -1 while there is none. Used
 * only by the fail path.
 */
491 int unwindidx = -1;
492
493 ip = VTOI(vp);
494 dp = ip->i_din2;
495 fs = ip->i_fs;
496 ump = ip->i_ump;
/*
 * Split the request into a logical block number and the number of
 * bytes from the start of that block to the end of the request.
 */
497 lbn = lblkno(fs, startoffset);
498 size = blkoff(fs, startoffset) + size;
499 if (size > fs->fs_bsize)
500 panic("ffs_balloc_ufs2: blk too big");
501 *bpp = NULL;
502 if (lbn < 0)
503 return (EFBIG);
504
505 /*
506 * Check for allocating external data.
 *
 * This whole branch works on di_extb[] / di_extsize instead of
 * di_db[] / i_size, addresses its buffers with the negative logical
 * block number -1 - lbn, marks every buffer BX_ALTDATA, and always
 * returns from inside the branch.
507 */
508 if (flags & IO_EXT) {
/* Only logical blocks below NXADDR are accepted here. */
509 if (lbn >= NXADDR)
510 return (EFBIG);
511 /*
512 * If the next write will extend the data into a new block,
513 * and the data is currently composed of a fragment
514 * this fragment has to be extended to be a full block.
515 */
516 lastlbn = lblkno(fs, dp->di_extsize);
517 if (lastlbn < lbn) {
518 nb = lastlbn;
519 osize = sblksize(fs, dp->di_extsize, nb);
520 if (osize < fs->fs_bsize && osize > 0) {
/*
 * NOTE(review): no UFS_UNLOCK appears anywhere in this
 * function; presumably ffs_realloccg() and ffs_alloc()
 * release the lock before returning, on success and on
 * error alike. Confirm against ffs_alloc.c.
 */
521 UFS_LOCK(ump);
522 error = ffs_realloccg(ip, -1 - nb,
523 dp->di_extb[nb],
524 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
525 &dp->di_extb[0]), osize,
526 (int)fs->fs_bsize, cred, &bp);
527 if (error)
528 return (error);
529 if (DOINGSOFTDEP(vp))
530 softdep_setup_allocext(ip, nb,
531 dbtofsb(fs, bp->b_blkno),
532 dp->di_extb[nb],
533 fs->fs_bsize, osize, bp);
534 dp->di_extsize = smalllblktosize(fs, nb + 1);
535 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
536 bp->b_xflags |= BX_ALTDATA;
537 ip->i_flag |= IN_CHANGE | IN_UPDATE;
538 if (flags & IO_SYNC)
539 bwrite(bp);
540 else
541 bawrite(bp);
542 }
543 }
544 /*
545 * All blocks are direct blocks
546 */
547 if (flags & BA_METAONLY)
548 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
549 nb = dp->di_extb[lbn];
/*
 * A block is assigned and di_extsize is not below
 * smalllblktosize(fs, lbn + 1): read the full block and return
 * it without allocating anything.
 */
550 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
551 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
552 if (error) {
553 brelse(bp);
554 return (error);
555 }
556 bp->b_blkno = fsbtodb(fs, nb);
557 bp->b_xflags |= BX_ALTDATA;
558 *bpp = bp;
559 return (0);
560 }
561 if (nb != 0) {
562 /*
563 * Consider need to reallocate a fragment.
 *
 * osize and nsize are the fragroundup() results for the
 * current in-block data size and for the requested extent.
564 */
565 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
566 nsize = fragroundup(fs, size);
567 if (nsize <= osize) {
/* What is already allocated is large enough: read it. */
568 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
569 if (error) {
570 brelse(bp);
571 return (error);
572 }
573 bp->b_blkno = fsbtodb(fs, nb);
574 bp->b_xflags |= BX_ALTDATA;
575 } else {
/* Grow the existing allocation from osize to nsize. */
576 UFS_LOCK(ump);
577 error = ffs_realloccg(ip, -1 - lbn,
578 dp->di_extb[lbn],
579 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
580 &dp->di_extb[0]), osize, nsize, cred, &bp);
581 if (error)
582 return (error);
583 bp->b_xflags |= BX_ALTDATA;
584 if (DOINGSOFTDEP(vp))
585 softdep_setup_allocext(ip, lbn,
586 dbtofsb(fs, bp->b_blkno), nb,
587 nsize, osize, bp);
588 }
589 } else {
/*
 * Nothing is assigned to this slot yet. Allocate only
 * fragroundup(size) bytes when di_extsize is below
 * smalllblktosize(fs, lbn + 1), a full block otherwise.
 */
590 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
591 nsize = fragroundup(fs, size);
592 else
593 nsize = fs->fs_bsize;
594 UFS_LOCK(ump);
595 error = ffs_alloc(ip, lbn,
596 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
597 nsize, cred, &newb);
598 if (error)
599 return (error);
600 bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
601 bp->b_blkno = fsbtodb(fs, newb);
602 bp->b_xflags |= BX_ALTDATA;
603 if (flags & BA_CLRBUF)
604 vfs_bio_clrbuf(bp);
605 if (DOINGSOFTDEP(vp))
606 softdep_setup_allocext(ip, lbn, newb, 0,
607 nsize, 0, bp);
608 }
/*
 * Common tail: store the address taken from the buffer in the
 * external-data pointer and mark the inode changed.
 */
609 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
610 ip->i_flag |= IN_CHANGE | IN_UPDATE;
611 *bpp = bp;
612 return (0);
613 }
614 /*
615 * If the next write will extend the file into a new block,
616 * and the file is currently composed of a fragment
617 * this fragment has to be extended to be a full block.
618 */
619 lastlbn = lblkno(fs, ip->i_size);
620 if (lastlbn < NDADDR && lastlbn < lbn) {
621 nb = lastlbn;
622 osize = blksize(fs, ip, nb);
623 if (osize < fs->fs_bsize && osize > 0) {
624 UFS_LOCK(ump);
625 error = ffs_realloccg(ip, nb, dp->di_db[nb],
626 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
627 &dp->di_db[0]), osize, (int)fs->fs_bsize,
628 cred, &bp);
629 if (error)
630 return (error);
631 if (DOINGSOFTDEP(vp))
632 softdep_setup_allocdirect(ip, nb,
633 dbtofsb(fs, bp->b_blkno),
634 dp->di_db[nb],
635 fs->fs_bsize, osize, bp);
/*
 * The in-core and on-disk sizes are advanced to
 * smalllblktosize(fs, nb + 1), presumably the byte offset
 * at which block nb ends, and the direct pointer is updated
 * from the buffer that ffs_realloccg() returned.
 */
636 ip->i_size = smalllblktosize(fs, nb + 1);
637 dp->di_size = ip->i_size;
638 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
639 ip->i_flag |= IN_CHANGE | IN_UPDATE;
640 if (flags & IO_SYNC)
641 bwrite(bp);
642 else
643 bawrite(bp);
644 }
645 }
646 /*
647 * The first NDADDR blocks are direct blocks
648 */
649 if (lbn < NDADDR) {
650 if (flags & BA_METAONLY)
651 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
652 nb = dp->di_db[lbn];
/*
 * A block is assigned and i_size is not below
 * smalllblktosize(fs, lbn + 1): read the full block and
 * return it without allocating anything.
 */
653 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
654 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
655 if (error) {
656 brelse(bp);
657 return (error);
658 }
659 bp->b_blkno = fsbtodb(fs, nb);
660 *bpp = bp;
661 return (0);
662 }
663 if (nb != 0) {
664 /*
665 * Consider need to reallocate a fragment.
 *
 * osize and nsize are the fragroundup() results for the
 * current in-block file size and for the requested extent.
666 */
667 osize = fragroundup(fs, blkoff(fs, ip->i_size));
668 nsize = fragroundup(fs, size);
669 if (nsize <= osize) {
/* What is already allocated is large enough: read it. */
670 error = bread(vp, lbn, osize, NOCRED, &bp);
671 if (error) {
672 brelse(bp);
673 return (error);
674 }
675 bp->b_blkno = fsbtodb(fs, nb);
676 } else {
/* Grow the existing allocation from osize to nsize. */
677 UFS_LOCK(ump);
678 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
679 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
680 &dp->di_db[0]), osize, nsize, cred, &bp);
681 if (error)
682 return (error);
683 if (DOINGSOFTDEP(vp))
684 softdep_setup_allocdirect(ip, lbn,
685 dbtofsb(fs, bp->b_blkno), nb,
686 nsize, osize, bp);
687 }
688 } else {
/*
 * Nothing is assigned to this slot yet. Allocate only
 * fragroundup(size) bytes when i_size is below
 * smalllblktosize(fs, lbn + 1), a full block otherwise.
 */
689 if (ip->i_size < smalllblktosize(fs, lbn + 1))
690 nsize = fragroundup(fs, size);
691 else
692 nsize = fs->fs_bsize;
693 UFS_LOCK(ump);
694 error = ffs_alloc(ip, lbn,
695 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
696 &dp->di_db[0]), nsize, cred, &newb);
697 if (error)
698 return (error);
699 bp = getblk(vp, lbn, nsize, 0, 0, 0);
700 bp->b_blkno = fsbtodb(fs, newb);
701 if (flags & BA_CLRBUF)
702 vfs_bio_clrbuf(bp);
703 if (DOINGSOFTDEP(vp))
704 softdep_setup_allocdirect(ip, lbn, newb, 0,
705 nsize, 0, bp);
706 }
/*
 * Common tail for the two allocating cases and the in-place
 * read: store the address taken from the buffer in the direct
 * pointer and mark the inode changed.
 */
707 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
708 ip->i_flag |= IN_CHANGE | IN_UPDATE;
709 *bpp = bp;
710 return (0);
711 }
712 /*
713 * Determine the number of levels of indirection.
714 */
715 pref = 0;
716 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
717 return(error);
718 #ifdef INVARIANTS
719 if (num < 1)
720 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
721 #endif
722 /*
723 * Fetch the first indirect block allocating if necessary.
 *
 * After the decrement, num is the value of i at which the walk
 * through indirs[] further down stops.
 *
 * allociblk[] (filled through allocblk) and lbns[] (filled through
 * lbns_remfree) record the disk address and the logical block number
 * of every block allocated by this call, so that the fail path can
 * invalidate the buffers and free the blocks. allocib stays NULL
 * unless the inode indirect pointer itself is set by this call.
724 */
725 --num;
726 nb = dp->di_ib[indirs[0].in_off];
727 allocib = NULL;
728 allocblk = allociblk;
729 lbns_remfree = lbns;
730 if (nb == 0) {
731 UFS_LOCK(ump);
732 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
733 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
734 cred, &newb)) != 0)
735 return (error);
736 nb = newb;
737 *allocblk++ = nb;
738 *lbns_remfree++ = indirs[1].in_lbn;
739 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
740 bp->b_blkno = fsbtodb(fs, nb);
741 vfs_bio_clrbuf(bp);
742 if (DOINGSOFTDEP(vp)) {
743 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
744 newb, 0, fs->fs_bsize, 0, bp);
745 bdwrite(bp);
746 } else {
747 /*
748 * Write synchronously so that indirect blocks
749 * never point at garbage.
 *
 * (When DOINGASYNC(vp) is true a delayed write is
 * used instead.)
750 */
751 if (DOINGASYNC(vp))
752 bdwrite(bp);
753 else if ((error = bwrite(bp)) != 0)
754 goto fail;
755 }
/*
 * Only now is the new block hooked into the inode; allocib
 * remembers the slot so the fail path can zero it again.
 */
756 allocib = &dp->di_ib[indirs[0].in_off];
757 *allocib = nb;
758 ip->i_flag |= IN_CHANGE | IN_UPDATE;
759 }
760 /*
761 * Fetch through the indirect blocks, allocating as necessary.
762 */
763 for (i = 1;;) {
764 error = bread(vp,
765 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
766 if (error) {
767 brelse(bp);
768 goto fail;
769 }
770 bap = (ufs2_daddr_t *)bp->b_data;
771 nb = bap[indirs[i].in_off];
/*
 * At i == num, bp is the last indirect block to visit and nb is
 * the pointer read from it; the loop exits with bp still held.
 */
772 if (i == num)
773 break;
/*
 * i is advanced before nb is tested, so from here on indirs[i]
 * names the next-level block and indirs[i - 1] the entry that
 * was just read from bp.
 */
774 i += 1;
775 if (nb != 0) {
/* Next level already exists: release this one and descend. */
776 bqrelse(bp);
777 continue;
778 }
779 UFS_LOCK(ump);
780 if (pref == 0)
781 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
782 if ((error =
783 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
784 brelse(bp);
785 goto fail;
786 }
787 nb = newb;
788 *allocblk++ = nb;
789 *lbns_remfree++ = indirs[i].in_lbn;
790 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
791 nbp->b_blkno = fsbtodb(fs, nb);
792 vfs_bio_clrbuf(nbp);
793 if (DOINGSOFTDEP(vp)) {
794 softdep_setup_allocindir_meta(nbp, ip, bp,
795 indirs[i - 1].in_off, nb);
796 bdwrite(nbp);
797 } else {
798 /*
799 * Write synchronously so that indirect blocks
800 * never point at garbage.
801 */
802 if ((error = bwrite(nbp)) != 0) {
803 brelse(bp);
804 goto fail;
805 }
806 }
/* Link the new indirect block into its parent (bp). */
807 bap[indirs[i - 1].in_off] = nb;
/*
 * If the inode pointer was not set by this call, remember the
 * first existing indirect block that was modified so the fail
 * path can clear the slot written above.
 */
808 if (allocib == NULL && unwindidx < 0)
809 unwindidx = i - 1;
810 /*
811 * If required, write synchronously, otherwise use
812 * delayed write.
813 */
814 if (flags & IO_SYNC) {
815 bwrite(bp);
816 } else {
817 if (bp->b_bufsize == fs->fs_bsize)
818 bp->b_flags |= B_CLUSTEROK;
819 bdwrite(bp);
820 }
821 }
822 /*
823 * If asked only for the indirect block, then return it.
 * The buffer is handed back still held; it is not released here.
824 */
825 if (flags & BA_METAONLY) {
826 *bpp = bp;
827 return (0);
828 }
829 /*
830 * Get the data block, allocating if necessary.
831 */
832 if (nb == 0) {
833 UFS_LOCK(ump);
834 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
835 error = ffs_alloc(ip,
836 lbn, pref, (int)fs->fs_bsize, cred, &newb);
837 if (error) {
838 brelse(bp);
839 goto fail;
840 }
841 nb = newb;
842 *allocblk++ = nb;
843 *lbns_remfree++ = lbn;
844 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
845 nbp->b_blkno = fsbtodb(fs, nb);
846 if (flags & BA_CLRBUF)
847 vfs_bio_clrbuf(nbp);
848 if (DOINGSOFTDEP(vp))
849 softdep_setup_allocindir_page(ip, lbn, bp,
850 indirs[i].in_off, nb, 0, nbp);
/* Record the new data block in the indirect block. */
851 bap[indirs[i].in_off] = nb;
852 /*
853 * If required, write synchronously, otherwise use
854 * delayed write.
855 */
856 if (flags & IO_SYNC) {
857 bwrite(bp);
858 } else {
859 if (bp->b_bufsize == fs->fs_bsize)
860 bp->b_flags |= B_CLUSTEROK;
861 bdwrite(bp);
862 }
863 *bpp = nbp;
864 return (0);
865 }
/* The data block already exists; the indirect block is not needed. */
866 brelse(bp);
867 /*
868 * If requested clear invalid portions of the buffer. If we
869 * have to do a read-before-write (typical if BA_CLRBUF is set),
870 * try to do some read-ahead in the sequential case to reduce
871 * the number of I/O transactions.
 *
 * cluster_read() is chosen when the BA_SEQMASK field of flags is
 * non-zero and the mount does not have MNT_NOCLUSTERR set; bread()
 * is used otherwise. Without BA_CLRBUF the buffer is only obtained
 * with getblk() and given its device address.
872 */
873 if (flags & BA_CLRBUF) {
874 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
875 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
876 error = cluster_read(vp, ip->i_size, lbn,
877 (int)fs->fs_bsize, NOCRED,
878 MAXBSIZE, seqcount, &nbp);
879 } else {
880 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
881 }
882 if (error) {
883 brelse(nbp);
884 goto fail;
885 }
886 } else {
887 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
888 nbp->b_blkno = fsbtodb(fs, nb);
889 }
890 *bpp = nbp;
891 return (0);
892 fail:
893 /*
894 * If we have failed to allocate any blocks, simply return the error.
895 * This is the usual case and avoids the need to fsync the file.
896 */
897 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
898 return (error);
899 /*
900 * If we have failed part way through block allocation, we
901 * have to deallocate any indirect blocks that we have allocated.
902 * We have to fsync the file before we start to get rid of all
903 * of its dependencies so that we do not leave them dangling.
904 * We have to sync it at the end so that the soft updates code
905 * does not find any untracked changes. Although this is really
906 * slow, running out of disk space is not expected to be a common
907 * occurrence. The error return from fsync is ignored as we already
908 * have an error to return to the user.
909 */
910 (void) ffs_syncvnode(vp, MNT_WAIT);
/*
 * Walk the blocks recorded in allociblk[] / lbns[]; deallocated
 * accumulates fs_bsize bytes for each of them.
 */
911 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
912 blkp < allocblk; blkp++, lbns_remfree++) {
913 /*
914 * We shall not leave the freed blocks on the vnode
915 * buffer object lists.
916 */
917 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
918 if (bp != NULL) {
919 bp->b_flags |= (B_INVAL | B_RELBUF);
920 bp->b_flags &= ~B_ASYNC;
921 brelse(bp);
922 }
923 deallocated += fs->fs_bsize;
924 }
/*
 * Clear the pointer through which the newly allocated chain was
 * reachable: the inode slot when this call set it, otherwise the
 * slot in the existing indirect block noted in unwindidx.
 */
925 if (allocib != NULL) {
926 *allocib = 0;
927 } else if (unwindidx >= 0) {
928 int r;
929
930 r = bread(vp, indirs[unwindidx].in_lbn,
931 (int)fs->fs_bsize, NOCRED, &bp);
932 if (r) {
/*
 * NOTE(review): panic() is not expected to return, so the
 * brelse() that follows looks unreachable - confirm.
 */
933 panic("Could not unwind indirect block, error %d", r);
934 brelse(bp);
935 } else {
936 bap = (ufs2_daddr_t *)bp->b_data;
937 bap[indirs[unwindidx].in_off] = 0;
938 if (flags & IO_SYNC) {
939 bwrite(bp);
940 } else {
941 if (bp->b_bufsize == fs->fs_bsize)
942 bp->b_flags |= B_CLUSTEROK;
943 bdwrite(bp);
944 }
945 }
946 }
947 if (deallocated) {
948 #ifdef QUOTA
949 /*
950 * Restore user's disk quota because allocation failed.
951 */
952 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
953 #endif
/* Take the released space back out of the inode block count. */
954 dp->di_blocks -= btodb(deallocated);
955 ip->i_flag |= IN_CHANGE | IN_UPDATE;
956 }
957 (void) ffs_syncvnode(vp, MNT_WAIT);
958 /*
959 * After the buffers are invalidated and on-disk pointers are
960 * cleared, free the blocks.
961 */
962 for (blkp = allociblk; blkp < allocblk; blkp++) {
963 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
964 ip->i_number);
965 }
966 return (error);
967 }
Cache object: 31a64f647b12044b714bd6b4d80aec80
|