1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/8.0/sys/ufs/ffs/ffs_balloc.c 187790 2009-01-27 21:48:47Z rwatson $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
82 /*
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
88 */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
92 {
93 struct inode *ip;
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
96 struct fs *fs;
97 ufs1_daddr_t nb;
98 struct buf *bp, *nbp;
99 struct ufsmount *ump;
100 struct indir indirs[NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
102 ufs2_daddr_t newb;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106 int unwindidx = -1;
107 int saved_inbdflush;
108
109 ip = VTOI(vp);
110 dp = ip->i_din1;
111 fs = ip->i_fs;
112 ump = ip->i_ump;
113 lbn = lblkno(fs, startoffset);
114 size = blkoff(fs, startoffset) + size;
115 if (size > fs->fs_bsize)
116 panic("ffs_balloc_ufs1: blk too big");
117 *bpp = NULL;
118 if (flags & IO_EXT)
119 return (EOPNOTSUPP);
120 if (lbn < 0)
121 return (EFBIG);
122
123 /*
124 * If the next write will extend the file into a new block,
125 * and the file is currently composed of a fragment
126 * this fragment has to be extended to be a full block.
127 */
128 lastlbn = lblkno(fs, ip->i_size);
129 if (lastlbn < NDADDR && lastlbn < lbn) {
130 nb = lastlbn;
131 osize = blksize(fs, ip, nb);
132 if (osize < fs->fs_bsize && osize > 0) {
133 UFS_LOCK(ump);
134 error = ffs_realloccg(ip, nb, dp->di_db[nb],
135 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
136 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
137 cred, &bp);
138 if (error)
139 return (error);
140 if (DOINGSOFTDEP(vp))
141 softdep_setup_allocdirect(ip, nb,
142 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
143 fs->fs_bsize, osize, bp);
144 ip->i_size = smalllblktosize(fs, nb + 1);
145 dp->di_size = ip->i_size;
146 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
147 ip->i_flag |= IN_CHANGE | IN_UPDATE;
148 if (flags & IO_SYNC)
149 bwrite(bp);
150 else
151 bawrite(bp);
152 }
153 }
154 /*
155 * The first NDADDR blocks are direct blocks
156 */
157 if (lbn < NDADDR) {
158 if (flags & BA_METAONLY)
159 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
160 nb = dp->di_db[lbn];
161 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
162 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
163 if (error) {
164 brelse(bp);
165 return (error);
166 }
167 bp->b_blkno = fsbtodb(fs, nb);
168 *bpp = bp;
169 return (0);
170 }
171 if (nb != 0) {
172 /*
173 * Consider need to reallocate a fragment.
174 */
175 osize = fragroundup(fs, blkoff(fs, ip->i_size));
176 nsize = fragroundup(fs, size);
177 if (nsize <= osize) {
178 error = bread(vp, lbn, osize, NOCRED, &bp);
179 if (error) {
180 brelse(bp);
181 return (error);
182 }
183 bp->b_blkno = fsbtodb(fs, nb);
184 } else {
185 UFS_LOCK(ump);
186 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
187 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
188 &dp->di_db[0]), osize, nsize, flags,
189 cred, &bp);
190 if (error)
191 return (error);
192 if (DOINGSOFTDEP(vp))
193 softdep_setup_allocdirect(ip, lbn,
194 dbtofsb(fs, bp->b_blkno), nb,
195 nsize, osize, bp);
196 }
197 } else {
198 if (ip->i_size < smalllblktosize(fs, lbn + 1))
199 nsize = fragroundup(fs, size);
200 else
201 nsize = fs->fs_bsize;
202 UFS_LOCK(ump);
203 error = ffs_alloc(ip, lbn,
204 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
205 nsize, flags, cred, &newb);
206 if (error)
207 return (error);
208 bp = getblk(vp, lbn, nsize, 0, 0, 0);
209 bp->b_blkno = fsbtodb(fs, newb);
210 if (flags & BA_CLRBUF)
211 vfs_bio_clrbuf(bp);
212 if (DOINGSOFTDEP(vp))
213 softdep_setup_allocdirect(ip, lbn, newb, 0,
214 nsize, 0, bp);
215 }
216 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
217 ip->i_flag |= IN_CHANGE | IN_UPDATE;
218 *bpp = bp;
219 return (0);
220 }
221 /*
222 * Determine the number of levels of indirection.
223 */
224 pref = 0;
225 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
226 return(error);
227 #ifdef INVARIANTS
228 if (num < 1)
229 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
230 #endif
231 saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
232 TDP_INBDFLUSH);
233 curthread->td_pflags |= TDP_INBDFLUSH;
234 /*
235 * Fetch the first indirect block allocating if necessary.
236 */
237 --num;
238 nb = dp->di_ib[indirs[0].in_off];
239 allocib = NULL;
240 allocblk = allociblk;
241 lbns_remfree = lbns;
242 if (nb == 0) {
243 UFS_LOCK(ump);
244 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
245 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
246 flags, cred, &newb)) != 0) {
247 curthread->td_pflags &= saved_inbdflush;
248 return (error);
249 }
250 nb = newb;
251 *allocblk++ = nb;
252 *lbns_remfree++ = indirs[1].in_lbn;
253 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
254 bp->b_blkno = fsbtodb(fs, nb);
255 vfs_bio_clrbuf(bp);
256 if (DOINGSOFTDEP(vp)) {
257 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
258 newb, 0, fs->fs_bsize, 0, bp);
259 bdwrite(bp);
260 } else {
261 /*
262 * Write synchronously so that indirect blocks
263 * never point at garbage.
264 */
265 if (DOINGASYNC(vp))
266 bdwrite(bp);
267 else if ((error = bwrite(bp)) != 0)
268 goto fail;
269 }
270 allocib = &dp->di_ib[indirs[0].in_off];
271 *allocib = nb;
272 ip->i_flag |= IN_CHANGE | IN_UPDATE;
273 }
274 /*
275 * Fetch through the indirect blocks, allocating as necessary.
276 */
277 for (i = 1;;) {
278 error = bread(vp,
279 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
280 if (error) {
281 brelse(bp);
282 goto fail;
283 }
284 bap = (ufs1_daddr_t *)bp->b_data;
285 nb = bap[indirs[i].in_off];
286 if (i == num)
287 break;
288 i += 1;
289 if (nb != 0) {
290 bqrelse(bp);
291 continue;
292 }
293 UFS_LOCK(ump);
294 if (pref == 0)
295 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
296 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
297 flags, cred, &newb)) != 0) {
298 brelse(bp);
299 goto fail;
300 }
301 nb = newb;
302 *allocblk++ = nb;
303 *lbns_remfree++ = indirs[i].in_lbn;
304 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
305 nbp->b_blkno = fsbtodb(fs, nb);
306 vfs_bio_clrbuf(nbp);
307 if (DOINGSOFTDEP(vp)) {
308 softdep_setup_allocindir_meta(nbp, ip, bp,
309 indirs[i - 1].in_off, nb);
310 bdwrite(nbp);
311 } else {
312 /*
313 * Write synchronously so that indirect blocks
314 * never point at garbage.
315 */
316 if ((error = bwrite(nbp)) != 0) {
317 brelse(bp);
318 goto fail;
319 }
320 }
321 bap[indirs[i - 1].in_off] = nb;
322 if (allocib == NULL && unwindidx < 0)
323 unwindidx = i - 1;
324 /*
325 * If required, write synchronously, otherwise use
326 * delayed write.
327 */
328 if (flags & IO_SYNC) {
329 bwrite(bp);
330 } else {
331 if (bp->b_bufsize == fs->fs_bsize)
332 bp->b_flags |= B_CLUSTEROK;
333 bdwrite(bp);
334 }
335 }
336 /*
337 * If asked only for the indirect block, then return it.
338 */
339 if (flags & BA_METAONLY) {
340 curthread->td_pflags &= saved_inbdflush;
341 *bpp = bp;
342 return (0);
343 }
344 /*
345 * Get the data block, allocating if necessary.
346 */
347 if (nb == 0) {
348 UFS_LOCK(ump);
349 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
350 error = ffs_alloc(ip,
351 lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
352 if (error) {
353 brelse(bp);
354 goto fail;
355 }
356 nb = newb;
357 *allocblk++ = nb;
358 *lbns_remfree++ = lbn;
359 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
360 nbp->b_blkno = fsbtodb(fs, nb);
361 if (flags & BA_CLRBUF)
362 vfs_bio_clrbuf(nbp);
363 if (DOINGSOFTDEP(vp))
364 softdep_setup_allocindir_page(ip, lbn, bp,
365 indirs[i].in_off, nb, 0, nbp);
366 bap[indirs[i].in_off] = nb;
367 /*
368 * If required, write synchronously, otherwise use
369 * delayed write.
370 */
371 if (flags & IO_SYNC) {
372 bwrite(bp);
373 } else {
374 if (bp->b_bufsize == fs->fs_bsize)
375 bp->b_flags |= B_CLUSTEROK;
376 bdwrite(bp);
377 }
378 curthread->td_pflags &= saved_inbdflush;
379 *bpp = nbp;
380 return (0);
381 }
382 brelse(bp);
383 if (flags & BA_CLRBUF) {
384 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
385 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
386 error = cluster_read(vp, ip->i_size, lbn,
387 (int)fs->fs_bsize, NOCRED,
388 MAXBSIZE, seqcount, &nbp);
389 } else {
390 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
391 }
392 if (error) {
393 brelse(nbp);
394 goto fail;
395 }
396 } else {
397 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
398 nbp->b_blkno = fsbtodb(fs, nb);
399 }
400 curthread->td_pflags &= saved_inbdflush;
401 *bpp = nbp;
402 return (0);
403 fail:
404 curthread->td_pflags &= saved_inbdflush;
405 /*
406 * If we have failed to allocate any blocks, simply return the error.
407 * This is the usual case and avoids the need to fsync the file.
408 */
409 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
410 return (error);
411 /*
412 * If we have failed part way through block allocation, we
413 * have to deallocate any indirect blocks that we have allocated.
414 * We have to fsync the file before we start to get rid of all
415 * of its dependencies so that we do not leave them dangling.
416 * We have to sync it at the end so that the soft updates code
417 * does not find any untracked changes. Although this is really
418 * slow, running out of disk space is not expected to be a common
419 * occurence. The error return from fsync is ignored as we already
420 * have an error to return to the user.
421 */
422 (void) ffs_syncvnode(vp, MNT_WAIT);
423 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
424 blkp < allocblk; blkp++, lbns_remfree++) {
425 /*
426 * We shall not leave the freed blocks on the vnode
427 * buffer object lists.
428 */
429 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
430 if (bp != NULL) {
431 bp->b_flags |= (B_INVAL | B_RELBUF);
432 bp->b_flags &= ~B_ASYNC;
433 brelse(bp);
434 }
435 deallocated += fs->fs_bsize;
436 }
437 if (allocib != NULL) {
438 *allocib = 0;
439 } else if (unwindidx >= 0) {
440 int r;
441
442 r = bread(vp, indirs[unwindidx].in_lbn,
443 (int)fs->fs_bsize, NOCRED, &bp);
444 if (r) {
445 panic("Could not unwind indirect block, error %d", r);
446 brelse(bp);
447 } else {
448 bap = (ufs1_daddr_t *)bp->b_data;
449 bap[indirs[unwindidx].in_off] = 0;
450 if (flags & IO_SYNC) {
451 bwrite(bp);
452 } else {
453 if (bp->b_bufsize == fs->fs_bsize)
454 bp->b_flags |= B_CLUSTEROK;
455 bdwrite(bp);
456 }
457 }
458 }
459 if (deallocated) {
460 #ifdef QUOTA
461 /*
462 * Restore user's disk quota because allocation failed.
463 */
464 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
465 #endif
466 dp->di_blocks -= btodb(deallocated);
467 ip->i_flag |= IN_CHANGE | IN_UPDATE;
468 }
469 (void) ffs_syncvnode(vp, MNT_WAIT);
470 /*
471 * After the buffers are invalidated and on-disk pointers are
472 * cleared, free the blocks.
473 */
474 for (blkp = allociblk; blkp < allocblk; blkp++) {
475 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
476 ip->i_number);
477 }
478 return (error);
479 }
480
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2.  Above is
 * the allocation strategy for UFS1.
 *
 * Parameters and return are as for ffs_balloc_ufs1(), except that
 * IO_EXT is supported here: it allocates within the inode's external
 * attribute area (di_extb[]/di_extsize) instead of the file data.
 * External-attribute buffers use negative logical block numbers
 * (-1 - lbn) and are tagged BX_ALTDATA.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	/* Path of indirect blocks from the inode down to lbn. */
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/*
	 * Record of every block allocated in this call, with the logical
	 * block number of each, so a partial failure can be unwound.
	 */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/*
	 * "size" becomes the extent from the start of the block through
	 * the end of this write; it may legally be less than a full
	 * block (a fragment) but never more.
	 */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		/* The attribute area has only NXADDR direct blocks. */
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				/*
				 * NOTE(review): UFS_LOCK is taken here and
				 * presumably released inside ffs_realloccg()
				 * (same pattern at every ffs_alloc/
				 * ffs_realloccg call below) — confirm
				 * against ffs_alloc.c.
				 */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		/*
		 * Block already allocated at full size: read and return it.
		 */
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment (may move the block). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: fragment for the last partial
			 * block of the area, full block otherwise.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/*
		 * Block already allocated at full size: just read it in
		 * and hand it back.
		 */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the block). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last (partial) block of the file, else a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * saved_inbdflush is a restore MASK: all bits set except
	 * TDP_INBDFLUSH, plus the thread's previous TDP_INBDFLUSH bit.
	 * "td_pflags &= saved_inbdflush" therefore clears the flag only
	 * if it was clear on entry, leaving it set for a recursive caller.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Remember the inode slot so "fail:" can zero it. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * On loop exit, "bp" holds the lowest-level indirect block and
	 * "nb" is the data block address found (or 0) in it.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	/* Data block already exists; drop the indirect block buffer. */
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* The whole chain hangs off this inode slot: cut it there. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): panic() does not return, so the
			 * brelse() below is unreachable dead code.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}
/* Cache object: 3c4edd06d5929e3a2fb834855354c1f0 (web-archive trailer, not source) */