1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * vp		- vnode of the file being extended
 * startoffset	- byte offset in the file at which the write begins
 * size		- number of bytes to be written starting at startoffset;
 *		  together these select the logical block number and the
 *		  fragment size that must be backed by real storage
 * cred		- credentials charged for the allocation (quota)
 * flags	- IO_SYNC (write metadata synchronously), IO_EXT
 *		  (rejected for UFS1), BA_CLRBUF (zero/validate the data
 *		  buffer), BA_METAONLY (return the indirect block, not the
 *		  data block), plus a BA_SEQMASK sequential-read hint
 * bpp		- on success, *bpp is the locked buffer for the requested
 *		  block (or for the indirect block when BA_METAONLY)
 *
 * Returns 0 on success or an errno.  On a partial failure, every block
 * allocated along the way is unwound and freed before returning.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	/* Path of indirect blocks from the inode down to lbn. */
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/* Blocks (and their lbns) allocated so far, kept for unwinding. */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Bytes that must be valid in the final (possibly partial) block. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	/* UFS1 inodes have no extended-attribute area. */
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 *
	 * NOTE(review): ffs_realloccg()/ffs_alloc() below are entered
	 * with the UFS mount lock held and appear to release it on both
	 * the success and error paths -- confirm against ffs_alloc.c.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and fully valid: read it. */
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment already large enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the block). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if the file ends
			 * inside this block, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * Save the prior TDP_INBDFLUSH state as a mask so a single
	 * "td_pflags &= saved_inbdflush" restores it on every exit path.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		/* Record the new block so the fail path can unwind it. */
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		/*
		 * Read-before-write; cluster the read when the caller
		 * provided a sequential hint and clustering is enabled.
		 */
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* First indirect was new: clearing the inode pointer
		 * detaches the whole partially-built chain. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			/* NOTE(review): panic() presumably does not return,
			 * which would make this brelse() dead code -- confirm. */
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}
478
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Same contract as ffs_balloc_ufs1() with one addition: when IO_EXT
 * is set in flags, the allocation is made in the inode's extended
 * attribute area (di_extb[], at most NXADDR direct blocks, addressed
 * with negative logical block numbers -1 - lbn, buffers marked
 * BX_ALTDATA) instead of the regular data area.
 *
 * Returns 0 on success or an errno.  On a partial failure, every block
 * allocated along the way is unwound and freed before returning.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	/* Path of indirect blocks from the inode down to lbn. */
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/* Blocks (and their lbns) allocated so far, kept for unwinding. */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Bytes that must be valid in the final (possibly partial) block. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 *
	 * NOTE(review): ffs_realloccg()/ffs_alloc() below are entered
	 * with the UFS mount lock held and appear to release it on both
	 * the success and error paths -- confirm against ffs_alloc.c.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and fully valid: read it. */
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment already large enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment (may move the block). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if the ext area
			 * ends inside this block, otherwise a full block.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and fully valid: read it. */
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment already large enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the block). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if the file ends
			 * inside this block, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * Save the prior TDP_INBDFLUSH state as a mask so a single
	 * "td_pflags &= saved_inbdflush" restores it on every exit path.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		/* Record the new block so the fail path can unwind it. */
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* First indirect was new: clearing the inode pointer
		 * detaches the whole partially-built chain. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			/* NOTE(review): panic() presumably does not return,
			 * which would make this brelse() dead code -- confirm. */
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}