1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72 #include <sys/vmmeter.h>
73
74 #include <ufs/ufs/quota.h>
75 #include <ufs/ufs/inode.h>
76 #include <ufs/ufs/ufs_extern.h>
77 #include <ufs/ufs/extattr.h>
78 #include <ufs/ufs/ufsmount.h>
79
80 #include <ufs/ffs/fs.h>
81 #include <ufs/ffs/ffs_extern.h>
82
83 /*
84 * Balloc defines the structure of filesystem storage
85 * by allocating the physical blocks on a device given
86 * the inode and the logical block number in a file.
87 * This is the allocation strategy for UFS1. Below is
88 * the allocation strategy for UFS2.
89 */
90 int
91 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
92 struct ucred *cred, int flags, struct buf **bpp)
93 {
94 struct inode *ip;
95 struct ufs1_dinode *dp;
96 ufs_lbn_t lbn, lastlbn;
97 struct fs *fs;
98 ufs1_daddr_t nb;
99 struct buf *bp, *nbp;
100 struct ufsmount *ump;
101 struct indir indirs[NIADDR + 2];
102 int deallocated, osize, nsize, num, i, error;
103 ufs2_daddr_t newb;
104 ufs1_daddr_t *bap, pref;
105 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
106 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
107 int unwindidx = -1;
108 int saved_inbdflush;
109 static struct timeval lastfail;
110 static int curfail;
111 int gbflags, reclaimed;
112
113 ip = VTOI(vp);
114 dp = ip->i_din1;
115 fs = ITOFS(ip);
116 ump = ITOUMP(ip);
117 lbn = lblkno(fs, startoffset);
118 size = blkoff(fs, startoffset) + size;
119 reclaimed = 0;
120 if (size > fs->fs_bsize)
121 panic("ffs_balloc_ufs1: blk too big");
122 *bpp = NULL;
123 if (flags & IO_EXT)
124 return (EOPNOTSUPP);
125 if (lbn < 0)
126 return (EFBIG);
127 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
128
129 if (DOINGSOFTDEP(vp))
130 softdep_prealloc(vp, MNT_WAIT);
131 /*
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
135 */
136 lastlbn = lblkno(fs, ip->i_size);
137 if (lastlbn < NDADDR && lastlbn < lbn) {
138 nb = lastlbn;
139 osize = blksize(fs, ip, nb);
140 if (osize < fs->fs_bsize && osize > 0) {
141 UFS_LOCK(ump);
142 error = ffs_realloccg(ip, nb, dp->di_db[nb],
143 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
144 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
145 cred, &bp);
146 if (error)
147 return (error);
148 if (DOINGSOFTDEP(vp))
149 softdep_setup_allocdirect(ip, nb,
150 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
151 fs->fs_bsize, osize, bp);
152 ip->i_size = smalllblktosize(fs, nb + 1);
153 dp->di_size = ip->i_size;
154 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
155 ip->i_flag |= IN_SIZEMOD | IN_CHANGE | IN_UPDATE |
156 IN_IBLKDATA;
157 if (flags & IO_SYNC)
158 bwrite(bp);
159 else
160 bawrite(bp);
161 }
162 }
163 /*
164 * The first NDADDR blocks are direct blocks
165 */
166 if (lbn < NDADDR) {
167 if (flags & BA_METAONLY)
168 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
169 nb = dp->di_db[lbn];
170 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
171 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
172 if (error) {
173 brelse(bp);
174 return (error);
175 }
176 bp->b_blkno = fsbtodb(fs, nb);
177 *bpp = bp;
178 return (0);
179 }
180 if (nb != 0) {
181 /*
182 * Consider need to reallocate a fragment.
183 */
184 osize = fragroundup(fs, blkoff(fs, ip->i_size));
185 nsize = fragroundup(fs, size);
186 if (nsize <= osize) {
187 error = bread(vp, lbn, osize, NOCRED, &bp);
188 if (error) {
189 brelse(bp);
190 return (error);
191 }
192 bp->b_blkno = fsbtodb(fs, nb);
193 } else {
194 UFS_LOCK(ump);
195 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
196 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
197 &dp->di_db[0]), osize, nsize, flags,
198 cred, &bp);
199 if (error)
200 return (error);
201 if (DOINGSOFTDEP(vp))
202 softdep_setup_allocdirect(ip, lbn,
203 dbtofsb(fs, bp->b_blkno), nb,
204 nsize, osize, bp);
205 }
206 } else {
207 if (ip->i_size < smalllblktosize(fs, lbn + 1))
208 nsize = fragroundup(fs, size);
209 else
210 nsize = fs->fs_bsize;
211 UFS_LOCK(ump);
212 error = ffs_alloc(ip, lbn,
213 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
214 nsize, flags, cred, &newb);
215 if (error)
216 return (error);
217 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
218 bp->b_blkno = fsbtodb(fs, newb);
219 if (flags & BA_CLRBUF)
220 vfs_bio_clrbuf(bp);
221 if (DOINGSOFTDEP(vp))
222 softdep_setup_allocdirect(ip, lbn, newb, 0,
223 nsize, 0, bp);
224 }
225 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
226 ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_IBLKDATA;
227 *bpp = bp;
228 return (0);
229 }
230 /*
231 * Determine the number of levels of indirection.
232 */
233 pref = 0;
234 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
235 return(error);
236 #ifdef INVARIANTS
237 if (num < 1)
238 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
239 #endif
240 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
241 /*
242 * Fetch the first indirect block allocating if necessary.
243 */
244 --num;
245 nb = dp->di_ib[indirs[0].in_off];
246 allocib = NULL;
247 allocblk = allociblk;
248 lbns_remfree = lbns;
249 if (nb == 0) {
250 UFS_LOCK(ump);
251 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
252 (ufs1_daddr_t *)0);
253 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
254 flags, cred, &newb)) != 0) {
255 curthread_pflags_restore(saved_inbdflush);
256 return (error);
257 }
258 pref = newb + fs->fs_frag;
259 nb = newb;
260 MPASS(allocblk < allociblk + nitems(allociblk));
261 MPASS(lbns_remfree < lbns + nitems(lbns));
262 *allocblk++ = nb;
263 *lbns_remfree++ = indirs[1].in_lbn;
264 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
265 bp->b_blkno = fsbtodb(fs, nb);
266 vfs_bio_clrbuf(bp);
267 if (DOINGSOFTDEP(vp)) {
268 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
269 newb, 0, fs->fs_bsize, 0, bp);
270 bdwrite(bp);
271 } else {
272 /*
273 * Write synchronously so that indirect blocks
274 * never point at garbage.
275 */
276 if (DOINGASYNC(vp))
277 bdwrite(bp);
278 else if ((error = bwrite(bp)) != 0)
279 goto fail;
280 }
281 allocib = &dp->di_ib[indirs[0].in_off];
282 *allocib = nb;
283 ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_IBLKDATA;
284 }
285 /*
286 * Fetch through the indirect blocks, allocating as necessary.
287 */
288 retry:
289 for (i = 1;;) {
290 error = bread(vp,
291 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
292 if (error) {
293 brelse(bp);
294 goto fail;
295 }
296 bap = (ufs1_daddr_t *)bp->b_data;
297 nb = bap[indirs[i].in_off];
298 if (i == num)
299 break;
300 i += 1;
301 if (nb != 0) {
302 bqrelse(bp);
303 continue;
304 }
305 UFS_LOCK(ump);
306 /*
307 * If parent indirect has just been allocated, try to cluster
308 * immediately following it.
309 */
310 if (pref == 0)
311 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
312 (ufs1_daddr_t *)0);
313 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
314 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
315 brelse(bp);
316 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
317 UFS_LOCK(ump);
318 softdep_request_cleanup(fs, vp, cred,
319 FLUSH_BLOCKS_WAIT);
320 UFS_UNLOCK(ump);
321 goto retry;
322 }
323 if (ppsratecheck(&lastfail, &curfail, 1)) {
324 ffs_fserr(fs, ip->i_number, "filesystem full");
325 uprintf("\n%s: write failed, filesystem "
326 "is full\n", fs->fs_fsmnt);
327 }
328 goto fail;
329 }
330 pref = newb + fs->fs_frag;
331 nb = newb;
332 MPASS(allocblk < allociblk + nitems(allociblk));
333 MPASS(lbns_remfree < lbns + nitems(lbns));
334 *allocblk++ = nb;
335 *lbns_remfree++ = indirs[i].in_lbn;
336 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
337 nbp->b_blkno = fsbtodb(fs, nb);
338 vfs_bio_clrbuf(nbp);
339 if (DOINGSOFTDEP(vp)) {
340 softdep_setup_allocindir_meta(nbp, ip, bp,
341 indirs[i - 1].in_off, nb);
342 bdwrite(nbp);
343 } else {
344 /*
345 * Write synchronously so that indirect blocks
346 * never point at garbage.
347 */
348 if ((error = bwrite(nbp)) != 0) {
349 brelse(bp);
350 goto fail;
351 }
352 }
353 bap[indirs[i - 1].in_off] = nb;
354 if (allocib == NULL && unwindidx < 0)
355 unwindidx = i - 1;
356 /*
357 * If required, write synchronously, otherwise use
358 * delayed write.
359 */
360 if (flags & IO_SYNC) {
361 bwrite(bp);
362 } else {
363 if (bp->b_bufsize == fs->fs_bsize)
364 bp->b_flags |= B_CLUSTEROK;
365 bdwrite(bp);
366 }
367 }
368 /*
369 * If asked only for the indirect block, then return it.
370 */
371 if (flags & BA_METAONLY) {
372 curthread_pflags_restore(saved_inbdflush);
373 *bpp = bp;
374 return (0);
375 }
376 /*
377 * Get the data block, allocating if necessary.
378 */
379 if (nb == 0) {
380 UFS_LOCK(ump);
381 /*
382 * If allocating metadata at the front of the cylinder
383 * group and parent indirect block has just been allocated,
384 * then cluster next to it if it is the first indirect in
385 * the file. Otherwise it has been allocated in the metadata
386 * area, so we want to find our own place out in the data area.
387 */
388 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
389 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
390 &bap[0]);
391 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
392 flags | IO_BUFLOCKED, cred, &newb);
393 if (error) {
394 brelse(bp);
395 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
396 UFS_LOCK(ump);
397 softdep_request_cleanup(fs, vp, cred,
398 FLUSH_BLOCKS_WAIT);
399 UFS_UNLOCK(ump);
400 goto retry;
401 }
402 if (ppsratecheck(&lastfail, &curfail, 1)) {
403 ffs_fserr(fs, ip->i_number, "filesystem full");
404 uprintf("\n%s: write failed, filesystem "
405 "is full\n", fs->fs_fsmnt);
406 }
407 goto fail;
408 }
409 nb = newb;
410 MPASS(allocblk < allociblk + nitems(allociblk));
411 MPASS(lbns_remfree < lbns + nitems(lbns));
412 *allocblk++ = nb;
413 *lbns_remfree++ = lbn;
414 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
415 nbp->b_blkno = fsbtodb(fs, nb);
416 if (flags & BA_CLRBUF)
417 vfs_bio_clrbuf(nbp);
418 if (DOINGSOFTDEP(vp))
419 softdep_setup_allocindir_page(ip, lbn, bp,
420 indirs[i].in_off, nb, 0, nbp);
421 bap[indirs[i].in_off] = nb;
422 /*
423 * If required, write synchronously, otherwise use
424 * delayed write.
425 */
426 if (flags & IO_SYNC) {
427 bwrite(bp);
428 } else {
429 if (bp->b_bufsize == fs->fs_bsize)
430 bp->b_flags |= B_CLUSTEROK;
431 bdwrite(bp);
432 }
433 curthread_pflags_restore(saved_inbdflush);
434 *bpp = nbp;
435 return (0);
436 }
437 brelse(bp);
438 if (flags & BA_CLRBUF) {
439 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
440 if (seqcount != 0 &&
441 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
442 !(vm_page_count_severe() || buf_dirty_count_severe())) {
443 error = cluster_read(vp, ip->i_size, lbn,
444 (int)fs->fs_bsize, NOCRED,
445 MAXBSIZE, seqcount, gbflags, &nbp);
446 } else {
447 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
448 gbflags, &nbp);
449 }
450 if (error) {
451 brelse(nbp);
452 goto fail;
453 }
454 } else {
455 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
456 nbp->b_blkno = fsbtodb(fs, nb);
457 }
458 curthread_pflags_restore(saved_inbdflush);
459 *bpp = nbp;
460 return (0);
461 fail:
462 curthread_pflags_restore(saved_inbdflush);
463 /*
464 * If we have failed to allocate any blocks, simply return the error.
465 * This is the usual case and avoids the need to fsync the file.
466 */
467 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
468 return (error);
469 /*
470 * If we have failed part way through block allocation, we
471 * have to deallocate any indirect blocks that we have allocated.
472 * We have to fsync the file before we start to get rid of all
473 * of its dependencies so that we do not leave them dangling.
474 * We have to sync it at the end so that the soft updates code
475 * does not find any untracked changes. Although this is really
476 * slow, running out of disk space is not expected to be a common
477 * occurrence. The error return from fsync is ignored as we already
478 * have an error to return to the user.
479 *
480 * XXX Still have to journal the free below
481 */
482 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
483 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
484 blkp < allocblk; blkp++, lbns_remfree++) {
485 /*
486 * We shall not leave the freed blocks on the vnode
487 * buffer object lists.
488 */
489 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
490 GB_NOCREAT | GB_UNMAPPED);
491 if (bp != NULL) {
492 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
493 ("mismatch1 l %jd %jd b %ju %ju",
494 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
495 (uintmax_t)bp->b_blkno,
496 (uintmax_t)fsbtodb(fs, *blkp)));
497 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
498 bp->b_flags &= ~(B_ASYNC | B_CACHE);
499 brelse(bp);
500 }
501 deallocated += fs->fs_bsize;
502 }
503 if (allocib != NULL) {
504 *allocib = 0;
505 } else if (unwindidx >= 0) {
506 int r;
507
508 r = bread(vp, indirs[unwindidx].in_lbn,
509 (int)fs->fs_bsize, NOCRED, &bp);
510 if (r) {
511 panic("Could not unwind indirect block, error %d", r);
512 brelse(bp);
513 } else {
514 bap = (ufs1_daddr_t *)bp->b_data;
515 bap[indirs[unwindidx].in_off] = 0;
516 if (flags & IO_SYNC) {
517 bwrite(bp);
518 } else {
519 if (bp->b_bufsize == fs->fs_bsize)
520 bp->b_flags |= B_CLUSTEROK;
521 bdwrite(bp);
522 }
523 }
524 }
525 if (deallocated) {
526 #ifdef QUOTA
527 /*
528 * Restore user's disk quota because allocation failed.
529 */
530 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
531 #endif
532 dp->di_blocks -= btodb(deallocated);
533 ip->i_flag |= IN_CHANGE | IN_UPDATE;
534 }
535 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
536 /*
537 * After the buffers are invalidated and on-disk pointers are
538 * cleared, free the blocks.
539 */
540 for (blkp = allociblk; blkp < allocblk; blkp++) {
541 #ifdef INVARIANTS
542 if (blkp == allociblk)
543 lbns_remfree = lbns;
544 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
545 GB_NOCREAT | GB_UNMAPPED);
546 if (bp != NULL) {
547 panic("zombie1 %jd %ju %ju",
548 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
549 (uintmax_t)fsbtodb(fs, *blkp));
550 }
551 lbns_remfree++;
552 #endif
553 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
554 ip->i_number, vp->v_type, NULL);
555 }
556 return (error);
557 }
558
559 /*
560 * Balloc defines the structure of file system storage
561 * by allocating the physical blocks on a device given
562 * the inode and the logical block number in a file.
563 * This is the allocation strategy for UFS2. Above is
564 * the allocation strategy for UFS1.
565 */
566 int
567 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
568 struct ucred *cred, int flags, struct buf **bpp)
569 {
570 struct inode *ip;
571 struct ufs2_dinode *dp;
572 ufs_lbn_t lbn, lastlbn;
573 struct fs *fs;
574 struct buf *bp, *nbp;
575 struct ufsmount *ump;
576 struct indir indirs[NIADDR + 2];
577 ufs2_daddr_t nb, newb, *bap, pref;
578 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
579 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
580 int deallocated, osize, nsize, num, i, error;
581 int unwindidx = -1;
582 int saved_inbdflush;
583 static struct timeval lastfail;
584 static int curfail;
585 int gbflags, reclaimed;
586
587 ip = VTOI(vp);
588 dp = ip->i_din2;
589 fs = ITOFS(ip);
590 ump = ITOUMP(ip);
591 lbn = lblkno(fs, startoffset);
592 size = blkoff(fs, startoffset) + size;
593 reclaimed = 0;
594 if (size > fs->fs_bsize)
595 panic("ffs_balloc_ufs2: blk too big");
596 *bpp = NULL;
597 if (lbn < 0)
598 return (EFBIG);
599 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
600
601 if (DOINGSOFTDEP(vp))
602 softdep_prealloc(vp, MNT_WAIT);
603
604 /*
605 * Check for allocating external data.
606 */
607 if (flags & IO_EXT) {
608 if (lbn >= NXADDR)
609 return (EFBIG);
610 /*
611 * If the next write will extend the data into a new block,
612 * and the data is currently composed of a fragment
613 * this fragment has to be extended to be a full block.
614 */
615 lastlbn = lblkno(fs, dp->di_extsize);
616 if (lastlbn < lbn) {
617 nb = lastlbn;
618 osize = sblksize(fs, dp->di_extsize, nb);
619 if (osize < fs->fs_bsize && osize > 0) {
620 UFS_LOCK(ump);
621 error = ffs_realloccg(ip, -1 - nb,
622 dp->di_extb[nb],
623 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
624 &dp->di_extb[0]), osize,
625 (int)fs->fs_bsize, flags, cred, &bp);
626 if (error)
627 return (error);
628 if (DOINGSOFTDEP(vp))
629 softdep_setup_allocext(ip, nb,
630 dbtofsb(fs, bp->b_blkno),
631 dp->di_extb[nb],
632 fs->fs_bsize, osize, bp);
633 dp->di_extsize = smalllblktosize(fs, nb + 1);
634 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
635 bp->b_xflags |= BX_ALTDATA;
636 ip->i_flag |= IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA;
637 if (flags & IO_SYNC)
638 bwrite(bp);
639 else
640 bawrite(bp);
641 }
642 }
643 /*
644 * All blocks are direct blocks
645 */
646 if (flags & BA_METAONLY)
647 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
648 nb = dp->di_extb[lbn];
649 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
650 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
651 gbflags, &bp);
652 if (error) {
653 brelse(bp);
654 return (error);
655 }
656 bp->b_blkno = fsbtodb(fs, nb);
657 bp->b_xflags |= BX_ALTDATA;
658 *bpp = bp;
659 return (0);
660 }
661 if (nb != 0) {
662 /*
663 * Consider need to reallocate a fragment.
664 */
665 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
666 nsize = fragroundup(fs, size);
667 if (nsize <= osize) {
668 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
669 gbflags, &bp);
670 if (error) {
671 brelse(bp);
672 return (error);
673 }
674 bp->b_blkno = fsbtodb(fs, nb);
675 bp->b_xflags |= BX_ALTDATA;
676 } else {
677 UFS_LOCK(ump);
678 error = ffs_realloccg(ip, -1 - lbn,
679 dp->di_extb[lbn],
680 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
681 &dp->di_extb[0]), osize, nsize, flags,
682 cred, &bp);
683 if (error)
684 return (error);
685 bp->b_xflags |= BX_ALTDATA;
686 if (DOINGSOFTDEP(vp))
687 softdep_setup_allocext(ip, lbn,
688 dbtofsb(fs, bp->b_blkno), nb,
689 nsize, osize, bp);
690 }
691 } else {
692 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
693 nsize = fragroundup(fs, size);
694 else
695 nsize = fs->fs_bsize;
696 UFS_LOCK(ump);
697 error = ffs_alloc(ip, lbn,
698 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
699 nsize, flags, cred, &newb);
700 if (error)
701 return (error);
702 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
703 bp->b_blkno = fsbtodb(fs, newb);
704 bp->b_xflags |= BX_ALTDATA;
705 if (flags & BA_CLRBUF)
706 vfs_bio_clrbuf(bp);
707 if (DOINGSOFTDEP(vp))
708 softdep_setup_allocext(ip, lbn, newb, 0,
709 nsize, 0, bp);
710 }
711 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
712 ip->i_flag |= IN_CHANGE | IN_IBLKDATA;
713 *bpp = bp;
714 return (0);
715 }
716 /*
717 * If the next write will extend the file into a new block,
718 * and the file is currently composed of a fragment
719 * this fragment has to be extended to be a full block.
720 */
721 lastlbn = lblkno(fs, ip->i_size);
722 if (lastlbn < NDADDR && lastlbn < lbn) {
723 nb = lastlbn;
724 osize = blksize(fs, ip, nb);
725 if (osize < fs->fs_bsize && osize > 0) {
726 UFS_LOCK(ump);
727 error = ffs_realloccg(ip, nb, dp->di_db[nb],
728 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
729 &dp->di_db[0]), osize, (int)fs->fs_bsize,
730 flags, cred, &bp);
731 if (error)
732 return (error);
733 if (DOINGSOFTDEP(vp))
734 softdep_setup_allocdirect(ip, nb,
735 dbtofsb(fs, bp->b_blkno),
736 dp->di_db[nb],
737 fs->fs_bsize, osize, bp);
738 ip->i_size = smalllblktosize(fs, nb + 1);
739 dp->di_size = ip->i_size;
740 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
741 ip->i_flag |= IN_SIZEMOD | IN_CHANGE | IN_UPDATE |
742 IN_IBLKDATA;
743 if (flags & IO_SYNC)
744 bwrite(bp);
745 else
746 bawrite(bp);
747 }
748 }
749 /*
750 * The first NDADDR blocks are direct blocks
751 */
752 if (lbn < NDADDR) {
753 if (flags & BA_METAONLY)
754 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
755 nb = dp->di_db[lbn];
756 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
757 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
758 gbflags, &bp);
759 if (error) {
760 brelse(bp);
761 return (error);
762 }
763 bp->b_blkno = fsbtodb(fs, nb);
764 *bpp = bp;
765 return (0);
766 }
767 if (nb != 0) {
768 /*
769 * Consider need to reallocate a fragment.
770 */
771 osize = fragroundup(fs, blkoff(fs, ip->i_size));
772 nsize = fragroundup(fs, size);
773 if (nsize <= osize) {
774 error = bread_gb(vp, lbn, osize, NOCRED,
775 gbflags, &bp);
776 if (error) {
777 brelse(bp);
778 return (error);
779 }
780 bp->b_blkno = fsbtodb(fs, nb);
781 } else {
782 UFS_LOCK(ump);
783 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
784 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
785 &dp->di_db[0]), osize, nsize, flags,
786 cred, &bp);
787 if (error)
788 return (error);
789 if (DOINGSOFTDEP(vp))
790 softdep_setup_allocdirect(ip, lbn,
791 dbtofsb(fs, bp->b_blkno), nb,
792 nsize, osize, bp);
793 }
794 } else {
795 if (ip->i_size < smalllblktosize(fs, lbn + 1))
796 nsize = fragroundup(fs, size);
797 else
798 nsize = fs->fs_bsize;
799 UFS_LOCK(ump);
800 error = ffs_alloc(ip, lbn,
801 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
802 &dp->di_db[0]), nsize, flags, cred, &newb);
803 if (error)
804 return (error);
805 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
806 bp->b_blkno = fsbtodb(fs, newb);
807 if (flags & BA_CLRBUF)
808 vfs_bio_clrbuf(bp);
809 if (DOINGSOFTDEP(vp))
810 softdep_setup_allocdirect(ip, lbn, newb, 0,
811 nsize, 0, bp);
812 }
813 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
814 ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_IBLKDATA;
815 *bpp = bp;
816 return (0);
817 }
818 /*
819 * Determine the number of levels of indirection.
820 */
821 pref = 0;
822 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
823 return(error);
824 #ifdef INVARIANTS
825 if (num < 1)
826 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
827 #endif
828 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
829 /*
830 * Fetch the first indirect block allocating if necessary.
831 */
832 --num;
833 nb = dp->di_ib[indirs[0].in_off];
834 allocib = NULL;
835 allocblk = allociblk;
836 lbns_remfree = lbns;
837 if (nb == 0) {
838 UFS_LOCK(ump);
839 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
840 (ufs2_daddr_t *)0);
841 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
842 flags, cred, &newb)) != 0) {
843 curthread_pflags_restore(saved_inbdflush);
844 return (error);
845 }
846 pref = newb + fs->fs_frag;
847 nb = newb;
848 MPASS(allocblk < allociblk + nitems(allociblk));
849 MPASS(lbns_remfree < lbns + nitems(lbns));
850 *allocblk++ = nb;
851 *lbns_remfree++ = indirs[1].in_lbn;
852 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
853 GB_UNMAPPED);
854 bp->b_blkno = fsbtodb(fs, nb);
855 vfs_bio_clrbuf(bp);
856 if (DOINGSOFTDEP(vp)) {
857 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
858 newb, 0, fs->fs_bsize, 0, bp);
859 bdwrite(bp);
860 } else {
861 /*
862 * Write synchronously so that indirect blocks
863 * never point at garbage.
864 */
865 if (DOINGASYNC(vp))
866 bdwrite(bp);
867 else if ((error = bwrite(bp)) != 0)
868 goto fail;
869 }
870 allocib = &dp->di_ib[indirs[0].in_off];
871 *allocib = nb;
872 ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_IBLKDATA;
873 }
874 /*
875 * Fetch through the indirect blocks, allocating as necessary.
876 */
877 retry:
878 for (i = 1;;) {
879 error = bread(vp,
880 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
881 if (error) {
882 brelse(bp);
883 goto fail;
884 }
885 bap = (ufs2_daddr_t *)bp->b_data;
886 nb = bap[indirs[i].in_off];
887 if (i == num)
888 break;
889 i += 1;
890 if (nb != 0) {
891 bqrelse(bp);
892 continue;
893 }
894 UFS_LOCK(ump);
895 /*
896 * If parent indirect has just been allocated, try to cluster
897 * immediately following it.
898 */
899 if (pref == 0)
900 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
901 (ufs2_daddr_t *)0);
902 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
903 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
904 brelse(bp);
905 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
906 UFS_LOCK(ump);
907 softdep_request_cleanup(fs, vp, cred,
908 FLUSH_BLOCKS_WAIT);
909 UFS_UNLOCK(ump);
910 goto retry;
911 }
912 if (ppsratecheck(&lastfail, &curfail, 1)) {
913 ffs_fserr(fs, ip->i_number, "filesystem full");
914 uprintf("\n%s: write failed, filesystem "
915 "is full\n", fs->fs_fsmnt);
916 }
917 goto fail;
918 }
919 pref = newb + fs->fs_frag;
920 nb = newb;
921 MPASS(allocblk < allociblk + nitems(allociblk));
922 MPASS(lbns_remfree < lbns + nitems(lbns));
923 *allocblk++ = nb;
924 *lbns_remfree++ = indirs[i].in_lbn;
925 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
926 GB_UNMAPPED);
927 nbp->b_blkno = fsbtodb(fs, nb);
928 vfs_bio_clrbuf(nbp);
929 if (DOINGSOFTDEP(vp)) {
930 softdep_setup_allocindir_meta(nbp, ip, bp,
931 indirs[i - 1].in_off, nb);
932 bdwrite(nbp);
933 } else {
934 /*
935 * Write synchronously so that indirect blocks
936 * never point at garbage.
937 */
938 if ((error = bwrite(nbp)) != 0) {
939 brelse(bp);
940 goto fail;
941 }
942 }
943 bap[indirs[i - 1].in_off] = nb;
944 if (allocib == NULL && unwindidx < 0)
945 unwindidx = i - 1;
946 /*
947 * If required, write synchronously, otherwise use
948 * delayed write.
949 */
950 if (flags & IO_SYNC) {
951 bwrite(bp);
952 } else {
953 if (bp->b_bufsize == fs->fs_bsize)
954 bp->b_flags |= B_CLUSTEROK;
955 bdwrite(bp);
956 }
957 }
958 /*
959 * If asked only for the indirect block, then return it.
960 */
961 if (flags & BA_METAONLY) {
962 curthread_pflags_restore(saved_inbdflush);
963 *bpp = bp;
964 return (0);
965 }
966 /*
967 * Get the data block, allocating if necessary.
968 */
969 if (nb == 0) {
970 UFS_LOCK(ump);
971 /*
972 * If allocating metadata at the front of the cylinder
973 * group and parent indirect block has just been allocated,
974 * then cluster next to it if it is the first indirect in
975 * the file. Otherwise it has been allocated in the metadata
976 * area, so we want to find our own place out in the data area.
977 */
978 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
979 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
980 &bap[0]);
981 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
982 flags | IO_BUFLOCKED, cred, &newb);
983 if (error) {
984 brelse(bp);
985 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
986 UFS_LOCK(ump);
987 softdep_request_cleanup(fs, vp, cred,
988 FLUSH_BLOCKS_WAIT);
989 UFS_UNLOCK(ump);
990 goto retry;
991 }
992 if (ppsratecheck(&lastfail, &curfail, 1)) {
993 ffs_fserr(fs, ip->i_number, "filesystem full");
994 uprintf("\n%s: write failed, filesystem "
995 "is full\n", fs->fs_fsmnt);
996 }
997 goto fail;
998 }
999 nb = newb;
1000 MPASS(allocblk < allociblk + nitems(allociblk));
1001 MPASS(lbns_remfree < lbns + nitems(lbns));
1002 *allocblk++ = nb;
1003 *lbns_remfree++ = lbn;
1004 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1005 nbp->b_blkno = fsbtodb(fs, nb);
1006 if (flags & BA_CLRBUF)
1007 vfs_bio_clrbuf(nbp);
1008 if (DOINGSOFTDEP(vp))
1009 softdep_setup_allocindir_page(ip, lbn, bp,
1010 indirs[i].in_off, nb, 0, nbp);
1011 bap[indirs[i].in_off] = nb;
1012 /*
1013 * If required, write synchronously, otherwise use
1014 * delayed write.
1015 */
1016 if (flags & IO_SYNC) {
1017 bwrite(bp);
1018 } else {
1019 if (bp->b_bufsize == fs->fs_bsize)
1020 bp->b_flags |= B_CLUSTEROK;
1021 bdwrite(bp);
1022 }
1023 curthread_pflags_restore(saved_inbdflush);
1024 *bpp = nbp;
1025 return (0);
1026 }
1027 brelse(bp);
1028 /*
1029 * If requested clear invalid portions of the buffer. If we
1030 * have to do a read-before-write (typical if BA_CLRBUF is set),
1031 * try to do some read-ahead in the sequential case to reduce
1032 * the number of I/O transactions.
1033 */
1034 if (flags & BA_CLRBUF) {
1035 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1036 if (seqcount != 0 &&
1037 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1038 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1039 error = cluster_read(vp, ip->i_size, lbn,
1040 (int)fs->fs_bsize, NOCRED,
1041 MAXBSIZE, seqcount, gbflags, &nbp);
1042 } else {
1043 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1044 NOCRED, gbflags, &nbp);
1045 }
1046 if (error) {
1047 brelse(nbp);
1048 goto fail;
1049 }
1050 } else {
1051 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1052 nbp->b_blkno = fsbtodb(fs, nb);
1053 }
1054 curthread_pflags_restore(saved_inbdflush);
1055 *bpp = nbp;
1056 return (0);
1057 fail:
1058 curthread_pflags_restore(saved_inbdflush);
1059 /*
1060 * If we have failed to allocate any blocks, simply return the error.
1061 * This is the usual case and avoids the need to fsync the file.
1062 */
1063 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1064 return (error);
1065 /*
1066 * If we have failed part way through block allocation, we
1067 * have to deallocate any indirect blocks that we have allocated.
1068 * We have to fsync the file before we start to get rid of all
1069 * of its dependencies so that we do not leave them dangling.
1070 * We have to sync it at the end so that the soft updates code
1071 * does not find any untracked changes. Although this is really
1072 * slow, running out of disk space is not expected to be a common
1073 * occurrence. The error return from fsync is ignored as we already
1074 * have an error to return to the user.
1075 *
1076 * XXX Still have to journal the free below
1077 */
1078 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1079 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1080 blkp < allocblk; blkp++, lbns_remfree++) {
1081 /*
1082 * We shall not leave the freed blocks on the vnode
1083 * buffer object lists.
1084 */
1085 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1086 GB_NOCREAT | GB_UNMAPPED);
1087 if (bp != NULL) {
1088 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1089 ("mismatch2 l %jd %jd b %ju %ju",
1090 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1091 (uintmax_t)bp->b_blkno,
1092 (uintmax_t)fsbtodb(fs, *blkp)));
1093 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1094 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1095 brelse(bp);
1096 }
1097 deallocated += fs->fs_bsize;
1098 }
1099 if (allocib != NULL) {
1100 *allocib = 0;
1101 } else if (unwindidx >= 0) {
1102 int r;
1103
1104 r = bread(vp, indirs[unwindidx].in_lbn,
1105 (int)fs->fs_bsize, NOCRED, &bp);
1106 if (r) {
1107 panic("Could not unwind indirect block, error %d", r);
1108 brelse(bp);
1109 } else {
1110 bap = (ufs2_daddr_t *)bp->b_data;
1111 bap[indirs[unwindidx].in_off] = 0;
1112 if (flags & IO_SYNC) {
1113 bwrite(bp);
1114 } else {
1115 if (bp->b_bufsize == fs->fs_bsize)
1116 bp->b_flags |= B_CLUSTEROK;
1117 bdwrite(bp);
1118 }
1119 }
1120 }
1121 if (deallocated) {
1122 #ifdef QUOTA
1123 /*
1124 * Restore user's disk quota because allocation failed.
1125 */
1126 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1127 #endif
1128 dp->di_blocks -= btodb(deallocated);
1129 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1130 }
1131 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1132 /*
1133 * After the buffers are invalidated and on-disk pointers are
1134 * cleared, free the blocks.
1135 */
1136 for (blkp = allociblk; blkp < allocblk; blkp++) {
1137 #ifdef INVARIANTS
1138 if (blkp == allociblk)
1139 lbns_remfree = lbns;
1140 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1141 GB_NOCREAT | GB_UNMAPPED);
1142 if (bp != NULL) {
1143 panic("zombie2 %jd %ju %ju",
1144 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1145 (uintmax_t)fsbtodb(fs, *blkp));
1146 }
1147 lbns_remfree++;
1148 #endif
1149 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1150 ip->i_number, vp->v_type, NULL);
1151 }
1152 return (error);
1153 }