1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/10.1/sys/ufs/ffs/ffs_balloc.c 262779 2014-03-05 04:23:19Z pfg $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
82 /*
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
88 */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
92 {
93 struct inode *ip;
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
96 struct fs *fs;
97 ufs1_daddr_t nb;
98 struct buf *bp, *nbp;
99 struct ufsmount *ump;
100 struct indir indirs[NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
102 ufs2_daddr_t newb;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106 int unwindidx = -1;
107 int saved_inbdflush;
108 static struct timeval lastfail;
109 static int curfail;
110 int gbflags, reclaimed;
111
112 ip = VTOI(vp);
113 dp = ip->i_din1;
114 fs = ip->i_fs;
115 ump = ip->i_ump;
116 lbn = lblkno(fs, startoffset);
117 size = blkoff(fs, startoffset) + size;
118 reclaimed = 0;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc_ufs1: blk too big");
121 *bpp = NULL;
122 if (flags & IO_EXT)
123 return (EOPNOTSUPP);
124 if (lbn < 0)
125 return (EFBIG);
126 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
127
128 if (DOINGSOFTDEP(vp))
129 softdep_prealloc(vp, MNT_WAIT);
130 /*
131 * If the next write will extend the file into a new block,
132 * and the file is currently composed of a fragment
133 * this fragment has to be extended to be a full block.
134 */
135 lastlbn = lblkno(fs, ip->i_size);
136 if (lastlbn < NDADDR && lastlbn < lbn) {
137 nb = lastlbn;
138 osize = blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
140 UFS_LOCK(ump);
141 error = ffs_realloccg(ip, nb, dp->di_db[nb],
142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
144 cred, &bp);
145 if (error)
146 return (error);
147 if (DOINGSOFTDEP(vp))
148 softdep_setup_allocdirect(ip, nb,
149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
150 fs->fs_bsize, osize, bp);
151 ip->i_size = smalllblktosize(fs, nb + 1);
152 dp->di_size = ip->i_size;
153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
154 ip->i_flag |= IN_CHANGE | IN_UPDATE;
155 if (flags & IO_SYNC)
156 bwrite(bp);
157 else
158 bawrite(bp);
159 }
160 }
161 /*
162 * The first NDADDR blocks are direct blocks
163 */
164 if (lbn < NDADDR) {
165 if (flags & BA_METAONLY)
166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
167 nb = dp->di_db[lbn];
168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
170 if (error) {
171 brelse(bp);
172 return (error);
173 }
174 bp->b_blkno = fsbtodb(fs, nb);
175 *bpp = bp;
176 return (0);
177 }
178 if (nb != 0) {
179 /*
180 * Consider need to reallocate a fragment.
181 */
182 osize = fragroundup(fs, blkoff(fs, ip->i_size));
183 nsize = fragroundup(fs, size);
184 if (nsize <= osize) {
185 error = bread(vp, lbn, osize, NOCRED, &bp);
186 if (error) {
187 brelse(bp);
188 return (error);
189 }
190 bp->b_blkno = fsbtodb(fs, nb);
191 } else {
192 UFS_LOCK(ump);
193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
194 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
195 &dp->di_db[0]), osize, nsize, flags,
196 cred, &bp);
197 if (error)
198 return (error);
199 if (DOINGSOFTDEP(vp))
200 softdep_setup_allocdirect(ip, lbn,
201 dbtofsb(fs, bp->b_blkno), nb,
202 nsize, osize, bp);
203 }
204 } else {
205 if (ip->i_size < smalllblktosize(fs, lbn + 1))
206 nsize = fragroundup(fs, size);
207 else
208 nsize = fs->fs_bsize;
209 UFS_LOCK(ump);
210 error = ffs_alloc(ip, lbn,
211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
212 nsize, flags, cred, &newb);
213 if (error)
214 return (error);
215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
216 bp->b_blkno = fsbtodb(fs, newb);
217 if (flags & BA_CLRBUF)
218 vfs_bio_clrbuf(bp);
219 if (DOINGSOFTDEP(vp))
220 softdep_setup_allocdirect(ip, lbn, newb, 0,
221 nsize, 0, bp);
222 }
223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
224 ip->i_flag |= IN_CHANGE | IN_UPDATE;
225 *bpp = bp;
226 return (0);
227 }
228 /*
229 * Determine the number of levels of indirection.
230 */
231 pref = 0;
232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
233 return(error);
234 #ifdef INVARIANTS
235 if (num < 1)
236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
237 #endif
238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
239 /*
240 * Fetch the first indirect block allocating if necessary.
241 */
242 --num;
243 nb = dp->di_ib[indirs[0].in_off];
244 allocib = NULL;
245 allocblk = allociblk;
246 lbns_remfree = lbns;
247 if (nb == 0) {
248 UFS_LOCK(ump);
249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
250 (ufs1_daddr_t *)0);
251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
252 flags, cred, &newb)) != 0) {
253 curthread_pflags_restore(saved_inbdflush);
254 return (error);
255 }
256 pref = newb + fs->fs_frag;
257 nb = newb;
258 *allocblk++ = nb;
259 *lbns_remfree++ = indirs[1].in_lbn;
260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
261 bp->b_blkno = fsbtodb(fs, nb);
262 vfs_bio_clrbuf(bp);
263 if (DOINGSOFTDEP(vp)) {
264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
265 newb, 0, fs->fs_bsize, 0, bp);
266 bdwrite(bp);
267 } else {
268 /*
269 * Write synchronously so that indirect blocks
270 * never point at garbage.
271 */
272 if (DOINGASYNC(vp))
273 bdwrite(bp);
274 else if ((error = bwrite(bp)) != 0)
275 goto fail;
276 }
277 allocib = &dp->di_ib[indirs[0].in_off];
278 *allocib = nb;
279 ip->i_flag |= IN_CHANGE | IN_UPDATE;
280 }
281 /*
282 * Fetch through the indirect blocks, allocating as necessary.
283 */
284 retry:
285 for (i = 1;;) {
286 error = bread(vp,
287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
288 if (error) {
289 brelse(bp);
290 goto fail;
291 }
292 bap = (ufs1_daddr_t *)bp->b_data;
293 nb = bap[indirs[i].in_off];
294 if (i == num)
295 break;
296 i += 1;
297 if (nb != 0) {
298 bqrelse(bp);
299 continue;
300 }
301 UFS_LOCK(ump);
302 /*
303 * If parent indirect has just been allocated, try to cluster
304 * immediately following it.
305 */
306 if (pref == 0)
307 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
308 (ufs1_daddr_t *)0);
309 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
310 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
311 brelse(bp);
312 if (++reclaimed == 1) {
313 UFS_LOCK(ump);
314 softdep_request_cleanup(fs, vp, cred,
315 FLUSH_BLOCKS_WAIT);
316 UFS_UNLOCK(ump);
317 goto retry;
318 }
319 if (ppsratecheck(&lastfail, &curfail, 1)) {
320 ffs_fserr(fs, ip->i_number, "filesystem full");
321 uprintf("\n%s: write failed, filesystem "
322 "is full\n", fs->fs_fsmnt);
323 }
324 goto fail;
325 }
326 pref = newb + fs->fs_frag;
327 nb = newb;
328 *allocblk++ = nb;
329 *lbns_remfree++ = indirs[i].in_lbn;
330 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
331 nbp->b_blkno = fsbtodb(fs, nb);
332 vfs_bio_clrbuf(nbp);
333 if (DOINGSOFTDEP(vp)) {
334 softdep_setup_allocindir_meta(nbp, ip, bp,
335 indirs[i - 1].in_off, nb);
336 bdwrite(nbp);
337 } else {
338 /*
339 * Write synchronously so that indirect blocks
340 * never point at garbage.
341 */
342 if ((error = bwrite(nbp)) != 0) {
343 brelse(bp);
344 goto fail;
345 }
346 }
347 bap[indirs[i - 1].in_off] = nb;
348 if (allocib == NULL && unwindidx < 0)
349 unwindidx = i - 1;
350 /*
351 * If required, write synchronously, otherwise use
352 * delayed write.
353 */
354 if (flags & IO_SYNC) {
355 bwrite(bp);
356 } else {
357 if (bp->b_bufsize == fs->fs_bsize)
358 bp->b_flags |= B_CLUSTEROK;
359 bdwrite(bp);
360 }
361 }
362 /*
363 * If asked only for the indirect block, then return it.
364 */
365 if (flags & BA_METAONLY) {
366 curthread_pflags_restore(saved_inbdflush);
367 *bpp = bp;
368 return (0);
369 }
370 /*
371 * Get the data block, allocating if necessary.
372 */
373 if (nb == 0) {
374 UFS_LOCK(ump);
375 /*
376 * If allocating metadata at the front of the cylinder
377 * group and parent indirect block has just been allocated,
378 * then cluster next to it if it is the first indirect in
379 * the file. Otherwise it has been allocated in the metadata
380 * area, so we want to find our own place out in the data area.
381 */
382 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
383 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
384 &bap[0]);
385 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
386 flags | IO_BUFLOCKED, cred, &newb);
387 if (error) {
388 brelse(bp);
389 if (++reclaimed == 1) {
390 UFS_LOCK(ump);
391 softdep_request_cleanup(fs, vp, cred,
392 FLUSH_BLOCKS_WAIT);
393 UFS_UNLOCK(ump);
394 goto retry;
395 }
396 if (ppsratecheck(&lastfail, &curfail, 1)) {
397 ffs_fserr(fs, ip->i_number, "filesystem full");
398 uprintf("\n%s: write failed, filesystem "
399 "is full\n", fs->fs_fsmnt);
400 }
401 goto fail;
402 }
403 nb = newb;
404 *allocblk++ = nb;
405 *lbns_remfree++ = lbn;
406 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
407 nbp->b_blkno = fsbtodb(fs, nb);
408 if (flags & BA_CLRBUF)
409 vfs_bio_clrbuf(nbp);
410 if (DOINGSOFTDEP(vp))
411 softdep_setup_allocindir_page(ip, lbn, bp,
412 indirs[i].in_off, nb, 0, nbp);
413 bap[indirs[i].in_off] = nb;
414 /*
415 * If required, write synchronously, otherwise use
416 * delayed write.
417 */
418 if (flags & IO_SYNC) {
419 bwrite(bp);
420 } else {
421 if (bp->b_bufsize == fs->fs_bsize)
422 bp->b_flags |= B_CLUSTEROK;
423 bdwrite(bp);
424 }
425 curthread_pflags_restore(saved_inbdflush);
426 *bpp = nbp;
427 return (0);
428 }
429 brelse(bp);
430 if (flags & BA_CLRBUF) {
431 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
432 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
433 error = cluster_read(vp, ip->i_size, lbn,
434 (int)fs->fs_bsize, NOCRED,
435 MAXBSIZE, seqcount, gbflags, &nbp);
436 } else {
437 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
438 gbflags, &nbp);
439 }
440 if (error) {
441 brelse(nbp);
442 goto fail;
443 }
444 } else {
445 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
446 nbp->b_blkno = fsbtodb(fs, nb);
447 }
448 curthread_pflags_restore(saved_inbdflush);
449 *bpp = nbp;
450 return (0);
451 fail:
452 curthread_pflags_restore(saved_inbdflush);
453 /*
454 * If we have failed to allocate any blocks, simply return the error.
455 * This is the usual case and avoids the need to fsync the file.
456 */
457 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
458 return (error);
459 /*
460 * If we have failed part way through block allocation, we
461 * have to deallocate any indirect blocks that we have allocated.
462 * We have to fsync the file before we start to get rid of all
463 * of its dependencies so that we do not leave them dangling.
464 * We have to sync it at the end so that the soft updates code
465 * does not find any untracked changes. Although this is really
466 * slow, running out of disk space is not expected to be a common
467 * occurrence. The error return from fsync is ignored as we already
468 * have an error to return to the user.
469 *
470 * XXX Still have to journal the free below
471 */
472 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
473 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
474 blkp < allocblk; blkp++, lbns_remfree++) {
475 /*
476 * We shall not leave the freed blocks on the vnode
477 * buffer object lists.
478 */
479 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
480 if (bp != NULL) {
481 bp->b_flags |= (B_INVAL | B_RELBUF);
482 bp->b_flags &= ~B_ASYNC;
483 brelse(bp);
484 }
485 deallocated += fs->fs_bsize;
486 }
487 if (allocib != NULL) {
488 *allocib = 0;
489 } else if (unwindidx >= 0) {
490 int r;
491
492 r = bread(vp, indirs[unwindidx].in_lbn,
493 (int)fs->fs_bsize, NOCRED, &bp);
494 if (r) {
495 panic("Could not unwind indirect block, error %d", r);
496 brelse(bp);
497 } else {
498 bap = (ufs1_daddr_t *)bp->b_data;
499 bap[indirs[unwindidx].in_off] = 0;
500 if (flags & IO_SYNC) {
501 bwrite(bp);
502 } else {
503 if (bp->b_bufsize == fs->fs_bsize)
504 bp->b_flags |= B_CLUSTEROK;
505 bdwrite(bp);
506 }
507 }
508 }
509 if (deallocated) {
510 #ifdef QUOTA
511 /*
512 * Restore user's disk quota because allocation failed.
513 */
514 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
515 #endif
516 dp->di_blocks -= btodb(deallocated);
517 ip->i_flag |= IN_CHANGE | IN_UPDATE;
518 }
519 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
520 /*
521 * After the buffers are invalidated and on-disk pointers are
522 * cleared, free the blocks.
523 */
524 for (blkp = allociblk; blkp < allocblk; blkp++) {
525 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
526 ip->i_number, vp->v_type, NULL);
527 }
528 return (error);
529 }
530
531 /*
532 * Balloc defines the structure of file system storage
533 * by allocating the physical blocks on a device given
534 * the inode and the logical block number in a file.
535 * This is the allocation strategy for UFS2. Above is
536 * the allocation strategy for UFS1.
537 */
538 int
539 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
540 struct ucred *cred, int flags, struct buf **bpp)
541 {
542 struct inode *ip;
543 struct ufs2_dinode *dp;
544 ufs_lbn_t lbn, lastlbn;
545 struct fs *fs;
546 struct buf *bp, *nbp;
547 struct ufsmount *ump;
548 struct indir indirs[NIADDR + 2];
549 ufs2_daddr_t nb, newb, *bap, pref;
550 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
551 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
552 int deallocated, osize, nsize, num, i, error;
553 int unwindidx = -1;
554 int saved_inbdflush;
555 static struct timeval lastfail;
556 static int curfail;
557 int gbflags, reclaimed;
558
559 ip = VTOI(vp);
560 dp = ip->i_din2;
561 fs = ip->i_fs;
562 ump = ip->i_ump;
563 lbn = lblkno(fs, startoffset);
564 size = blkoff(fs, startoffset) + size;
565 reclaimed = 0;
566 if (size > fs->fs_bsize)
567 panic("ffs_balloc_ufs2: blk too big");
568 *bpp = NULL;
569 if (lbn < 0)
570 return (EFBIG);
571 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
572
573 if (DOINGSOFTDEP(vp))
574 softdep_prealloc(vp, MNT_WAIT);
575
576 /*
577 * Check for allocating external data.
578 */
579 if (flags & IO_EXT) {
580 if (lbn >= NXADDR)
581 return (EFBIG);
582 /*
583 * If the next write will extend the data into a new block,
584 * and the data is currently composed of a fragment
585 * this fragment has to be extended to be a full block.
586 */
587 lastlbn = lblkno(fs, dp->di_extsize);
588 if (lastlbn < lbn) {
589 nb = lastlbn;
590 osize = sblksize(fs, dp->di_extsize, nb);
591 if (osize < fs->fs_bsize && osize > 0) {
592 UFS_LOCK(ump);
593 error = ffs_realloccg(ip, -1 - nb,
594 dp->di_extb[nb],
595 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
596 &dp->di_extb[0]), osize,
597 (int)fs->fs_bsize, flags, cred, &bp);
598 if (error)
599 return (error);
600 if (DOINGSOFTDEP(vp))
601 softdep_setup_allocext(ip, nb,
602 dbtofsb(fs, bp->b_blkno),
603 dp->di_extb[nb],
604 fs->fs_bsize, osize, bp);
605 dp->di_extsize = smalllblktosize(fs, nb + 1);
606 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
607 bp->b_xflags |= BX_ALTDATA;
608 ip->i_flag |= IN_CHANGE;
609 if (flags & IO_SYNC)
610 bwrite(bp);
611 else
612 bawrite(bp);
613 }
614 }
615 /*
616 * All blocks are direct blocks
617 */
618 if (flags & BA_METAONLY)
619 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
620 nb = dp->di_extb[lbn];
621 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
622 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
623 gbflags, &bp);
624 if (error) {
625 brelse(bp);
626 return (error);
627 }
628 bp->b_blkno = fsbtodb(fs, nb);
629 bp->b_xflags |= BX_ALTDATA;
630 *bpp = bp;
631 return (0);
632 }
633 if (nb != 0) {
634 /*
635 * Consider need to reallocate a fragment.
636 */
637 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
638 nsize = fragroundup(fs, size);
639 if (nsize <= osize) {
640 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
641 gbflags, &bp);
642 if (error) {
643 brelse(bp);
644 return (error);
645 }
646 bp->b_blkno = fsbtodb(fs, nb);
647 bp->b_xflags |= BX_ALTDATA;
648 } else {
649 UFS_LOCK(ump);
650 error = ffs_realloccg(ip, -1 - lbn,
651 dp->di_extb[lbn],
652 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
653 &dp->di_extb[0]), osize, nsize, flags,
654 cred, &bp);
655 if (error)
656 return (error);
657 bp->b_xflags |= BX_ALTDATA;
658 if (DOINGSOFTDEP(vp))
659 softdep_setup_allocext(ip, lbn,
660 dbtofsb(fs, bp->b_blkno), nb,
661 nsize, osize, bp);
662 }
663 } else {
664 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
665 nsize = fragroundup(fs, size);
666 else
667 nsize = fs->fs_bsize;
668 UFS_LOCK(ump);
669 error = ffs_alloc(ip, lbn,
670 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
671 nsize, flags, cred, &newb);
672 if (error)
673 return (error);
674 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
675 bp->b_blkno = fsbtodb(fs, newb);
676 bp->b_xflags |= BX_ALTDATA;
677 if (flags & BA_CLRBUF)
678 vfs_bio_clrbuf(bp);
679 if (DOINGSOFTDEP(vp))
680 softdep_setup_allocext(ip, lbn, newb, 0,
681 nsize, 0, bp);
682 }
683 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
684 ip->i_flag |= IN_CHANGE;
685 *bpp = bp;
686 return (0);
687 }
688 /*
689 * If the next write will extend the file into a new block,
690 * and the file is currently composed of a fragment
691 * this fragment has to be extended to be a full block.
692 */
693 lastlbn = lblkno(fs, ip->i_size);
694 if (lastlbn < NDADDR && lastlbn < lbn) {
695 nb = lastlbn;
696 osize = blksize(fs, ip, nb);
697 if (osize < fs->fs_bsize && osize > 0) {
698 UFS_LOCK(ump);
699 error = ffs_realloccg(ip, nb, dp->di_db[nb],
700 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
701 &dp->di_db[0]), osize, (int)fs->fs_bsize,
702 flags, cred, &bp);
703 if (error)
704 return (error);
705 if (DOINGSOFTDEP(vp))
706 softdep_setup_allocdirect(ip, nb,
707 dbtofsb(fs, bp->b_blkno),
708 dp->di_db[nb],
709 fs->fs_bsize, osize, bp);
710 ip->i_size = smalllblktosize(fs, nb + 1);
711 dp->di_size = ip->i_size;
712 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
713 ip->i_flag |= IN_CHANGE | IN_UPDATE;
714 if (flags & IO_SYNC)
715 bwrite(bp);
716 else
717 bawrite(bp);
718 }
719 }
720 /*
721 * The first NDADDR blocks are direct blocks
722 */
723 if (lbn < NDADDR) {
724 if (flags & BA_METAONLY)
725 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
726 nb = dp->di_db[lbn];
727 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
728 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
729 gbflags, &bp);
730 if (error) {
731 brelse(bp);
732 return (error);
733 }
734 bp->b_blkno = fsbtodb(fs, nb);
735 *bpp = bp;
736 return (0);
737 }
738 if (nb != 0) {
739 /*
740 * Consider need to reallocate a fragment.
741 */
742 osize = fragroundup(fs, blkoff(fs, ip->i_size));
743 nsize = fragroundup(fs, size);
744 if (nsize <= osize) {
745 error = bread_gb(vp, lbn, osize, NOCRED,
746 gbflags, &bp);
747 if (error) {
748 brelse(bp);
749 return (error);
750 }
751 bp->b_blkno = fsbtodb(fs, nb);
752 } else {
753 UFS_LOCK(ump);
754 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
755 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
756 &dp->di_db[0]), osize, nsize, flags,
757 cred, &bp);
758 if (error)
759 return (error);
760 if (DOINGSOFTDEP(vp))
761 softdep_setup_allocdirect(ip, lbn,
762 dbtofsb(fs, bp->b_blkno), nb,
763 nsize, osize, bp);
764 }
765 } else {
766 if (ip->i_size < smalllblktosize(fs, lbn + 1))
767 nsize = fragroundup(fs, size);
768 else
769 nsize = fs->fs_bsize;
770 UFS_LOCK(ump);
771 error = ffs_alloc(ip, lbn,
772 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
773 &dp->di_db[0]), nsize, flags, cred, &newb);
774 if (error)
775 return (error);
776 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
777 bp->b_blkno = fsbtodb(fs, newb);
778 if (flags & BA_CLRBUF)
779 vfs_bio_clrbuf(bp);
780 if (DOINGSOFTDEP(vp))
781 softdep_setup_allocdirect(ip, lbn, newb, 0,
782 nsize, 0, bp);
783 }
784 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
785 ip->i_flag |= IN_CHANGE | IN_UPDATE;
786 *bpp = bp;
787 return (0);
788 }
789 /*
790 * Determine the number of levels of indirection.
791 */
792 pref = 0;
793 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
794 return(error);
795 #ifdef INVARIANTS
796 if (num < 1)
797 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
798 #endif
799 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
800 /*
801 * Fetch the first indirect block allocating if necessary.
802 */
803 --num;
804 nb = dp->di_ib[indirs[0].in_off];
805 allocib = NULL;
806 allocblk = allociblk;
807 lbns_remfree = lbns;
808 if (nb == 0) {
809 UFS_LOCK(ump);
810 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
811 (ufs2_daddr_t *)0);
812 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
813 flags, cred, &newb)) != 0) {
814 curthread_pflags_restore(saved_inbdflush);
815 return (error);
816 }
817 pref = newb + fs->fs_frag;
818 nb = newb;
819 *allocblk++ = nb;
820 *lbns_remfree++ = indirs[1].in_lbn;
821 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
822 GB_UNMAPPED);
823 bp->b_blkno = fsbtodb(fs, nb);
824 vfs_bio_clrbuf(bp);
825 if (DOINGSOFTDEP(vp)) {
826 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
827 newb, 0, fs->fs_bsize, 0, bp);
828 bdwrite(bp);
829 } else {
830 /*
831 * Write synchronously so that indirect blocks
832 * never point at garbage.
833 */
834 if (DOINGASYNC(vp))
835 bdwrite(bp);
836 else if ((error = bwrite(bp)) != 0)
837 goto fail;
838 }
839 allocib = &dp->di_ib[indirs[0].in_off];
840 *allocib = nb;
841 ip->i_flag |= IN_CHANGE | IN_UPDATE;
842 }
843 /*
844 * Fetch through the indirect blocks, allocating as necessary.
845 */
846 retry:
847 for (i = 1;;) {
848 error = bread(vp,
849 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
850 if (error) {
851 brelse(bp);
852 goto fail;
853 }
854 bap = (ufs2_daddr_t *)bp->b_data;
855 nb = bap[indirs[i].in_off];
856 if (i == num)
857 break;
858 i += 1;
859 if (nb != 0) {
860 bqrelse(bp);
861 continue;
862 }
863 UFS_LOCK(ump);
864 /*
865 * If parent indirect has just been allocated, try to cluster
866 * immediately following it.
867 */
868 if (pref == 0)
869 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
870 (ufs2_daddr_t *)0);
871 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
872 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
873 brelse(bp);
874 if (++reclaimed == 1) {
875 UFS_LOCK(ump);
876 softdep_request_cleanup(fs, vp, cred,
877 FLUSH_BLOCKS_WAIT);
878 UFS_UNLOCK(ump);
879 goto retry;
880 }
881 if (ppsratecheck(&lastfail, &curfail, 1)) {
882 ffs_fserr(fs, ip->i_number, "filesystem full");
883 uprintf("\n%s: write failed, filesystem "
884 "is full\n", fs->fs_fsmnt);
885 }
886 goto fail;
887 }
888 pref = newb + fs->fs_frag;
889 nb = newb;
890 *allocblk++ = nb;
891 *lbns_remfree++ = indirs[i].in_lbn;
892 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
893 GB_UNMAPPED);
894 nbp->b_blkno = fsbtodb(fs, nb);
895 vfs_bio_clrbuf(nbp);
896 if (DOINGSOFTDEP(vp)) {
897 softdep_setup_allocindir_meta(nbp, ip, bp,
898 indirs[i - 1].in_off, nb);
899 bdwrite(nbp);
900 } else {
901 /*
902 * Write synchronously so that indirect blocks
903 * never point at garbage.
904 */
905 if ((error = bwrite(nbp)) != 0) {
906 brelse(bp);
907 goto fail;
908 }
909 }
910 bap[indirs[i - 1].in_off] = nb;
911 if (allocib == NULL && unwindidx < 0)
912 unwindidx = i - 1;
913 /*
914 * If required, write synchronously, otherwise use
915 * delayed write.
916 */
917 if (flags & IO_SYNC) {
918 bwrite(bp);
919 } else {
920 if (bp->b_bufsize == fs->fs_bsize)
921 bp->b_flags |= B_CLUSTEROK;
922 bdwrite(bp);
923 }
924 }
925 /*
926 * If asked only for the indirect block, then return it.
927 */
928 if (flags & BA_METAONLY) {
929 curthread_pflags_restore(saved_inbdflush);
930 *bpp = bp;
931 return (0);
932 }
933 /*
934 * Get the data block, allocating if necessary.
935 */
936 if (nb == 0) {
937 UFS_LOCK(ump);
938 /*
939 * If allocating metadata at the front of the cylinder
940 * group and parent indirect block has just been allocated,
941 * then cluster next to it if it is the first indirect in
942 * the file. Otherwise it has been allocated in the metadata
943 * area, so we want to find our own place out in the data area.
944 */
945 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
946 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
947 &bap[0]);
948 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
949 flags | IO_BUFLOCKED, cred, &newb);
950 if (error) {
951 brelse(bp);
952 if (++reclaimed == 1) {
953 UFS_LOCK(ump);
954 softdep_request_cleanup(fs, vp, cred,
955 FLUSH_BLOCKS_WAIT);
956 UFS_UNLOCK(ump);
957 goto retry;
958 }
959 if (ppsratecheck(&lastfail, &curfail, 1)) {
960 ffs_fserr(fs, ip->i_number, "filesystem full");
961 uprintf("\n%s: write failed, filesystem "
962 "is full\n", fs->fs_fsmnt);
963 }
964 goto fail;
965 }
966 nb = newb;
967 *allocblk++ = nb;
968 *lbns_remfree++ = lbn;
969 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
970 nbp->b_blkno = fsbtodb(fs, nb);
971 if (flags & BA_CLRBUF)
972 vfs_bio_clrbuf(nbp);
973 if (DOINGSOFTDEP(vp))
974 softdep_setup_allocindir_page(ip, lbn, bp,
975 indirs[i].in_off, nb, 0, nbp);
976 bap[indirs[i].in_off] = nb;
977 /*
978 * If required, write synchronously, otherwise use
979 * delayed write.
980 */
981 if (flags & IO_SYNC) {
982 bwrite(bp);
983 } else {
984 if (bp->b_bufsize == fs->fs_bsize)
985 bp->b_flags |= B_CLUSTEROK;
986 bdwrite(bp);
987 }
988 curthread_pflags_restore(saved_inbdflush);
989 *bpp = nbp;
990 return (0);
991 }
992 brelse(bp);
993 /*
994 * If requested clear invalid portions of the buffer. If we
995 * have to do a read-before-write (typical if BA_CLRBUF is set),
996 * try to do some read-ahead in the sequential case to reduce
997 * the number of I/O transactions.
998 */
999 if (flags & BA_CLRBUF) {
1000 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1001 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
1002 error = cluster_read(vp, ip->i_size, lbn,
1003 (int)fs->fs_bsize, NOCRED,
1004 MAXBSIZE, seqcount, gbflags, &nbp);
1005 } else {
1006 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1007 NOCRED, gbflags, &nbp);
1008 }
1009 if (error) {
1010 brelse(nbp);
1011 goto fail;
1012 }
1013 } else {
1014 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1015 nbp->b_blkno = fsbtodb(fs, nb);
1016 }
1017 curthread_pflags_restore(saved_inbdflush);
1018 *bpp = nbp;
1019 return (0);
1020 fail:
1021 curthread_pflags_restore(saved_inbdflush);
1022 /*
1023 * If we have failed to allocate any blocks, simply return the error.
1024 * This is the usual case and avoids the need to fsync the file.
1025 */
1026 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1027 return (error);
1028 /*
1029 * If we have failed part way through block allocation, we
1030 * have to deallocate any indirect blocks that we have allocated.
1031 * We have to fsync the file before we start to get rid of all
1032 * of its dependencies so that we do not leave them dangling.
1033 * We have to sync it at the end so that the soft updates code
1034 * does not find any untracked changes. Although this is really
1035 * slow, running out of disk space is not expected to be a common
1036 * occurrence. The error return from fsync is ignored as we already
1037 * have an error to return to the user.
1038 *
1039 * XXX Still have to journal the free below
1040 */
1041 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1042 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1043 blkp < allocblk; blkp++, lbns_remfree++) {
1044 /*
1045 * We shall not leave the freed blocks on the vnode
1046 * buffer object lists.
1047 */
1048 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
1049 if (bp != NULL) {
1050 bp->b_flags |= (B_INVAL | B_RELBUF);
1051 bp->b_flags &= ~B_ASYNC;
1052 brelse(bp);
1053 }
1054 deallocated += fs->fs_bsize;
1055 }
1056 if (allocib != NULL) {
1057 *allocib = 0;
1058 } else if (unwindidx >= 0) {
1059 int r;
1060
1061 r = bread(vp, indirs[unwindidx].in_lbn,
1062 (int)fs->fs_bsize, NOCRED, &bp);
1063 if (r) {
1064 panic("Could not unwind indirect block, error %d", r);
1065 brelse(bp);
1066 } else {
1067 bap = (ufs2_daddr_t *)bp->b_data;
1068 bap[indirs[unwindidx].in_off] = 0;
1069 if (flags & IO_SYNC) {
1070 bwrite(bp);
1071 } else {
1072 if (bp->b_bufsize == fs->fs_bsize)
1073 bp->b_flags |= B_CLUSTEROK;
1074 bdwrite(bp);
1075 }
1076 }
1077 }
1078 if (deallocated) {
1079 #ifdef QUOTA
1080 /*
1081 * Restore user's disk quota because allocation failed.
1082 */
1083 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1084 #endif
1085 dp->di_blocks -= btodb(deallocated);
1086 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1087 }
1088 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1089 /*
1090 * After the buffers are invalidated and on-disk pointers are
1091 * cleared, free the blocks.
1092 */
1093 for (blkp = allociblk; blkp < allocblk; blkp++) {
1094 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
1095 ip->i_number, vp->v_type, NULL);
1096 }
1097 return (error);
1098 }
/* Cache object: bb2c2c93c3dcea42456adf86e3529784 */