1 /*-
2 * Copyright (c) 2002 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/6.4/sys/ufs/ffs/ffs_alloc.c 176542 2008-02-25 09:30:00Z obrien $");
64
65 #include "opt_quota.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/file.h>
73 #include <sys/filedesc.h>
74 #include <sys/proc.h>
75 #include <sys/vnode.h>
76 #include <sys/mount.h>
77 #include <sys/kernel.h>
78 #include <sys/sysctl.h>
79 #include <sys/syslog.h>
80
81 #include <ufs/ufs/extattr.h>
82 #include <ufs/ufs/quota.h>
83 #include <ufs/ufs/inode.h>
84 #include <ufs/ufs/ufs_extern.h>
85 #include <ufs/ufs/ufsmount.h>
86
87 #include <ufs/ffs/fs.h>
88 #include <ufs/ffs/ffs_extern.h>
89
90 typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref,
91 int size);
92
93 static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int);
94 static ufs2_daddr_t
95 ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
96 #ifdef INVARIANTS
97 static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
98 #endif
99 static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int);
100 static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
101 ufs1_daddr_t, int);
102 static ino_t ffs_dirpref(struct inode *);
103 static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int);
104 static void ffs_fserr(struct fs *, ino_t, char *);
105 static ufs2_daddr_t ffs_hashalloc
106 (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *);
107 static ufs2_daddr_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int);
108 static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
109 static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
110 static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *);
111
112 /*
113 * Allocate a block in the filesystem.
114 *
115 * The size of the requested block is given, which must be some
116 * multiple of fs_fsize and <= fs_bsize.
117 * A preference may be optionally specified. If a preference is given
118 * the following hierarchy is used to allocate a block:
119 * 1) allocate the requested block.
120 * 2) allocate a rotationally optimal block in the same cylinder.
121 * 3) allocate a block in the same cylinder group.
122 * 4) quadradically rehash into other cylinder groups, until an
123 * available block is located.
124 * If no block preference is given the following hierarchy is used
125 * to allocate a block:
126 * 1) allocate a block in the cylinder group that contains the
127 * inode for the file.
128 * 2) quadradically rehash into other cylinder groups, until an
129 * available block is located.
130 */
131 int
132 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
133 struct inode *ip;
134 ufs2_daddr_t lbn, bpref;
135 int size;
136 struct ucred *cred;
137 ufs2_daddr_t *bnp;
138 {
139 struct fs *fs;
140 struct ufsmount *ump;
141 ufs2_daddr_t bno;
142 int cg, reclaimed;
143 static struct timeval lastfail;
144 static int curfail;
145 int64_t delta;
146 #ifdef QUOTA
147 int error;
148 #endif
149
150 *bnp = 0;
151 fs = ip->i_fs;
152 ump = ip->i_ump;
153 mtx_assert(UFS_MTX(ump), MA_OWNED);
154 #ifdef INVARIANTS
155 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
156 printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
157 devtoname(ip->i_dev), (long)fs->fs_bsize, size,
158 fs->fs_fsmnt);
159 panic("ffs_alloc: bad size");
160 }
161 if (cred == NOCRED)
162 panic("ffs_alloc: missing credential");
163 #endif /* INVARIANTS */
164 reclaimed = 0;
165 retry:
166 #ifdef QUOTA
167 UFS_UNLOCK(ump);
168 error = chkdq(ip, btodb(size), cred, 0);
169 if (error)
170 return (error);
171 UFS_LOCK(ump);
172 #endif
173 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
174 goto nospace;
175 if (suser_cred(cred, SUSER_ALLOWJAIL) &&
176 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
177 goto nospace;
178 if (bpref >= fs->fs_size)
179 bpref = 0;
180 if (bpref == 0)
181 cg = ino_to_cg(fs, ip->i_number);
182 else
183 cg = dtog(fs, bpref);
184 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
185 if (bno > 0) {
186 delta = btodb(size);
187 if (ip->i_flag & IN_SPACECOUNTED) {
188 UFS_LOCK(ump);
189 fs->fs_pendingblocks += delta;
190 UFS_UNLOCK(ump);
191 }
192 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
193 ip->i_flag |= IN_CHANGE | IN_UPDATE;
194 *bnp = bno;
195 return (0);
196 }
197 nospace:
198 #ifdef QUOTA
199 UFS_UNLOCK(ump);
200 /*
201 * Restore user's disk quota because allocation failed.
202 */
203 (void) chkdq(ip, -btodb(size), cred, FORCE);
204 UFS_LOCK(ump);
205 #endif
206 if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
207 reclaimed = 1;
208 softdep_request_cleanup(fs, ITOV(ip));
209 goto retry;
210 }
211 UFS_UNLOCK(ump);
212 if (ppsratecheck(&lastfail, &curfail, 1)) {
213 ffs_fserr(fs, ip->i_number, "filesystem full");
214 uprintf("\n%s: write failed, filesystem is full\n",
215 fs->fs_fsmnt);
216 }
217 return (ENOSPC);
218 }
219
220 /*
221 * Reallocate a fragment to a bigger size
222 *
223 * The number and size of the old block is given, and a preference
224 * and new size is also specified. The allocator attempts to extend
225 * the original block. Failing that, the regular block allocator is
226 * invoked to get an appropriate block.
227 */
228 int
229 ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
230 struct inode *ip;
231 ufs2_daddr_t lbprev;
232 ufs2_daddr_t bprev;
233 ufs2_daddr_t bpref;
234 int osize, nsize;
235 struct ucred *cred;
236 struct buf **bpp;
237 {
238 struct vnode *vp;
239 struct fs *fs;
240 struct buf *bp;
241 struct ufsmount *ump;
242 int cg, request, error, reclaimed;
243 ufs2_daddr_t bno;
244 static struct timeval lastfail;
245 static int curfail;
246 int64_t delta;
247
248 *bpp = 0;
249 vp = ITOV(ip);
250 fs = ip->i_fs;
251 bp = NULL;
252 ump = ip->i_ump;
253 mtx_assert(UFS_MTX(ump), MA_OWNED);
254 #ifdef INVARIANTS
255 if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
256 panic("ffs_realloccg: allocation on suspended filesystem");
257 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
258 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
259 printf(
260 "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
261 devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
262 nsize, fs->fs_fsmnt);
263 panic("ffs_realloccg: bad size");
264 }
265 if (cred == NOCRED)
266 panic("ffs_realloccg: missing credential");
267 #endif /* INVARIANTS */
268 reclaimed = 0;
269 retry:
270 if (suser_cred(cred, SUSER_ALLOWJAIL) &&
271 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) {
272 goto nospace;
273 }
274 if (bprev == 0) {
275 printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
276 devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
277 fs->fs_fsmnt);
278 panic("ffs_realloccg: bad bprev");
279 }
280 UFS_UNLOCK(ump);
281 /*
282 * Allocate the extra space in the buffer.
283 */
284 error = bread(vp, lbprev, osize, NOCRED, &bp);
285 if (error) {
286 brelse(bp);
287 return (error);
288 }
289
290 if (bp->b_blkno == bp->b_lblkno) {
291 if (lbprev >= NDADDR)
292 panic("ffs_realloccg: lbprev out of range");
293 bp->b_blkno = fsbtodb(fs, bprev);
294 }
295
296 #ifdef QUOTA
297 error = chkdq(ip, btodb(nsize - osize), cred, 0);
298 if (error) {
299 brelse(bp);
300 return (error);
301 }
302 #endif
303 /*
304 * Check for extension in the existing location.
305 */
306 cg = dtog(fs, bprev);
307 UFS_LOCK(ump);
308 bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
309 if (bno) {
310 if (bp->b_blkno != fsbtodb(fs, bno))
311 panic("ffs_realloccg: bad blockno");
312 delta = btodb(nsize - osize);
313 if (ip->i_flag & IN_SPACECOUNTED) {
314 UFS_LOCK(ump);
315 fs->fs_pendingblocks += delta;
316 UFS_UNLOCK(ump);
317 }
318 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
319 ip->i_flag |= IN_CHANGE | IN_UPDATE;
320 allocbuf(bp, nsize);
321 bp->b_flags |= B_DONE;
322 if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
323 bzero((char *)bp->b_data + osize, nsize - osize);
324 else
325 vfs_bio_clrbuf(bp);
326 *bpp = bp;
327 return (0);
328 }
329 /*
330 * Allocate a new disk location.
331 */
332 if (bpref >= fs->fs_size)
333 bpref = 0;
334 switch ((int)fs->fs_optim) {
335 case FS_OPTSPACE:
336 /*
337 * Allocate an exact sized fragment. Although this makes
338 * best use of space, we will waste time relocating it if
339 * the file continues to grow. If the fragmentation is
340 * less than half of the minimum free reserve, we choose
341 * to begin optimizing for time.
342 */
343 request = nsize;
344 if (fs->fs_minfree <= 5 ||
345 fs->fs_cstotal.cs_nffree >
346 (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
347 break;
348 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
349 fs->fs_fsmnt);
350 fs->fs_optim = FS_OPTTIME;
351 break;
352 case FS_OPTTIME:
353 /*
354 * At this point we have discovered a file that is trying to
355 * grow a small fragment to a larger fragment. To save time,
356 * we allocate a full sized block, then free the unused portion.
357 * If the file continues to grow, the `ffs_fragextend' call
358 * above will be able to grow it in place without further
359 * copying. If aberrant programs cause disk fragmentation to
360 * grow within 2% of the free reserve, we choose to begin
361 * optimizing for space.
362 */
363 request = fs->fs_bsize;
364 if (fs->fs_cstotal.cs_nffree <
365 (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
366 break;
367 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
368 fs->fs_fsmnt);
369 fs->fs_optim = FS_OPTSPACE;
370 break;
371 default:
372 printf("dev = %s, optim = %ld, fs = %s\n",
373 devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
374 panic("ffs_realloccg: bad optim");
375 /* NOTREACHED */
376 }
377 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
378 if (bno > 0) {
379 bp->b_blkno = fsbtodb(fs, bno);
380 if (!DOINGSOFTDEP(vp))
381 ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
382 ip->i_number);
383 if (nsize < request)
384 ffs_blkfree(ump, fs, ip->i_devvp,
385 bno + numfrags(fs, nsize),
386 (long)(request - nsize), ip->i_number);
387 delta = btodb(nsize - osize);
388 if (ip->i_flag & IN_SPACECOUNTED) {
389 UFS_LOCK(ump);
390 fs->fs_pendingblocks += delta;
391 UFS_UNLOCK(ump);
392 }
393 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
394 ip->i_flag |= IN_CHANGE | IN_UPDATE;
395 allocbuf(bp, nsize);
396 bp->b_flags |= B_DONE;
397 if ((bp->b_flags & (B_MALLOC | B_VMIO)) != B_VMIO)
398 bzero((char *)bp->b_data + osize, nsize - osize);
399 else
400 vfs_bio_clrbuf(bp);
401 *bpp = bp;
402 return (0);
403 }
404 #ifdef QUOTA
405 UFS_UNLOCK(ump);
406 /*
407 * Restore user's disk quota because allocation failed.
408 */
409 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
410 UFS_LOCK(ump);
411 #endif
412 nospace:
413 /*
414 * no space available
415 */
416 if (fs->fs_pendingblocks > 0 && reclaimed == 0) {
417 reclaimed = 1;
418 softdep_request_cleanup(fs, vp);
419 UFS_UNLOCK(ump);
420 if (bp)
421 brelse(bp);
422 UFS_LOCK(ump);
423 goto retry;
424 }
425 UFS_UNLOCK(ump);
426 if (bp)
427 brelse(bp);
428 if (ppsratecheck(&lastfail, &curfail, 1)) {
429 ffs_fserr(fs, ip->i_number, "filesystem full");
430 uprintf("\n%s: write failed, filesystem is full\n",
431 fs->fs_fsmnt);
432 }
433 return (ENOSPC);
434 }
435
436 /*
437 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
438 *
439 * The vnode and an array of buffer pointers for a range of sequential
440 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible
 * to the end of the allocation for the logical block immediately
443 * preceding the current range. If successful, the physical block numbers
444 * in the buffer pointers and in the inode are changed to reflect the new
445 * allocation. If unsuccessful, the allocation is left unchanged. The
446 * success in doing the reallocation is returned. Note that the error
447 * return is not reflected back to the user. Rather the previous block
448 * allocation will be used.
449 */
450
451 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
452
453 static int doasyncfree = 1;
454 SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
455
456 static int doreallocblks = 1;
457 SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
458
459 #ifdef DEBUG
460 static volatile int prtrealloc = 0;
461 #endif
462
463 int
464 ffs_reallocblks(ap)
465 struct vop_reallocblks_args /* {
466 struct vnode *a_vp;
467 struct cluster_save *a_buflist;
468 } */ *ap;
469 {
470
471 if (doreallocblks == 0)
472 return (ENOSPC);
473 if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
474 return (ffs_reallocblks_ufs1(ap));
475 return (ffs_reallocblks_ufs2(ap));
476 }
477
478 static int
479 ffs_reallocblks_ufs1(ap)
480 struct vop_reallocblks_args /* {
481 struct vnode *a_vp;
482 struct cluster_save *a_buflist;
483 } */ *ap;
484 {
485 struct fs *fs;
486 struct inode *ip;
487 struct vnode *vp;
488 struct buf *sbp, *ebp;
489 ufs1_daddr_t *bap, *sbap, *ebap = 0;
490 struct cluster_save *buflist;
491 struct ufsmount *ump;
492 ufs_lbn_t start_lbn, end_lbn;
493 ufs1_daddr_t soff, newblk, blkno;
494 ufs2_daddr_t pref;
495 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
496 int i, len, start_lvl, end_lvl, ssize;
497
498 vp = ap->a_vp;
499 ip = VTOI(vp);
500 fs = ip->i_fs;
501 ump = ip->i_ump;
502 if (fs->fs_contigsumsize <= 0)
503 return (ENOSPC);
504 buflist = ap->a_buflist;
505 len = buflist->bs_nchildren;
506 start_lbn = buflist->bs_children[0]->b_lblkno;
507 end_lbn = start_lbn + len - 1;
508 #ifdef INVARIANTS
509 for (i = 0; i < len; i++)
510 if (!ffs_checkblk(ip,
511 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
512 panic("ffs_reallocblks: unallocated block 1");
513 for (i = 1; i < len; i++)
514 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
515 panic("ffs_reallocblks: non-logical cluster");
516 blkno = buflist->bs_children[0]->b_blkno;
517 ssize = fsbtodb(fs, fs->fs_frag);
518 for (i = 1; i < len - 1; i++)
519 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
520 panic("ffs_reallocblks: non-physical cluster %d", i);
521 #endif
522 /*
523 * If the latest allocation is in a new cylinder group, assume that
524 * the filesystem has decided to move and do not force it back to
525 * the previous cylinder group.
526 */
527 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
528 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
529 return (ENOSPC);
530 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
531 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
532 return (ENOSPC);
533 /*
534 * Get the starting offset and block map for the first block.
535 */
536 if (start_lvl == 0) {
537 sbap = &ip->i_din1->di_db[0];
538 soff = start_lbn;
539 } else {
540 idp = &start_ap[start_lvl - 1];
541 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
542 brelse(sbp);
543 return (ENOSPC);
544 }
545 sbap = (ufs1_daddr_t *)sbp->b_data;
546 soff = idp->in_off;
547 }
548 /*
549 * If the block range spans two block maps, get the second map.
550 */
551 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
552 ssize = len;
553 } else {
554 #ifdef INVARIANTS
555 if (start_lvl > 0 &&
556 start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
557 panic("ffs_reallocblk: start == end");
558 #endif
559 ssize = len - (idp->in_off + 1);
560 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
561 goto fail;
562 ebap = (ufs1_daddr_t *)ebp->b_data;
563 }
564 /*
565 * Find the preferred location for the cluster.
566 */
567 UFS_LOCK(ump);
568 pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
569 /*
570 * Search the block map looking for an allocation of the desired size.
571 */
572 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
573 len, ffs_clusteralloc)) == 0) {
574 UFS_UNLOCK(ump);
575 goto fail;
576 }
577 /*
578 * We have found a new contiguous block.
579 *
580 * First we have to replace the old block pointers with the new
581 * block pointers in the inode and indirect blocks associated
582 * with the file.
583 */
584 #ifdef DEBUG
585 if (prtrealloc)
586 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
587 (intmax_t)start_lbn, (intmax_t)end_lbn);
588 #endif
589 blkno = newblk;
590 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
591 if (i == ssize) {
592 bap = ebap;
593 soff = -i;
594 }
595 #ifdef INVARIANTS
596 if (!ffs_checkblk(ip,
597 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
598 panic("ffs_reallocblks: unallocated block 2");
599 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
600 panic("ffs_reallocblks: alloc mismatch");
601 #endif
602 #ifdef DEBUG
603 if (prtrealloc)
604 printf(" %d,", *bap);
605 #endif
606 if (DOINGSOFTDEP(vp)) {
607 if (sbap == &ip->i_din1->di_db[0] && i < ssize)
608 softdep_setup_allocdirect(ip, start_lbn + i,
609 blkno, *bap, fs->fs_bsize, fs->fs_bsize,
610 buflist->bs_children[i]);
611 else
612 softdep_setup_allocindir_page(ip, start_lbn + i,
613 i < ssize ? sbp : ebp, soff + i, blkno,
614 *bap, buflist->bs_children[i]);
615 }
616 *bap++ = blkno;
617 }
618 /*
619 * Next we must write out the modified inode and indirect blocks.
620 * For strict correctness, the writes should be synchronous since
621 * the old block values may have been written to disk. In practise
622 * they are almost never written, but if we are concerned about
623 * strict correctness, the `doasyncfree' flag should be set to zero.
624 *
625 * The test on `doasyncfree' should be changed to test a flag
626 * that shows whether the associated buffers and inodes have
627 * been written. The flag should be set when the cluster is
628 * started and cleared whenever the buffer or inode is flushed.
629 * We can then check below to see if it is set, and do the
630 * synchronous write only when it has been cleared.
631 */
632 if (sbap != &ip->i_din1->di_db[0]) {
633 if (doasyncfree)
634 bdwrite(sbp);
635 else
636 bwrite(sbp);
637 } else {
638 ip->i_flag |= IN_CHANGE | IN_UPDATE;
639 if (!doasyncfree)
640 ffs_update(vp, 1);
641 }
642 if (ssize < len) {
643 if (doasyncfree)
644 bdwrite(ebp);
645 else
646 bwrite(ebp);
647 }
648 /*
649 * Last, free the old blocks and assign the new blocks to the buffers.
650 */
651 #ifdef DEBUG
652 if (prtrealloc)
653 printf("\n\tnew:");
654 #endif
655 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
656 if (!DOINGSOFTDEP(vp))
657 ffs_blkfree(ump, fs, ip->i_devvp,
658 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
659 fs->fs_bsize, ip->i_number);
660 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
661 #ifdef INVARIANTS
662 if (!ffs_checkblk(ip,
663 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
664 panic("ffs_reallocblks: unallocated block 3");
665 #endif
666 #ifdef DEBUG
667 if (prtrealloc)
668 printf(" %d,", blkno);
669 #endif
670 }
671 #ifdef DEBUG
672 if (prtrealloc) {
673 prtrealloc--;
674 printf("\n");
675 }
676 #endif
677 return (0);
678
679 fail:
680 if (ssize < len)
681 brelse(ebp);
682 if (sbap != &ip->i_din1->di_db[0])
683 brelse(sbp);
684 return (ENOSPC);
685 }
686
687 static int
688 ffs_reallocblks_ufs2(ap)
689 struct vop_reallocblks_args /* {
690 struct vnode *a_vp;
691 struct cluster_save *a_buflist;
692 } */ *ap;
693 {
694 struct fs *fs;
695 struct inode *ip;
696 struct vnode *vp;
697 struct buf *sbp, *ebp;
698 ufs2_daddr_t *bap, *sbap, *ebap = 0;
699 struct cluster_save *buflist;
700 struct ufsmount *ump;
701 ufs_lbn_t start_lbn, end_lbn;
702 ufs2_daddr_t soff, newblk, blkno, pref;
703 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
704 int i, len, start_lvl, end_lvl, ssize;
705
706 vp = ap->a_vp;
707 ip = VTOI(vp);
708 fs = ip->i_fs;
709 ump = ip->i_ump;
710 if (fs->fs_contigsumsize <= 0)
711 return (ENOSPC);
712 buflist = ap->a_buflist;
713 len = buflist->bs_nchildren;
714 start_lbn = buflist->bs_children[0]->b_lblkno;
715 end_lbn = start_lbn + len - 1;
716 #ifdef INVARIANTS
717 for (i = 0; i < len; i++)
718 if (!ffs_checkblk(ip,
719 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
720 panic("ffs_reallocblks: unallocated block 1");
721 for (i = 1; i < len; i++)
722 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
723 panic("ffs_reallocblks: non-logical cluster");
724 blkno = buflist->bs_children[0]->b_blkno;
725 ssize = fsbtodb(fs, fs->fs_frag);
726 for (i = 1; i < len - 1; i++)
727 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
728 panic("ffs_reallocblks: non-physical cluster %d", i);
729 #endif
730 /*
731 * If the latest allocation is in a new cylinder group, assume that
732 * the filesystem has decided to move and do not force it back to
733 * the previous cylinder group.
734 */
735 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
736 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
737 return (ENOSPC);
738 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
739 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
740 return (ENOSPC);
741 /*
742 * Get the starting offset and block map for the first block.
743 */
744 if (start_lvl == 0) {
745 sbap = &ip->i_din2->di_db[0];
746 soff = start_lbn;
747 } else {
748 idp = &start_ap[start_lvl - 1];
749 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
750 brelse(sbp);
751 return (ENOSPC);
752 }
753 sbap = (ufs2_daddr_t *)sbp->b_data;
754 soff = idp->in_off;
755 }
756 /*
757 * If the block range spans two block maps, get the second map.
758 */
759 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
760 ssize = len;
761 } else {
762 #ifdef INVARIANTS
763 if (start_lvl > 0 &&
764 start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
765 panic("ffs_reallocblk: start == end");
766 #endif
767 ssize = len - (idp->in_off + 1);
768 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
769 goto fail;
770 ebap = (ufs2_daddr_t *)ebp->b_data;
771 }
772 /*
773 * Find the preferred location for the cluster.
774 */
775 UFS_LOCK(ump);
776 pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
777 /*
778 * Search the block map looking for an allocation of the desired size.
779 */
780 if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
781 len, ffs_clusteralloc)) == 0) {
782 UFS_UNLOCK(ump);
783 goto fail;
784 }
785 /*
786 * We have found a new contiguous block.
787 *
788 * First we have to replace the old block pointers with the new
789 * block pointers in the inode and indirect blocks associated
790 * with the file.
791 */
792 #ifdef DEBUG
793 if (prtrealloc)
794 printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
795 (intmax_t)start_lbn, (intmax_t)end_lbn);
796 #endif
797 blkno = newblk;
798 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
799 if (i == ssize) {
800 bap = ebap;
801 soff = -i;
802 }
803 #ifdef INVARIANTS
804 if (!ffs_checkblk(ip,
805 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
806 panic("ffs_reallocblks: unallocated block 2");
807 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
808 panic("ffs_reallocblks: alloc mismatch");
809 #endif
810 #ifdef DEBUG
811 if (prtrealloc)
812 printf(" %jd,", (intmax_t)*bap);
813 #endif
814 if (DOINGSOFTDEP(vp)) {
815 if (sbap == &ip->i_din2->di_db[0] && i < ssize)
816 softdep_setup_allocdirect(ip, start_lbn + i,
817 blkno, *bap, fs->fs_bsize, fs->fs_bsize,
818 buflist->bs_children[i]);
819 else
820 softdep_setup_allocindir_page(ip, start_lbn + i,
821 i < ssize ? sbp : ebp, soff + i, blkno,
822 *bap, buflist->bs_children[i]);
823 }
824 *bap++ = blkno;
825 }
826 /*
827 * Next we must write out the modified inode and indirect blocks.
828 * For strict correctness, the writes should be synchronous since
829 * the old block values may have been written to disk. In practise
830 * they are almost never written, but if we are concerned about
831 * strict correctness, the `doasyncfree' flag should be set to zero.
832 *
833 * The test on `doasyncfree' should be changed to test a flag
834 * that shows whether the associated buffers and inodes have
835 * been written. The flag should be set when the cluster is
836 * started and cleared whenever the buffer or inode is flushed.
837 * We can then check below to see if it is set, and do the
838 * synchronous write only when it has been cleared.
839 */
840 if (sbap != &ip->i_din2->di_db[0]) {
841 if (doasyncfree)
842 bdwrite(sbp);
843 else
844 bwrite(sbp);
845 } else {
846 ip->i_flag |= IN_CHANGE | IN_UPDATE;
847 if (!doasyncfree)
848 ffs_update(vp, 1);
849 }
850 if (ssize < len) {
851 if (doasyncfree)
852 bdwrite(ebp);
853 else
854 bwrite(ebp);
855 }
856 /*
857 * Last, free the old blocks and assign the new blocks to the buffers.
858 */
859 #ifdef DEBUG
860 if (prtrealloc)
861 printf("\n\tnew:");
862 #endif
863 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
864 if (!DOINGSOFTDEP(vp))
865 ffs_blkfree(ump, fs, ip->i_devvp,
866 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
867 fs->fs_bsize, ip->i_number);
868 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
869 #ifdef INVARIANTS
870 if (!ffs_checkblk(ip,
871 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
872 panic("ffs_reallocblks: unallocated block 3");
873 #endif
874 #ifdef DEBUG
875 if (prtrealloc)
876 printf(" %jd,", (intmax_t)blkno);
877 #endif
878 }
879 #ifdef DEBUG
880 if (prtrealloc) {
881 prtrealloc--;
882 printf("\n");
883 }
884 #endif
885 return (0);
886
887 fail:
888 if (ssize < len)
889 brelse(ebp);
890 if (sbap != &ip->i_din2->di_db[0])
891 brelse(sbp);
892 return (ENOSPC);
893 }
894
895 /*
896 * Allocate an inode in the filesystem.
897 *
898 * If allocating a directory, use ffs_dirpref to select the inode.
899 * If allocating in a directory, the following hierarchy is followed:
900 * 1) allocate the preferred inode.
901 * 2) allocate an inode in the same cylinder group.
902 * 3) quadradically rehash into other cylinder groups, until an
903 * available inode is located.
904 * If no inode preference is given the following hierarchy is used
905 * to allocate an inode:
906 * 1) allocate an inode in cylinder group 0.
907 * 2) quadradically rehash into other cylinder groups, until an
908 * available inode is located.
909 */
910 int
911 ffs_valloc(pvp, mode, cred, vpp)
912 struct vnode *pvp;
913 int mode;
914 struct ucred *cred;
915 struct vnode **vpp;
916 {
917 struct inode *pip;
918 struct fs *fs;
919 struct inode *ip;
920 struct timespec ts;
921 struct ufsmount *ump;
922 ino_t ino, ipref;
923 int cg, error;
924 static struct timeval lastfail;
925 static int curfail;
926
927 *vpp = NULL;
928 pip = VTOI(pvp);
929 fs = pip->i_fs;
930 ump = pip->i_ump;
931
932 UFS_LOCK(ump);
933 if (fs->fs_cstotal.cs_nifree == 0)
934 goto noinodes;
935
936 if ((mode & IFMT) == IFDIR)
937 ipref = ffs_dirpref(pip);
938 else
939 ipref = pip->i_number;
940 if (ipref >= fs->fs_ncg * fs->fs_ipg)
941 ipref = 0;
942 cg = ino_to_cg(fs, ipref);
943 /*
944 * Track number of dirs created one after another
945 * in a same cg without intervening by files.
946 */
947 if ((mode & IFMT) == IFDIR) {
948 if (fs->fs_contigdirs[cg] < 255)
949 fs->fs_contigdirs[cg]++;
950 } else {
951 if (fs->fs_contigdirs[cg] > 0)
952 fs->fs_contigdirs[cg]--;
953 }
954 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
955 (allocfcn_t *)ffs_nodealloccg);
956 if (ino == 0)
957 goto noinodes;
958 error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
959 if (error) {
960 ffs_vfree(pvp, ino, mode);
961 return (error);
962 }
963 ip = VTOI(*vpp);
964 if (ip->i_mode) {
965 printf("mode = 0%o, inum = %lu, fs = %s\n",
966 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
967 panic("ffs_valloc: dup alloc");
968 }
969 if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */
970 printf("free inode %s/%lu had %ld blocks\n",
971 fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
972 DIP_SET(ip, i_blocks, 0);
973 }
974 ip->i_flags = 0;
975 DIP_SET(ip, i_flags, 0);
976 /*
977 * Set up a new generation number for this inode.
978 */
979 if (ip->i_gen == 0 || ++ip->i_gen == 0)
980 ip->i_gen = arc4random() / 2 + 1;
981 DIP_SET(ip, i_gen, ip->i_gen);
982 if (fs->fs_magic == FS_UFS2_MAGIC) {
983 vfs_timestamp(&ts);
984 ip->i_din2->di_birthtime = ts.tv_sec;
985 ip->i_din2->di_birthnsec = ts.tv_nsec;
986 }
987 ip->i_flag = 0;
988 vnode_destroy_vobject(*vpp);
989 (*vpp)->v_type = VNON;
990 if (fs->fs_magic == FS_UFS2_MAGIC)
991 (*vpp)->v_op = &ffs_vnodeops2;
992 else
993 (*vpp)->v_op = &ffs_vnodeops1;
994 return (0);
995 noinodes:
996 UFS_UNLOCK(ump);
997 if (ppsratecheck(&lastfail, &curfail, 1)) {
998 ffs_fserr(fs, pip->i_number, "out of inodes");
999 uprintf("\n%s: create/symlink failed, no inodes free\n",
1000 fs->fs_fsmnt);
1001 }
1002 return (ENOSPC);
1003 }
1004
1005 /*
1006 * Find a cylinder group to place a directory.
1007 *
1008 * The policy implemented by this algorithm is to allocate a
1009 * directory inode in the same cylinder group as its parent
1010 * directory, but also to reserve space for its files inodes
1011 * and data. Restrict the number of directories which may be
1012 * allocated one after another in the same cylinder group
1013 * without intervening allocation of files.
1014 *
1015 * If we allocate a first level directory then force allocation
1016 * in another cylinder group.
1017 */
1018 static ino_t
1019 ffs_dirpref(pip)
1020 struct inode *pip;
1021 {
1022 struct fs *fs;
1023 int cg, prefcg, dirsize, cgsize;
1024 int avgifree, avgbfree, avgndir, curdirsize;
1025 int minifree, minbfree, maxndir;
1026 int mincg, minndir;
1027 int maxcontigdirs;
1028
1029 mtx_assert(UFS_MTX(pip->i_ump), MA_OWNED);
1030 fs = pip->i_fs;
1031
1032 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
1033 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
1034 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
1035
1036 /*
1037 * Force allocation in another cg if creating a first level dir.
1038 */
1039 ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
1040 if (ITOV(pip)->v_vflag & VV_ROOT) {
1041 prefcg = arc4random() % fs->fs_ncg;
1042 mincg = prefcg;
1043 minndir = fs->fs_ipg;
1044 for (cg = prefcg; cg < fs->fs_ncg; cg++)
1045 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
1046 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
1047 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1048 mincg = cg;
1049 minndir = fs->fs_cs(fs, cg).cs_ndir;
1050 }
1051 for (cg = 0; cg < prefcg; cg++)
1052 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
1053 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
1054 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1055 mincg = cg;
1056 minndir = fs->fs_cs(fs, cg).cs_ndir;
1057 }
1058 return ((ino_t)(fs->fs_ipg * mincg));
1059 }
1060
1061 /*
1062 * Count various limits which used for
1063 * optimal allocation of a directory inode.
1064 */
1065 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
1066 minifree = avgifree - avgifree / 4;
1067 if (minifree < 1)
1068 minifree = 1;
1069 minbfree = avgbfree - avgbfree / 4;
1070 if (minbfree < 1)
1071 minbfree = 1;
1072 cgsize = fs->fs_fsize * fs->fs_fpg;
1073 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
1074 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
1075 if (dirsize < curdirsize)
1076 dirsize = curdirsize;
1077 if (dirsize <= 0)
1078 maxcontigdirs = 0; /* dirsize overflowed */
1079 else
1080 maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
1081 if (fs->fs_avgfpdir > 0)
1082 maxcontigdirs = min(maxcontigdirs,
1083 fs->fs_ipg / fs->fs_avgfpdir);
1084 if (maxcontigdirs == 0)
1085 maxcontigdirs = 1;
1086
1087 /*
1088 * Limit number of dirs in one cg and reserve space for
1089 * regular files, but only if we have no deficit in
1090 * inodes or space.
1091 */
1092 prefcg = ino_to_cg(fs, pip->i_number);
1093 for (cg = prefcg; cg < fs->fs_ncg; cg++)
1094 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
1095 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
1096 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
1097 if (fs->fs_contigdirs[cg] < maxcontigdirs)
1098 return ((ino_t)(fs->fs_ipg * cg));
1099 }
1100 for (cg = 0; cg < prefcg; cg++)
1101 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
1102 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
1103 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
1104 if (fs->fs_contigdirs[cg] < maxcontigdirs)
1105 return ((ino_t)(fs->fs_ipg * cg));
1106 }
1107 /*
1108 * This is a backstop when we have deficit in space.
1109 */
1110 for (cg = prefcg; cg < fs->fs_ncg; cg++)
1111 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
1112 return ((ino_t)(fs->fs_ipg * cg));
1113 for (cg = 0; cg < prefcg; cg++)
1114 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
1115 break;
1116 return ((ino_t)(fs->fs_ipg * cg));
1117 }
1118
1119 /*
1120 * Select the desired position for the next block in a file. The file is
1121 * logically divided into sections. The first section is composed of the
1122 * direct blocks. Each additional section contains fs_maxbpg blocks.
1123 *
1124 * If no blocks have been allocated in the first section, the policy is to
1125 * request a block in the same cylinder group as the inode that describes
1126 * the file. If no blocks have been allocated in any other section, the
1127 * policy is to place the section in a cylinder group with a greater than
1128 * average number of free blocks. An appropriate cylinder group is found
1129 * by using a rotor that sweeps the cylinder groups. When a new group of
1130 * blocks is needed, the sweep begins in the cylinder group following the
1131 * cylinder group from which the previous allocation was made. The sweep
1132 * continues until a cylinder group with greater than the average number
1133 * of free blocks is found. If the allocation is for the first block in an
1134 * indirect block, the information on the previous allocation is unavailable;
1135 * here a best guess is made based upon the logical block number being
1136 * allocated.
1137 *
1138 * If a section is already partially allocated, the policy is to
1139 * contiguously allocate fs_maxcontig blocks. The end of one of these
1140 * contiguous blocks and the beginning of the next is laid out
1141 * contiguously if possible.
1142 */
1143 ufs2_daddr_t
1144 ffs_blkpref_ufs1(ip, lbn, indx, bap)
1145 struct inode *ip;
1146 ufs_lbn_t lbn;
1147 int indx;
1148 ufs1_daddr_t *bap;
1149 {
1150 struct fs *fs;
1151 int cg;
1152 int avgbfree, startcg;
1153
1154 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
1155 fs = ip->i_fs;
1156 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
1157 if (lbn < NDADDR + NINDIR(fs)) {
1158 cg = ino_to_cg(fs, ip->i_number);
1159 return (cgbase(fs, cg) + fs->fs_frag);
1160 }
1161 /*
1162 * Find a cylinder with greater than average number of
1163 * unused data blocks.
1164 */
1165 if (indx == 0 || bap[indx - 1] == 0)
1166 startcg =
1167 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
1168 else
1169 startcg = dtog(fs, bap[indx - 1]) + 1;
1170 startcg %= fs->fs_ncg;
1171 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
1172 for (cg = startcg; cg < fs->fs_ncg; cg++)
1173 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1174 fs->fs_cgrotor = cg;
1175 return (cgbase(fs, cg) + fs->fs_frag);
1176 }
1177 for (cg = 0; cg <= startcg; cg++)
1178 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1179 fs->fs_cgrotor = cg;
1180 return (cgbase(fs, cg) + fs->fs_frag);
1181 }
1182 return (0);
1183 }
1184 /*
1185 * We just always try to lay things out contiguously.
1186 */
1187 return (bap[indx - 1] + fs->fs_frag);
1188 }
1189
1190 /*
1191 * Same as above, but for UFS2
1192 */
1193 ufs2_daddr_t
1194 ffs_blkpref_ufs2(ip, lbn, indx, bap)
1195 struct inode *ip;
1196 ufs_lbn_t lbn;
1197 int indx;
1198 ufs2_daddr_t *bap;
1199 {
1200 struct fs *fs;
1201 int cg;
1202 int avgbfree, startcg;
1203
1204 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
1205 fs = ip->i_fs;
1206 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
1207 if (lbn < NDADDR + NINDIR(fs)) {
1208 cg = ino_to_cg(fs, ip->i_number);
1209 return (cgbase(fs, cg) + fs->fs_frag);
1210 }
1211 /*
1212 * Find a cylinder with greater than average number of
1213 * unused data blocks.
1214 */
1215 if (indx == 0 || bap[indx - 1] == 0)
1216 startcg =
1217 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
1218 else
1219 startcg = dtog(fs, bap[indx - 1]) + 1;
1220 startcg %= fs->fs_ncg;
1221 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
1222 for (cg = startcg; cg < fs->fs_ncg; cg++)
1223 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1224 fs->fs_cgrotor = cg;
1225 return (cgbase(fs, cg) + fs->fs_frag);
1226 }
1227 for (cg = 0; cg <= startcg; cg++)
1228 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1229 fs->fs_cgrotor = cg;
1230 return (cgbase(fs, cg) + fs->fs_frag);
1231 }
1232 return (0);
1233 }
1234 /*
1235 * We just always try to lay things out contiguously.
1236 */
1237 return (bap[indx - 1] + fs->fs_frag);
1238 }
1239
1240 /*
1241 * Implement the cylinder overflow algorithm.
1242 *
1243 * The policy implemented by this algorithm is:
1244 * 1) allocate the block in its requested cylinder group.
1245 * 2) quadradically rehash on the cylinder group number.
1246 * 3) brute force search for a free block.
1247 *
1248 * Must be called with the UFS lock held. Will release the lock on success
1249 * and return with it held on failure.
1250 */
1251 /*VARARGS5*/
1252 static ufs2_daddr_t
1253 ffs_hashalloc(ip, cg, pref, size, allocator)
1254 struct inode *ip;
1255 int cg;
1256 ufs2_daddr_t pref;
1257 int size; /* size for data blocks, mode for inodes */
1258 allocfcn_t *allocator;
1259 {
1260 struct fs *fs;
1261 ufs2_daddr_t result;
1262 int i, icg = cg;
1263
1264 mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
1265 #ifdef INVARIANTS
1266 if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
1267 panic("ffs_hashalloc: allocation on suspended filesystem");
1268 #endif
1269 fs = ip->i_fs;
1270 /*
1271 * 1: preferred cylinder group
1272 */
1273 result = (*allocator)(ip, cg, pref, size);
1274 if (result)
1275 return (result);
1276 /*
1277 * 2: quadratic rehash
1278 */
1279 for (i = 1; i < fs->fs_ncg; i *= 2) {
1280 cg += i;
1281 if (cg >= fs->fs_ncg)
1282 cg -= fs->fs_ncg;
1283 result = (*allocator)(ip, cg, 0, size);
1284 if (result)
1285 return (result);
1286 }
1287 /*
1288 * 3: brute force search
1289 * Note that we start at i == 2, since 0 was checked initially,
1290 * and 1 is always checked in the quadratic rehash.
1291 */
1292 cg = (icg + 2) % fs->fs_ncg;
1293 for (i = 2; i < fs->fs_ncg; i++) {
1294 result = (*allocator)(ip, cg, 0, size);
1295 if (result)
1296 return (result);
1297 cg++;
1298 if (cg == fs->fs_ncg)
1299 cg = 0;
1300 }
1301 return (0);
1302 }
1303
1304 /*
1305 * Determine whether a fragment can be extended.
1306 *
1307 * Check to see if the necessary fragments are available, and
1308 * if they are, allocate them.
1309 */
1310 static ufs2_daddr_t
1311 ffs_fragextend(ip, cg, bprev, osize, nsize)
1312 struct inode *ip;
1313 int cg;
1314 ufs2_daddr_t bprev;
1315 int osize, nsize;
1316 {
1317 struct fs *fs;
1318 struct cg *cgp;
1319 struct buf *bp;
1320 struct ufsmount *ump;
1321 int nffree;
1322 long bno;
1323 int frags, bbase;
1324 int i, error;
1325 u_int8_t *blksfree;
1326
1327 ump = ip->i_ump;
1328 fs = ip->i_fs;
1329 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1330 return (0);
1331 frags = numfrags(fs, nsize);
1332 bbase = fragnum(fs, bprev);
1333 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1334 /* cannot extend across a block boundary */
1335 return (0);
1336 }
1337 UFS_UNLOCK(ump);
1338 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1339 (int)fs->fs_cgsize, NOCRED, &bp);
1340 if (error)
1341 goto fail;
1342 cgp = (struct cg *)bp->b_data;
1343 if (!cg_chkmagic(cgp))
1344 goto fail;
1345 bp->b_xflags |= BX_BKGRDWRITE;
1346 cgp->cg_old_time = cgp->cg_time = time_second;
1347 bno = dtogd(fs, bprev);
1348 blksfree = cg_blksfree(cgp);
1349 for (i = numfrags(fs, osize); i < frags; i++)
1350 if (isclr(blksfree, bno + i))
1351 goto fail;
1352 /*
1353 * the current fragment can be extended
1354 * deduct the count on fragment being extended into
1355 * increase the count on the remaining fragment (if any)
1356 * allocate the extended piece
1357 */
1358 for (i = frags; i < fs->fs_frag - bbase; i++)
1359 if (isclr(blksfree, bno + i))
1360 break;
1361 cgp->cg_frsum[i - numfrags(fs, osize)]--;
1362 if (i != frags)
1363 cgp->cg_frsum[i - frags]++;
1364 for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
1365 clrbit(blksfree, bno + i);
1366 cgp->cg_cs.cs_nffree--;
1367 nffree++;
1368 }
1369 UFS_LOCK(ump);
1370 fs->fs_cstotal.cs_nffree -= nffree;
1371 fs->fs_cs(fs, cg).cs_nffree -= nffree;
1372 fs->fs_fmod = 1;
1373 ACTIVECLEAR(fs, cg);
1374 UFS_UNLOCK(ump);
1375 if (DOINGSOFTDEP(ITOV(ip)))
1376 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
1377 bdwrite(bp);
1378 return (bprev);
1379
1380 fail:
1381 brelse(bp);
1382 UFS_LOCK(ump);
1383 return (0);
1384
1385 }
1386
1387 /*
1388 * Determine whether a block can be allocated.
1389 *
1390 * Check to see if a block of the appropriate size is available,
1391 * and if it is, allocate it.
1392 */
1393 static ufs2_daddr_t
1394 ffs_alloccg(ip, cg, bpref, size)
1395 struct inode *ip;
1396 int cg;
1397 ufs2_daddr_t bpref;
1398 int size;
1399 {
1400 struct fs *fs;
1401 struct cg *cgp;
1402 struct buf *bp;
1403 struct ufsmount *ump;
1404 ufs1_daddr_t bno;
1405 ufs2_daddr_t blkno;
1406 int i, allocsiz, error, frags;
1407 u_int8_t *blksfree;
1408
1409 ump = ip->i_ump;
1410 fs = ip->i_fs;
1411 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1412 return (0);
1413 UFS_UNLOCK(ump);
1414 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1415 (int)fs->fs_cgsize, NOCRED, &bp);
1416 if (error)
1417 goto fail;
1418 cgp = (struct cg *)bp->b_data;
1419 if (!cg_chkmagic(cgp) ||
1420 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1421 goto fail;
1422 bp->b_xflags |= BX_BKGRDWRITE;
1423 cgp->cg_old_time = cgp->cg_time = time_second;
1424 if (size == fs->fs_bsize) {
1425 UFS_LOCK(ump);
1426 blkno = ffs_alloccgblk(ip, bp, bpref);
1427 ACTIVECLEAR(fs, cg);
1428 UFS_UNLOCK(ump);
1429 bdwrite(bp);
1430 return (blkno);
1431 }
1432 /*
1433 * check to see if any fragments are already available
1434 * allocsiz is the size which will be allocated, hacking
1435 * it down to a smaller size if necessary
1436 */
1437 blksfree = cg_blksfree(cgp);
1438 frags = numfrags(fs, size);
1439 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1440 if (cgp->cg_frsum[allocsiz] != 0)
1441 break;
1442 if (allocsiz == fs->fs_frag) {
1443 /*
1444 * no fragments were available, so a block will be
1445 * allocated, and hacked up
1446 */
1447 if (cgp->cg_cs.cs_nbfree == 0)
1448 goto fail;
1449 UFS_LOCK(ump);
1450 blkno = ffs_alloccgblk(ip, bp, bpref);
1451 bno = dtogd(fs, blkno);
1452 for (i = frags; i < fs->fs_frag; i++)
1453 setbit(blksfree, bno + i);
1454 i = fs->fs_frag - frags;
1455 cgp->cg_cs.cs_nffree += i;
1456 fs->fs_cstotal.cs_nffree += i;
1457 fs->fs_cs(fs, cg).cs_nffree += i;
1458 fs->fs_fmod = 1;
1459 cgp->cg_frsum[i]++;
1460 ACTIVECLEAR(fs, cg);
1461 UFS_UNLOCK(ump);
1462 bdwrite(bp);
1463 return (blkno);
1464 }
1465 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1466 if (bno < 0)
1467 goto fail;
1468 for (i = 0; i < frags; i++)
1469 clrbit(blksfree, bno + i);
1470 cgp->cg_cs.cs_nffree -= frags;
1471 cgp->cg_frsum[allocsiz]--;
1472 if (frags != allocsiz)
1473 cgp->cg_frsum[allocsiz - frags]++;
1474 UFS_LOCK(ump);
1475 fs->fs_cstotal.cs_nffree -= frags;
1476 fs->fs_cs(fs, cg).cs_nffree -= frags;
1477 fs->fs_fmod = 1;
1478 blkno = cgbase(fs, cg) + bno;
1479 ACTIVECLEAR(fs, cg);
1480 UFS_UNLOCK(ump);
1481 if (DOINGSOFTDEP(ITOV(ip)))
1482 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
1483 bdwrite(bp);
1484 return (blkno);
1485
1486 fail:
1487 brelse(bp);
1488 UFS_LOCK(ump);
1489 return (0);
1490 }
1491
1492 /*
1493 * Allocate a block in a cylinder group.
1494 *
1495 * This algorithm implements the following policy:
1496 * 1) allocate the requested block.
1497 * 2) allocate a rotationally optimal block in the same cylinder.
1498 * 3) allocate the next available block on the block rotor for the
1499 * specified cylinder group.
1500 * Note that this routine only allocates fs_bsize blocks; these
1501 * blocks may be fragmented by the routine that allocates them.
1502 */
1503 static ufs2_daddr_t
1504 ffs_alloccgblk(ip, bp, bpref)
1505 struct inode *ip;
1506 struct buf *bp;
1507 ufs2_daddr_t bpref;
1508 {
1509 struct fs *fs;
1510 struct cg *cgp;
1511 struct ufsmount *ump;
1512 ufs1_daddr_t bno;
1513 ufs2_daddr_t blkno;
1514 u_int8_t *blksfree;
1515
1516 fs = ip->i_fs;
1517 ump = ip->i_ump;
1518 mtx_assert(UFS_MTX(ump), MA_OWNED);
1519 cgp = (struct cg *)bp->b_data;
1520 blksfree = cg_blksfree(cgp);
1521 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
1522 bpref = cgp->cg_rotor;
1523 } else {
1524 bpref = blknum(fs, bpref);
1525 bno = dtogd(fs, bpref);
1526 /*
1527 * if the requested block is available, use it
1528 */
1529 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1530 goto gotit;
1531 }
1532 /*
1533 * Take the next available block in this cylinder group.
1534 */
1535 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1536 if (bno < 0)
1537 return (0);
1538 cgp->cg_rotor = bno;
1539 gotit:
1540 blkno = fragstoblks(fs, bno);
1541 ffs_clrblock(fs, blksfree, (long)blkno);
1542 ffs_clusteracct(ump, fs, cgp, blkno, -1);
1543 cgp->cg_cs.cs_nbfree--;
1544 fs->fs_cstotal.cs_nbfree--;
1545 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1546 fs->fs_fmod = 1;
1547 blkno = cgbase(fs, cgp->cg_cgx) + bno;
1548 /* XXX Fixme. */
1549 UFS_UNLOCK(ump);
1550 if (DOINGSOFTDEP(ITOV(ip)))
1551 softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
1552 UFS_LOCK(ump);
1553 return (blkno);
1554 }
1555
1556 /*
1557 * Determine whether a cluster can be allocated.
1558 *
1559 * We do not currently check for optimal rotational layout if there
1560 * are multiple choices in the same cylinder group. Instead we just
1561 * take the first one that we find following bpref.
1562 */
1563 static ufs2_daddr_t
1564 ffs_clusteralloc(ip, cg, bpref, len)
1565 struct inode *ip;
1566 int cg;
1567 ufs2_daddr_t bpref;
1568 int len;
1569 {
1570 struct fs *fs;
1571 struct cg *cgp;
1572 struct buf *bp;
1573 struct ufsmount *ump;
1574 int i, run, bit, map, got;
1575 ufs2_daddr_t bno;
1576 u_char *mapp;
1577 int32_t *lp;
1578 u_int8_t *blksfree;
1579
1580 fs = ip->i_fs;
1581 ump = ip->i_ump;
1582 if (fs->fs_maxcluster[cg] < len)
1583 return (0);
1584 UFS_UNLOCK(ump);
1585 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1586 NOCRED, &bp))
1587 goto fail_lock;
1588 cgp = (struct cg *)bp->b_data;
1589 if (!cg_chkmagic(cgp))
1590 goto fail_lock;
1591 bp->b_xflags |= BX_BKGRDWRITE;
1592 /*
1593 * Check to see if a cluster of the needed size (or bigger) is
1594 * available in this cylinder group.
1595 */
1596 lp = &cg_clustersum(cgp)[len];
1597 for (i = len; i <= fs->fs_contigsumsize; i++)
1598 if (*lp++ > 0)
1599 break;
1600 if (i > fs->fs_contigsumsize) {
1601 /*
1602 * This is the first time looking for a cluster in this
1603 * cylinder group. Update the cluster summary information
1604 * to reflect the true maximum sized cluster so that
1605 * future cluster allocation requests can avoid reading
1606 * the cylinder group map only to find no clusters.
1607 */
1608 lp = &cg_clustersum(cgp)[len - 1];
1609 for (i = len - 1; i > 0; i--)
1610 if (*lp-- > 0)
1611 break;
1612 UFS_LOCK(ump);
1613 fs->fs_maxcluster[cg] = i;
1614 goto fail;
1615 }
1616 /*
1617 * Search the cluster map to find a big enough cluster.
1618 * We take the first one that we find, even if it is larger
1619 * than we need as we prefer to get one close to the previous
1620 * block allocation. We do not search before the current
1621 * preference point as we do not want to allocate a block
1622 * that is allocated before the previous one (as we will
1623 * then have to wait for another pass of the elevator
1624 * algorithm before it will be read). We prefer to fail and
1625 * be recalled to try an allocation in the next cylinder group.
1626 */
1627 if (dtog(fs, bpref) != cg)
1628 bpref = 0;
1629 else
1630 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1631 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1632 map = *mapp++;
1633 bit = 1 << (bpref % NBBY);
1634 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1635 if ((map & bit) == 0) {
1636 run = 0;
1637 } else {
1638 run++;
1639 if (run == len)
1640 break;
1641 }
1642 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1643 bit <<= 1;
1644 } else {
1645 map = *mapp++;
1646 bit = 1;
1647 }
1648 }
1649 if (got >= cgp->cg_nclusterblks)
1650 goto fail_lock;
1651 /*
1652 * Allocate the cluster that we have found.
1653 */
1654 blksfree = cg_blksfree(cgp);
1655 for (i = 1; i <= len; i++)
1656 if (!ffs_isblock(fs, blksfree, got - run + i))
1657 panic("ffs_clusteralloc: map mismatch");
1658 bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
1659 if (dtog(fs, bno) != cg)
1660 panic("ffs_clusteralloc: allocated out of group");
1661 len = blkstofrags(fs, len);
1662 UFS_LOCK(ump);
1663 for (i = 0; i < len; i += fs->fs_frag)
1664 if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
1665 panic("ffs_clusteralloc: lost block");
1666 ACTIVECLEAR(fs, cg);
1667 UFS_UNLOCK(ump);
1668 bdwrite(bp);
1669 return (bno);
1670
1671 fail_lock:
1672 UFS_LOCK(ump);
1673 fail:
1674 brelse(bp);
1675 return (0);
1676 }
1677
1678 /*
1679 * Determine whether an inode can be allocated.
1680 *
1681 * Check to see if an inode is available, and if it is,
1682 * allocate it using the following policy:
1683 * 1) allocate the requested inode.
1684 * 2) allocate the next available inode after the requested
1685 * inode in the specified cylinder group.
1686 */
1687 static ufs2_daddr_t
1688 ffs_nodealloccg(ip, cg, ipref, mode)
1689 struct inode *ip;
1690 int cg;
1691 ufs2_daddr_t ipref;
1692 int mode;
1693 {
1694 struct fs *fs;
1695 struct cg *cgp;
1696 struct buf *bp, *ibp;
1697 struct ufsmount *ump;
1698 u_int8_t *inosused;
1699 struct ufs2_dinode *dp2;
1700 int error, start, len, loc, map, i;
1701
1702 fs = ip->i_fs;
1703 ump = ip->i_ump;
1704 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1705 return (0);
1706 UFS_UNLOCK(ump);
1707 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1708 (int)fs->fs_cgsize, NOCRED, &bp);
1709 if (error) {
1710 brelse(bp);
1711 UFS_LOCK(ump);
1712 return (0);
1713 }
1714 cgp = (struct cg *)bp->b_data;
1715 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1716 brelse(bp);
1717 UFS_LOCK(ump);
1718 return (0);
1719 }
1720 bp->b_xflags |= BX_BKGRDWRITE;
1721 cgp->cg_old_time = cgp->cg_time = time_second;
1722 inosused = cg_inosused(cgp);
1723 if (ipref) {
1724 ipref %= fs->fs_ipg;
1725 if (isclr(inosused, ipref))
1726 goto gotit;
1727 }
1728 start = cgp->cg_irotor / NBBY;
1729 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1730 loc = skpc(0xff, len, &inosused[start]);
1731 if (loc == 0) {
1732 len = start + 1;
1733 start = 0;
1734 loc = skpc(0xff, len, &inosused[0]);
1735 if (loc == 0) {
1736 printf("cg = %d, irotor = %ld, fs = %s\n",
1737 cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
1738 panic("ffs_nodealloccg: map corrupted");
1739 /* NOTREACHED */
1740 }
1741 }
1742 i = start + len - loc;
1743 map = inosused[i];
1744 ipref = i * NBBY;
1745 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1746 if ((map & i) == 0) {
1747 cgp->cg_irotor = ipref;
1748 goto gotit;
1749 }
1750 }
1751 printf("fs = %s\n", fs->fs_fsmnt);
1752 panic("ffs_nodealloccg: block not in map");
1753 /* NOTREACHED */
1754 gotit:
1755 /*
1756 * Check to see if we need to initialize more inodes.
1757 */
1758 ibp = NULL;
1759 if (fs->fs_magic == FS_UFS2_MAGIC &&
1760 ipref + INOPB(fs) > cgp->cg_initediblk &&
1761 cgp->cg_initediblk < cgp->cg_niblk) {
1762 ibp = getblk(ip->i_devvp, fsbtodb(fs,
1763 ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
1764 (int)fs->fs_bsize, 0, 0, 0);
1765 bzero(ibp->b_data, (int)fs->fs_bsize);
1766 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1767 for (i = 0; i < INOPB(fs); i++) {
1768 dp2->di_gen = arc4random() / 2 + 1;
1769 dp2++;
1770 }
1771 cgp->cg_initediblk += INOPB(fs);
1772 }
1773 UFS_LOCK(ump);
1774 ACTIVECLEAR(fs, cg);
1775 setbit(inosused, ipref);
1776 cgp->cg_cs.cs_nifree--;
1777 fs->fs_cstotal.cs_nifree--;
1778 fs->fs_cs(fs, cg).cs_nifree--;
1779 fs->fs_fmod = 1;
1780 if ((mode & IFMT) == IFDIR) {
1781 cgp->cg_cs.cs_ndir++;
1782 fs->fs_cstotal.cs_ndir++;
1783 fs->fs_cs(fs, cg).cs_ndir++;
1784 }
1785 UFS_UNLOCK(ump);
1786 if (DOINGSOFTDEP(ITOV(ip)))
1787 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1788 bdwrite(bp);
1789 if (ibp != NULL)
1790 bawrite(ibp);
1791 return (cg * fs->fs_ipg + ipref);
1792 }
1793
1794 /*
1795 * check if a block is free
1796 */
1797 static int
1798 ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
1799 {
1800
1801 switch ((int)fs->fs_frag) {
1802 case 8:
1803 return (cp[h] == 0);
1804 case 4:
1805 return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
1806 case 2:
1807 return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
1808 case 1:
1809 return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
1810 default:
1811 panic("ffs_isfreeblock");
1812 }
1813 return (0);
1814 }
1815
1816 /*
1817 * Free a block or fragment.
1818 *
1819 * The specified block or fragment is placed back in the
1820 * free map. If a fragment is deallocated, a possible
1821 * block reassembly is checked.
1822 */
1823 void
1824 ffs_blkfree(ump, fs, devvp, bno, size, inum)
1825 struct ufsmount *ump;
1826 struct fs *fs;
1827 struct vnode *devvp;
1828 ufs2_daddr_t bno;
1829 long size;
1830 ino_t inum;
1831 {
1832 struct cg *cgp;
1833 struct buf *bp;
1834 ufs1_daddr_t fragno, cgbno;
1835 ufs2_daddr_t cgblkno;
1836 int i, cg, blk, frags, bbase;
1837 u_int8_t *blksfree;
1838 struct cdev *dev;
1839
1840 cg = dtog(fs, bno);
1841 if (devvp->v_type != VCHR) {
1842 /* devvp is a snapshot */
1843 dev = VTOI(devvp)->i_devvp->v_rdev;
1844 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1845 } else {
1846 /* devvp is a normal disk device */
1847 dev = devvp->v_rdev;
1848 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1849 ASSERT_VOP_LOCKED(devvp, "ffs_blkfree");
1850 if ((devvp->v_vflag & VV_COPYONWRITE) &&
1851 ffs_snapblkfree(fs, devvp, bno, size, inum))
1852 return;
1853 }
1854 #ifdef INVARIANTS
1855 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1856 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1857 printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
1858 devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
1859 size, fs->fs_fsmnt);
1860 panic("ffs_blkfree: bad size");
1861 }
1862 #endif
1863 if ((u_int)bno >= fs->fs_size) {
1864 printf("bad block %jd, ino %lu\n", (intmax_t)bno,
1865 (u_long)inum);
1866 ffs_fserr(fs, inum, "bad block");
1867 return;
1868 }
1869 if (bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp)) {
1870 brelse(bp);
1871 return;
1872 }
1873 cgp = (struct cg *)bp->b_data;
1874 if (!cg_chkmagic(cgp)) {
1875 brelse(bp);
1876 return;
1877 }
1878 bp->b_xflags |= BX_BKGRDWRITE;
1879 cgp->cg_old_time = cgp->cg_time = time_second;
1880 cgbno = dtogd(fs, bno);
1881 blksfree = cg_blksfree(cgp);
1882 UFS_LOCK(ump);
1883 if (size == fs->fs_bsize) {
1884 fragno = fragstoblks(fs, cgbno);
1885 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1886 if (devvp->v_type != VCHR) {
1887 UFS_UNLOCK(ump);
1888 /* devvp is a snapshot */
1889 brelse(bp);
1890 return;
1891 }
1892 printf("dev = %s, block = %jd, fs = %s\n",
1893 devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
1894 panic("ffs_blkfree: freeing free block");
1895 }
1896 ffs_setblock(fs, blksfree, fragno);
1897 ffs_clusteracct(ump, fs, cgp, fragno, 1);
1898 cgp->cg_cs.cs_nbfree++;
1899 fs->fs_cstotal.cs_nbfree++;
1900 fs->fs_cs(fs, cg).cs_nbfree++;
1901 } else {
1902 bbase = cgbno - fragnum(fs, cgbno);
1903 /*
1904 * decrement the counts associated with the old frags
1905 */
1906 blk = blkmap(fs, blksfree, bbase);
1907 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1908 /*
1909 * deallocate the fragment
1910 */
1911 frags = numfrags(fs, size);
1912 for (i = 0; i < frags; i++) {
1913 if (isset(blksfree, cgbno + i)) {
1914 printf("dev = %s, block = %jd, fs = %s\n",
1915 devtoname(dev), (intmax_t)(bno + i),
1916 fs->fs_fsmnt);
1917 panic("ffs_blkfree: freeing free frag");
1918 }
1919 setbit(blksfree, cgbno + i);
1920 }
1921 cgp->cg_cs.cs_nffree += i;
1922 fs->fs_cstotal.cs_nffree += i;
1923 fs->fs_cs(fs, cg).cs_nffree += i;
1924 /*
1925 * add back in counts associated with the new frags
1926 */
1927 blk = blkmap(fs, blksfree, bbase);
1928 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1929 /*
1930 * if a complete block has been reassembled, account for it
1931 */
1932 fragno = fragstoblks(fs, bbase);
1933 if (ffs_isblock(fs, blksfree, fragno)) {
1934 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1935 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1936 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1937 ffs_clusteracct(ump, fs, cgp, fragno, 1);
1938 cgp->cg_cs.cs_nbfree++;
1939 fs->fs_cstotal.cs_nbfree++;
1940 fs->fs_cs(fs, cg).cs_nbfree++;
1941 }
1942 }
1943 fs->fs_fmod = 1;
1944 ACTIVECLEAR(fs, cg);
1945 UFS_UNLOCK(ump);
1946 bdwrite(bp);
1947 }
1948
#ifdef INVARIANTS
/*
 * Verify allocation of a block or fragment.
 *
 * Reads the cylinder group containing bno and inspects the free map
 * for the size bytes starting there.  Returns true if the block or
 * fragment is allocated, false if it is free.
 *
 * Panics on a bad size, a block number outside the filesystem, a failed
 * cylinder group read, a bad cylinder group magic number, or a fragment
 * range that is only partially free.
 */
static int
ffs_checkblk(ip, bno, size)
	struct inode *ip;
	ufs2_daddr_t bno;
	long size;
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	ufs1_daddr_t cgbno;
	int i, error, frags, free;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %ld, size = %ld, fs = %s\n",
		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	/*
	 * Range-check at full width: bno is a 64-bit ufs2_daddr_t, and
	 * narrowing it to u_int would let negative or far out-of-range
	 * addresses pass when their low 32 bits happen to be small.
	 */
	if (bno < 0 || bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
	    (int)fs->fs_cgsize, NOCRED, &bp);
	if (error)
		panic("ffs_checkblk: cg bread failed");
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp))
		panic("ffs_checkblk: cg magic mismatch");
	bp->b_xflags |= BX_BKGRDWRITE;
	blksfree = cg_blksfree(cgp);
	cgbno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
	} else {
		/* Count the free fragments; all or none must be free. */
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(blksfree, cgbno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* INVARIANTS */
1999
2000 /*
2001 * Free an inode.
2002 */
2003 int
2004 ffs_vfree(pvp, ino, mode)
2005 struct vnode *pvp;
2006 ino_t ino;
2007 int mode;
2008 {
2009 struct inode *ip;
2010
2011 if (DOINGSOFTDEP(pvp)) {
2012 softdep_freefile(pvp, ino, mode);
2013 return (0);
2014 }
2015 ip = VTOI(pvp);
2016 return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode));
2017 }
2018
2019 /*
2020 * Do the actual free operation.
2021 * The specified inode is placed back in the free map.
2022 */
2023 int
2024 ffs_freefile(ump, fs, devvp, ino, mode)
2025 struct ufsmount *ump;
2026 struct fs *fs;
2027 struct vnode *devvp;
2028 ino_t ino;
2029 int mode;
2030 {
2031 struct cg *cgp;
2032 struct buf *bp;
2033 ufs2_daddr_t cgbno;
2034 int error, cg;
2035 u_int8_t *inosused;
2036 struct cdev *dev;
2037
2038 cg = ino_to_cg(fs, ino);
2039 if (devvp->v_type != VCHR) {
2040 /* devvp is a snapshot */
2041 dev = VTOI(devvp)->i_devvp->v_rdev;
2042 cgbno = fragstoblks(fs, cgtod(fs, cg));
2043 } else {
2044 /* devvp is a normal disk device */
2045 dev = devvp->v_rdev;
2046 cgbno = fsbtodb(fs, cgtod(fs, cg));
2047 }
2048 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2049 panic("ffs_freefile: range: dev = %s, ino = %lu, fs = %s",
2050 devtoname(dev), (u_long)ino, fs->fs_fsmnt);
2051 if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) {
2052 brelse(bp);
2053 return (error);
2054 }
2055 cgp = (struct cg *)bp->b_data;
2056 if (!cg_chkmagic(cgp)) {
2057 brelse(bp);
2058 return (0);
2059 }
2060 bp->b_xflags |= BX_BKGRDWRITE;
2061 cgp->cg_old_time = cgp->cg_time = time_second;
2062 inosused = cg_inosused(cgp);
2063 ino %= fs->fs_ipg;
2064 if (isclr(inosused, ino)) {
2065 printf("dev = %s, ino = %lu, fs = %s\n", devtoname(dev),
2066 (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
2067 if (fs->fs_ronly == 0)
2068 panic("ffs_freefile: freeing free inode");
2069 }
2070 clrbit(inosused, ino);
2071 if (ino < cgp->cg_irotor)
2072 cgp->cg_irotor = ino;
2073 cgp->cg_cs.cs_nifree++;
2074 UFS_LOCK(ump);
2075 fs->fs_cstotal.cs_nifree++;
2076 fs->fs_cs(fs, cg).cs_nifree++;
2077 if ((mode & IFMT) == IFDIR) {
2078 cgp->cg_cs.cs_ndir--;
2079 fs->fs_cstotal.cs_ndir--;
2080 fs->fs_cs(fs, cg).cs_ndir--;
2081 }
2082 fs->fs_fmod = 1;
2083 ACTIVECLEAR(fs, cg);
2084 UFS_UNLOCK(ump);
2085 bdwrite(bp);
2086 return (0);
2087 }
2088
2089 /*
2090 * Check to see if a file is free.
2091 */
2092 int
2093 ffs_checkfreefile(fs, devvp, ino)
2094 struct fs *fs;
2095 struct vnode *devvp;
2096 ino_t ino;
2097 {
2098 struct cg *cgp;
2099 struct buf *bp;
2100 ufs2_daddr_t cgbno;
2101 int ret, cg;
2102 u_int8_t *inosused;
2103
2104 cg = ino_to_cg(fs, ino);
2105 if (devvp->v_type != VCHR) {
2106 /* devvp is a snapshot */
2107 cgbno = fragstoblks(fs, cgtod(fs, cg));
2108 } else {
2109 /* devvp is a normal disk device */
2110 cgbno = fsbtodb(fs, cgtod(fs, cg));
2111 }
2112 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2113 return (1);
2114 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
2115 brelse(bp);
2116 return (1);
2117 }
2118 cgp = (struct cg *)bp->b_data;
2119 if (!cg_chkmagic(cgp)) {
2120 brelse(bp);
2121 return (1);
2122 }
2123 inosused = cg_inosused(cgp);
2124 ino %= fs->fs_ipg;
2125 ret = isclr(inosused, ino);
2126 brelse(bp);
2127 return (ret);
2128 }
2129
2130 /*
2131 * Find a block of the specified size in the specified cylinder group.
2132 *
2133 * It is a panic if a request is made to find a block if none are
2134 * available.
2135 */
2136 static ufs1_daddr_t
2137 ffs_mapsearch(fs, cgp, bpref, allocsiz)
2138 struct fs *fs;
2139 struct cg *cgp;
2140 ufs2_daddr_t bpref;
2141 int allocsiz;
2142 {
2143 ufs1_daddr_t bno;
2144 int start, len, loc, i;
2145 int blk, field, subfield, pos;
2146 u_int8_t *blksfree;
2147
2148 /*
2149 * find the fragment by searching through the free block
2150 * map for an appropriate bit pattern
2151 */
2152 if (bpref)
2153 start = dtogd(fs, bpref) / NBBY;
2154 else
2155 start = cgp->cg_frotor / NBBY;
2156 blksfree = cg_blksfree(cgp);
2157 len = howmany(fs->fs_fpg, NBBY) - start;
2158 loc = scanc((u_int)len, (u_char *)&blksfree[start],
2159 fragtbl[fs->fs_frag],
2160 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
2161 if (loc == 0) {
2162 len = start + 1;
2163 start = 0;
2164 loc = scanc((u_int)len, (u_char *)&blksfree[0],
2165 fragtbl[fs->fs_frag],
2166 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
2167 if (loc == 0) {
2168 printf("start = %d, len = %d, fs = %s\n",
2169 start, len, fs->fs_fsmnt);
2170 panic("ffs_alloccg: map corrupted");
2171 /* NOTREACHED */
2172 }
2173 }
2174 bno = (start + len - loc) * NBBY;
2175 cgp->cg_frotor = bno;
2176 /*
2177 * found the byte in the map
2178 * sift through the bits to find the selected frag
2179 */
2180 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
2181 blk = blkmap(fs, blksfree, bno);
2182 blk <<= 1;
2183 field = around[allocsiz];
2184 subfield = inside[allocsiz];
2185 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
2186 if ((blk & field) == subfield)
2187 return (bno + pos);
2188 field <<= 1;
2189 subfield <<= 1;
2190 }
2191 }
2192 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
2193 panic("ffs_alloccg: block not in map");
2194 return (-1);
2195 }
2196
2197 /*
2198 * Update the cluster map because of an allocation or free.
2199 *
2200 * Cnt == 1 means free; cnt == -1 means allocating.
2201 */
2202 void
2203 ffs_clusteracct(ump, fs, cgp, blkno, cnt)
2204 struct ufsmount *ump;
2205 struct fs *fs;
2206 struct cg *cgp;
2207 ufs1_daddr_t blkno;
2208 int cnt;
2209 {
2210 int32_t *sump;
2211 int32_t *lp;
2212 u_char *freemapp, *mapp;
2213 int i, start, end, forw, back, map, bit;
2214
2215 mtx_assert(UFS_MTX(ump), MA_OWNED);
2216
2217 if (fs->fs_contigsumsize <= 0)
2218 return;
2219 freemapp = cg_clustersfree(cgp);
2220 sump = cg_clustersum(cgp);
2221 /*
2222 * Allocate or clear the actual block.
2223 */
2224 if (cnt > 0)
2225 setbit(freemapp, blkno);
2226 else
2227 clrbit(freemapp, blkno);
2228 /*
2229 * Find the size of the cluster going forward.
2230 */
2231 start = blkno + 1;
2232 end = start + fs->fs_contigsumsize;
2233 if (end >= cgp->cg_nclusterblks)
2234 end = cgp->cg_nclusterblks;
2235 mapp = &freemapp[start / NBBY];
2236 map = *mapp++;
2237 bit = 1 << (start % NBBY);
2238 for (i = start; i < end; i++) {
2239 if ((map & bit) == 0)
2240 break;
2241 if ((i & (NBBY - 1)) != (NBBY - 1)) {
2242 bit <<= 1;
2243 } else {
2244 map = *mapp++;
2245 bit = 1;
2246 }
2247 }
2248 forw = i - start;
2249 /*
2250 * Find the size of the cluster going backward.
2251 */
2252 start = blkno - 1;
2253 end = start - fs->fs_contigsumsize;
2254 if (end < 0)
2255 end = -1;
2256 mapp = &freemapp[start / NBBY];
2257 map = *mapp--;
2258 bit = 1 << (start % NBBY);
2259 for (i = start; i > end; i--) {
2260 if ((map & bit) == 0)
2261 break;
2262 if ((i & (NBBY - 1)) != 0) {
2263 bit >>= 1;
2264 } else {
2265 map = *mapp--;
2266 bit = 1 << (NBBY - 1);
2267 }
2268 }
2269 back = start - i;
2270 /*
2271 * Account for old cluster and the possibly new forward and
2272 * back clusters.
2273 */
2274 i = back + forw + 1;
2275 if (i > fs->fs_contigsumsize)
2276 i = fs->fs_contigsumsize;
2277 sump[i] += cnt;
2278 if (back > 0)
2279 sump[back] -= cnt;
2280 if (forw > 0)
2281 sump[forw] -= cnt;
2282 /*
2283 * Update cluster summary information.
2284 */
2285 lp = &sump[fs->fs_contigsumsize];
2286 for (i = fs->fs_contigsumsize; i > 0; i--)
2287 if (*lp-- > 0)
2288 break;
2289 fs->fs_maxcluster[cgp->cg_cgx] = i;
2290 }
2291
2292 /*
2293 * Fserr prints the name of a filesystem with an error diagnostic.
2294 *
2295 * The form of the error message is:
2296 * fs: error message
2297 */
2298 static void
2299 ffs_fserr(fs, inum, cp)
2300 struct fs *fs;
2301 ino_t inum;
2302 char *cp;
2303 {
2304 struct thread *td = curthread; /* XXX */
2305 struct proc *p = td->td_proc;
2306
2307 log(LOG_ERR, "pid %d (%s), uid %d inumber %d on %s: %s\n",
2308 p->p_pid, p->p_comm, td->td_ucred->cr_uid, inum, fs->fs_fsmnt, cp);
2309 }
2310
2311 /*
2312 * This function provides the capability for the fsck program to
2313 * update an active filesystem. Eleven operations are provided:
2314 *
2315 * adjrefcnt(inode, amt) - adjusts the reference count on the
2316 * specified inode by the specified amount. Under normal
2317 * operation the count should always go down. Decrementing
2318 * the count to zero will cause the inode to be freed.
2319 * adjblkcnt(inode, amt) - adjust the number of blocks used to
2320 * by the specifed amount.
2321 * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
2322 * adjust the superblock summary.
2323 * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
2324 * are marked as free. Inodes should never have to be marked
2325 * as in use.
2326 * freefiles(inode, count) - file inodes [inode..inode + count - 1]
2327 * are marked as free. Inodes should never have to be marked
2328 * as in use.
2329 * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
2330 * are marked as free. Blocks should never have to be marked
2331 * as in use.
2332 * setflags(flags, set/clear) - the fs_flags field has the specified
2333 * flags set (second parameter +1) or cleared (second parameter -1).
2334 */
2335
2336 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
2337
2338 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
2339 0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");
2340
2341 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
2342 sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");
2343
2344 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
2345 sysctl_ffs_fsck, "Adjust number of directories");
2346
2347 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR,
2348 sysctl_ffs_fsck, "Adjust number of free blocks");
2349
2350 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR,
2351 sysctl_ffs_fsck, "Adjust number of free inodes");
2352
2353 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR,
2354 sysctl_ffs_fsck, "Adjust number of free frags");
2355
2356 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR,
2357 sysctl_ffs_fsck, "Adjust number of free clusters");
2358
2359 static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
2360 sysctl_ffs_fsck, "Free Range of Directory Inodes");
2361
2362 static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
2363 sysctl_ffs_fsck, "Free Range of File Inodes");
2364
2365 static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
2366 sysctl_ffs_fsck, "Free Range of Blocks");
2367
2368 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
2369 sysctl_ffs_fsck, "Change Filesystem Flags");
2370
2371 #ifdef DEBUG
2372 static int fsckcmds = 0;
2373 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
2374 #endif /* DEBUG */
2375
2376 static int
2377 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
2378 {
2379 struct fsck_cmd cmd;
2380 struct ufsmount *ump;
2381 struct vnode *vp;
2382 struct inode *ip;
2383 struct mount *mp;
2384 struct fs *fs;
2385 ufs2_daddr_t blkno;
2386 long blkcnt, blksize;
2387 struct file *fp;
2388 int filetype, error;
2389
2390 if (req->newlen > sizeof cmd)
2391 return (EBADRPC);
2392 if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
2393 return (error);
2394 if (cmd.version != FFS_CMD_VERSION)
2395 return (ERPCMISMATCH);
2396 if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
2397 return (error);
2398 vn_start_write(fp->f_data, &mp, V_WAIT);
2399 if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
2400 vn_finished_write(mp);
2401 fdrop(fp, curthread);
2402 return (EINVAL);
2403 }
2404 if (mp->mnt_flag & MNT_RDONLY) {
2405 vn_finished_write(mp);
2406 fdrop(fp, curthread);
2407 return (EROFS);
2408 }
2409 ump = VFSTOUFS(mp);
2410 fs = ump->um_fs;
2411 filetype = IFREG;
2412
2413 switch (oidp->oid_number) {
2414
2415 case FFS_SET_FLAGS:
2416 #ifdef DEBUG
2417 if (fsckcmds)
2418 printf("%s: %s flags\n", mp->mnt_stat.f_mntonname,
2419 cmd.size > 0 ? "set" : "clear");
2420 #endif /* DEBUG */
2421 if (cmd.size > 0)
2422 fs->fs_flags |= (long)cmd.value;
2423 else
2424 fs->fs_flags &= ~(long)cmd.value;
2425 break;
2426
2427 case FFS_ADJ_REFCNT:
2428 #ifdef DEBUG
2429 if (fsckcmds) {
2430 printf("%s: adjust inode %jd count by %jd\n",
2431 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
2432 (intmax_t)cmd.size);
2433 }
2434 #endif /* DEBUG */
2435 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
2436 break;
2437 ip = VTOI(vp);
2438 ip->i_nlink += cmd.size;
2439 DIP_SET(ip, i_nlink, ip->i_nlink);
2440 ip->i_effnlink += cmd.size;
2441 ip->i_flag |= IN_CHANGE;
2442 if (DOINGSOFTDEP(vp))
2443 softdep_change_linkcnt(ip);
2444 vput(vp);
2445 break;
2446
2447 case FFS_ADJ_BLKCNT:
2448 #ifdef DEBUG
2449 if (fsckcmds) {
2450 printf("%s: adjust inode %jd block count by %jd\n",
2451 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
2452 (intmax_t)cmd.size);
2453 }
2454 #endif /* DEBUG */
2455 if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
2456 break;
2457 ip = VTOI(vp);
2458 if (ip->i_flag & IN_SPACECOUNTED) {
2459 UFS_LOCK(ump);
2460 fs->fs_pendingblocks += cmd.size;
2461 UFS_UNLOCK(ump);
2462 }
2463 DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
2464 ip->i_flag |= IN_CHANGE;
2465 vput(vp);
2466 break;
2467
2468 case FFS_DIR_FREE:
2469 filetype = IFDIR;
2470 /* fall through */
2471
2472 case FFS_FILE_FREE:
2473 #ifdef DEBUG
2474 if (fsckcmds) {
2475 if (cmd.size == 1)
2476 printf("%s: free %s inode %d\n",
2477 mp->mnt_stat.f_mntonname,
2478 filetype == IFDIR ? "directory" : "file",
2479 (ino_t)cmd.value);
2480 else
2481 printf("%s: free %s inodes %d-%d\n",
2482 mp->mnt_stat.f_mntonname,
2483 filetype == IFDIR ? "directory" : "file",
2484 (ino_t)cmd.value,
2485 (ino_t)(cmd.value + cmd.size - 1));
2486 }
2487 #endif /* DEBUG */
2488 while (cmd.size > 0) {
2489 if ((error = ffs_freefile(ump, fs, ump->um_devvp,
2490 cmd.value, filetype)))
2491 break;
2492 cmd.size -= 1;
2493 cmd.value += 1;
2494 }
2495 break;
2496
2497 case FFS_BLK_FREE:
2498 #ifdef DEBUG
2499 if (fsckcmds) {
2500 if (cmd.size == 1)
2501 printf("%s: free block %jd\n",
2502 mp->mnt_stat.f_mntonname,
2503 (intmax_t)cmd.value);
2504 else
2505 printf("%s: free blocks %jd-%jd\n",
2506 mp->mnt_stat.f_mntonname,
2507 (intmax_t)cmd.value,
2508 (intmax_t)cmd.value + cmd.size - 1);
2509 }
2510 #endif /* DEBUG */
2511 blkno = cmd.value;
2512 blkcnt = cmd.size;
2513 blksize = fs->fs_frag - (blkno % fs->fs_frag);
2514 while (blkcnt > 0) {
2515 if (blksize > blkcnt)
2516 blksize = blkcnt;
2517 ffs_blkfree(ump, fs, ump->um_devvp, blkno,
2518 blksize * fs->fs_fsize, ROOTINO);
2519 blkno += blksize;
2520 blkcnt -= blksize;
2521 blksize = fs->fs_frag;
2522 }
2523 break;
2524
2525 /*
2526 * Adjust superblock summaries. fsck(8) is expected to
2527 * submit deltas when necessary.
2528 */
2529 case FFS_ADJ_NDIR:
2530 #ifdef DEBUG
2531 if (fsckcmds) {
2532 printf("%s: adjust number of directories by %jd\n",
2533 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
2534 }
2535 #endif /* DEBUG */
2536 fs->fs_cstotal.cs_ndir += cmd.value;
2537 break;
2538 case FFS_ADJ_NBFREE:
2539 #ifdef DEBUG
2540 if (fsckcmds) {
2541 printf("%s: adjust number of free blocks by %+jd\n",
2542 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
2543 }
2544 #endif /* DEBUG */
2545 fs->fs_cstotal.cs_nbfree += cmd.value;
2546 break;
2547 case FFS_ADJ_NIFREE:
2548 #ifdef DEBUG
2549 if (fsckcmds) {
2550 printf("%s: adjust number of free inodes by %+jd\n",
2551 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
2552 }
2553 #endif /* DEBUG */
2554 fs->fs_cstotal.cs_nifree += cmd.value;
2555 break;
2556 case FFS_ADJ_NFFREE:
2557 #ifdef DEBUG
2558 if (fsckcmds) {
2559 printf("%s: adjust number of free frags by %+jd\n",
2560 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
2561 }
2562 #endif /* DEBUG */
2563 fs->fs_cstotal.cs_nffree += cmd.value;
2564 break;
2565 case FFS_ADJ_NUMCLUSTERS:
2566 #ifdef DEBUG
2567 if (fsckcmds) {
2568 printf("%s: adjust number of free clusters by %+jd\n",
2569 mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
2570 }
2571 #endif /* DEBUG */
2572 fs->fs_cstotal.cs_numclusters += cmd.value;
2573 break;
2574
2575 default:
2576 #ifdef DEBUG
2577 if (fsckcmds) {
2578 printf("Invalid request %d from fsck\n",
2579 oidp->oid_number);
2580 }
2581 #endif /* DEBUG */
2582 error = EINVAL;
2583 break;
2584
2585 }
2586 fdrop(fp, curthread);
2587 vn_finished_write(mp);
2588 return (error);
2589 }
Cache object: a1066cce32a62c470396ceb0df30ab3b
|