1 /*-
2 * modified for EXT2FS support in Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 */
7 /*-
8 * SPDX-License-Identifier: BSD-3-Clause
9 *
10 * Copyright (c) 1989, 1991, 1993, 1994
11 * The Regents of the University of California. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
38 * $FreeBSD$
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/endian.h>
53 #include <sys/fcntl.h>
54 #include <sys/malloc.h>
55 #include <sys/sdt.h>
56 #include <sys/stat.h>
57 #include <sys/mutex.h>
58
59 #include <geom/geom.h>
60 #include <geom/geom_vfs.h>
61
62 #include <fs/ext2fs/fs.h>
63 #include <fs/ext2fs/ext2_mount.h>
64 #include <fs/ext2fs/inode.h>
65
66 #include <fs/ext2fs/ext2fs.h>
67 #include <fs/ext2fs/ext2_dinode.h>
68 #include <fs/ext2fs/ext2_extern.h>
69 #include <fs/ext2fs/ext2_extents.h>
70
SDT_PROVIDER_DECLARE(ext2fs);
/*
 * ext2fs trace probe:
 * arg0: verbosity. Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*");
/*
 * Fired by ext2_cg_validate() when a group descriptor is rejected:
 * arg0: Textual description of the failed check
 * arg1: Index of the offending block group
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int");
/*
 * Fired by ext2_compute_sb_data() when a superblock value is rejected:
 * arg0: Textual description of the failed check
 */
SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*");
80
81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
82 static int ext2_mountfs(struct vnode *, struct mount *);
83 static int ext2_reload(struct mount *mp, struct thread *td);
84 static int ext2_sbupdate(struct ext2mount *, int);
85 static int ext2_cgupdate(struct ext2mount *, int);
86 static vfs_unmount_t ext2_unmount;
87 static vfs_root_t ext2_root;
88 static vfs_statfs_t ext2_statfs;
89 static vfs_sync_t ext2_sync;
90 static vfs_vget_t ext2_vget;
91 static vfs_fhtovp_t ext2_fhtovp;
92 static vfs_mount_t ext2_mount;
93
/*
 * Allocation type for the per-vnode private data.  It has external
 * linkage; presumably other ext2fs source files allocate with it
 * (not used in the part of this file reviewed here -- confirm).
 */
MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
/*
 * Allocation type for everything hanging off the mount: the ext2mount,
 * the in-core and on-disk superblock copies, the group descriptors,
 * the contigdirs array and the cluster summaries.
 */
static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
96
97 static struct vfsops ext2fs_vfsops = {
98 .vfs_fhtovp = ext2_fhtovp,
99 .vfs_mount = ext2_mount,
100 .vfs_root = ext2_root, /* root inode via vget */
101 .vfs_statfs = ext2_statfs,
102 .vfs_sync = ext2_sync,
103 .vfs_unmount = ext2_unmount,
104 .vfs_vget = ext2_vget,
105 };
106
107 VFS_SET(ext2fs_vfsops, ext2fs, 0);
108
109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
110 int ronly);
111 static int ext2_compute_sb_data(struct vnode * devvp,
112 struct ext2fs * es, struct m_ext2fs * fs);
113
114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr",
115 "noclusterw", "noexec", "export", "force", "from", "multilabel",
116 "suiddir", "nosymfollow", "sync", "union", NULL };
117
118 /*
119 * VFS Operations.
120 *
121 * mount system call
122 */
123 static int
124 ext2_mount(struct mount *mp)
125 {
126 struct vfsoptlist *opts;
127 struct vnode *devvp;
128 struct thread *td;
129 struct ext2mount *ump = NULL;
130 struct m_ext2fs *fs;
131 struct nameidata nd, *ndp = &nd;
132 accmode_t accmode;
133 char *path, *fspec;
134 int error, flags, len;
135
136 td = curthread;
137 opts = mp->mnt_optnew;
138
139 if (vfs_filteropt(opts, ext2_opts))
140 return (EINVAL);
141
142 vfs_getopt(opts, "fspath", (void **)&path, NULL);
143 /* Double-check the length of path.. */
144 if (strlen(path) >= MAXMNTLEN)
145 return (ENAMETOOLONG);
146
147 fspec = NULL;
148 error = vfs_getopt(opts, "from", (void **)&fspec, &len);
149 if (!error && fspec[len - 1] != '\0')
150 return (EINVAL);
151
152 /*
153 * If updating, check whether changing from read-only to
154 * read/write; if there is no device name, that's all we do.
155 */
156 if (mp->mnt_flag & MNT_UPDATE) {
157 ump = VFSTOEXT2(mp);
158 fs = ump->um_e2fs;
159 error = 0;
160 if (fs->e2fs_ronly == 0 &&
161 vfs_flagopt(opts, "ro", NULL, 0)) {
162 error = VFS_SYNC(mp, MNT_WAIT);
163 if (error)
164 return (error);
165 flags = WRITECLOSE;
166 if (mp->mnt_flag & MNT_FORCE)
167 flags |= FORCECLOSE;
168 error = ext2_flushfiles(mp, flags, td);
169 if (error == 0 && fs->e2fs_wasvalid &&
170 ext2_cgupdate(ump, MNT_WAIT) == 0) {
171 fs->e2fs->e2fs_state =
172 htole16((le16toh(fs->e2fs->e2fs_state) |
173 E2FS_ISCLEAN));
174 ext2_sbupdate(ump, MNT_WAIT);
175 }
176 fs->e2fs_ronly = 1;
177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
178 g_topology_lock();
179 g_access(ump->um_cp, 0, -1, 0);
180 g_topology_unlock();
181 }
182 if (!error && (mp->mnt_flag & MNT_RELOAD))
183 error = ext2_reload(mp, td);
184 if (error)
185 return (error);
186 devvp = ump->um_devvp;
187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
189 return (EPERM);
190
191 /*
192 * If upgrade to read-write by non-root, then verify
193 * that user has necessary permissions on the device.
194 */
195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
196 error = VOP_ACCESS(devvp, VREAD | VWRITE,
197 td->td_ucred, td);
198 if (error)
199 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
200 if (error) {
201 VOP_UNLOCK(devvp);
202 return (error);
203 }
204 VOP_UNLOCK(devvp);
205 g_topology_lock();
206 error = g_access(ump->um_cp, 0, 1, 0);
207 g_topology_unlock();
208 if (error)
209 return (error);
210
211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 ||
212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) {
213 if (mp->mnt_flag & MNT_FORCE) {
214 printf(
215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
216 } else {
217 printf(
218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
219 fs->e2fs_fsmnt);
220 return (EPERM);
221 }
222 }
223 fs->e2fs->e2fs_state =
224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
225 (void)ext2_cgupdate(ump, MNT_WAIT);
226 fs->e2fs_ronly = 0;
227 MNT_ILOCK(mp);
228 mp->mnt_flag &= ~MNT_RDONLY;
229 MNT_IUNLOCK(mp);
230 }
231 if (vfs_flagopt(opts, "export", NULL, 0)) {
232 /* Process export requests in vfs_mount.c. */
233 return (error);
234 }
235 }
236
237 /*
238 * Not an update, or updating the name: look up the name
239 * and verify that it refers to a sensible disk device.
240 */
241 if (fspec == NULL)
242 return (EINVAL);
243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
244 if ((error = namei(ndp)) != 0)
245 return (error);
246 NDFREE(ndp, NDF_ONLY_PNBUF);
247 devvp = ndp->ni_vp;
248
249 if (!vn_isdisk_error(devvp, &error)) {
250 vput(devvp);
251 return (error);
252 }
253
254 /*
255 * If mount by non-root, then verify that user has necessary
256 * permissions on the device.
257 *
258 * XXXRW: VOP_ACCESS() enough?
259 */
260 accmode = VREAD;
261 if ((mp->mnt_flag & MNT_RDONLY) == 0)
262 accmode |= VWRITE;
263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
264 if (error)
265 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
266 if (error) {
267 vput(devvp);
268 return (error);
269 }
270
271 if ((mp->mnt_flag & MNT_UPDATE) == 0) {
272 error = ext2_mountfs(devvp, mp);
273 } else {
274 if (devvp != ump->um_devvp) {
275 vput(devvp);
276 return (EINVAL); /* needs translation */
277 } else
278 vput(devvp);
279 }
280 if (error) {
281 vrele(devvp);
282 return (error);
283 }
284 ump = VFSTOEXT2(mp);
285 fs = ump->um_e2fs;
286
287 /*
288 * Note that this strncpy() is ok because of a check at the start
289 * of ext2_mount().
290 */
291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
293 vfs_mountedfrom(mp, fspec);
294 return (0);
295 }
296
297 static int
298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
299 {
300 uint32_t i, mask;
301
302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) {
303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC);
305 return (1);
306 }
307 if (le32toh(es->e2fs_rev) > E2FS_REV0) {
308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP);
309 if (mask) {
310 printf("WARNING: mount of %s denied due to "
311 "unsupported optional features:\n", devtoname(dev));
312 for (i = 0;
313 i < sizeof(incompat)/sizeof(struct ext2_feature);
314 i++)
315 if (mask & incompat[i].mask)
316 printf("%s ", incompat[i].name);
317 printf("\n");
318 return (1);
319 }
320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP;
321 if (!ronly && mask) {
322 printf("WARNING: R/W mount of %s denied due to "
323 "unsupported optional features:\n", devtoname(dev));
324 for (i = 0;
325 i < sizeof(ro_compat)/sizeof(struct ext2_feature);
326 i++)
327 if (mask & ro_compat[i].mask)
328 printf("%s ", ro_compat[i].name);
329 printf("\n");
330 return (1);
331 }
332 }
333 return (0);
334 }
335
336 static e4fs_daddr_t
337 ext2_cg_location(struct m_ext2fs *fs, int number)
338 {
339 int cg, descpb, logical_sb, has_super = 0;
340
341 /*
342 * Adjust logical superblock block number.
343 * Godmar thinks: if the blocksize is greater than 1024, then
344 * the superblock is logically part of block zero.
345 */
346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1;
347
348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
349 number < le32toh(fs->e2fs->e3fs_first_meta_bg))
350 return (logical_sb + number + 1);
351
352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT))
353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
354 else
355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
356
357 cg = descpb * number;
358
359 if (ext2_cg_has_sb(fs, cg))
360 has_super = 1;
361
362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) +
363 le32toh(fs->e2fs->e2fs_first_dblock));
364 }
365
366 static int
367 ext2_cg_validate(struct m_ext2fs *fs)
368 {
369 uint64_t b_bitmap;
370 uint64_t i_bitmap;
371 uint64_t i_tables;
372 uint64_t first_block, last_block, last_cg_block;
373 struct ext2_gd *gd;
374 unsigned int i, cg_count;
375
376 first_block = le32toh(fs->e2fs->e2fs_first_dblock);
377 last_cg_block = ext2_cg_number_gdb(fs, 0);
378 cg_count = fs->e2fs_gcount;
379
380 for (i = 0; i < fs->e2fs_gcount; i++) {
381 gd = &fs->e2fs_gd[i];
382
383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
384 i == fs->e2fs_gcount - 1) {
385 last_block = fs->e2fs_bcount - 1;
386 } else {
387 last_block = first_block +
388 (EXT2_BLOCKS_PER_GROUP(fs) - 1);
389 }
390
391 if ((cg_count == fs->e2fs_gcount) &&
392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED))
393 cg_count = i;
394
395 b_bitmap = e2fs_gd_get_b_bitmap(gd);
396 if (b_bitmap == 0) {
397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
398 "block bitmap is zero", i);
399 return (EINVAL);
400 }
401 if (b_bitmap <= last_cg_block) {
402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
403 "block bitmap overlaps gds", i);
404 return (EINVAL);
405 }
406 if (b_bitmap < first_block || b_bitmap > last_block) {
407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
408 "block bitmap not in group", i);
409 return (EINVAL);
410 }
411
412 i_bitmap = e2fs_gd_get_i_bitmap(gd);
413 if (i_bitmap == 0) {
414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
415 "inode bitmap is zero", i);
416 return (EINVAL);
417 }
418 if (i_bitmap <= last_cg_block) {
419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
420 "inode bitmap overlaps gds", i);
421 return (EINVAL);
422 }
423 if (i_bitmap < first_block || i_bitmap > last_block) {
424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
425 "inode bitmap not in group blk", i);
426 return (EINVAL);
427 }
428
429 i_tables = e2fs_gd_get_i_tables(gd);
430 if (i_tables == 0) {
431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
432 "inode table is zero", i);
433 return (EINVAL);
434 }
435 if (i_tables <= last_cg_block) {
436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
437 "inode tables overlaps gds", i);
438 return (EINVAL);
439 }
440 if (i_tables < first_block ||
441 i_tables + fs->e2fs_itpg - 1 > last_block) {
442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
443 "inode tables not in group blk", i);
444 return (EINVAL);
445 }
446
447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG))
448 first_block += EXT2_BLOCKS_PER_GROUP(fs);
449 }
450
451 return (0);
452 }
453
454 /*
455 * This computes the fields of the m_ext2fs structure from the
456 * data in the ext2fs structure read in.
457 */
458 static int
459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es,
460 struct m_ext2fs *fs)
461 {
462 struct buf *bp;
463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc;
464 int i, j;
465 int g_count = 0;
466 int error;
467
468 /* Check if first dblock is valid */
469 if (fs->e2fs->e2fs_bcount >= 1024 && fs->e2fs->e2fs_first_dblock) {
470 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
471 "first dblock is invalid");
472 return (EINVAL);
473 }
474
475 /* Check checksum features */
476 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) &&
477 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
478 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
479 "incorrect checksum features combination");
480 return (EINVAL);
481 }
482
483 /* Precompute checksum seed for all metadata */
484 ext2_sb_csum_set_seed(fs);
485
486 /* Verify sb csum if possible */
487 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
488 error = ext2_sb_csum_verify(fs);
489 if (error) {
490 return (error);
491 }
492 }
493
494 /* Check for block size = 1K|2K|4K */
495 if (le32toh(es->e2fs_log_bsize) > 2) {
496 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
497 "bad block size");
498 return (EINVAL);
499 }
500
501 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize);
502 fs->e2fs_bsize = 1U << fs->e2fs_bshift;
503 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1;
504 fs->e2fs_qbmask = fs->e2fs_bsize - 1;
505
506 /* Check for fragment size */
507 if (le32toh(es->e2fs_log_fsize) >
508 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) {
509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
510 "invalid log cluster size");
511 return (EINVAL);
512 }
513
514 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize);
515 if (fs->e2fs_fsize != fs->e2fs_bsize) {
516 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
517 "fragment size != block size");
518 return (EINVAL);
519 }
520
521 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;
522
523 /* Check reserved gdt blocks for future filesystem expansion */
524 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) {
525 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
526 "number of reserved GDT blocks too large");
527 return (EINVAL);
528 }
529
530 if (le32toh(es->e2fs_rev) == E2FS_REV0) {
531 fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
532 } else {
533 fs->e2fs_isize = le16toh(es->e2fs_inode_size);
534
535 /*
536 * Check first ino.
537 */
538 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) {
539 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
540 "invalid first ino");
541 return (EINVAL);
542 }
543
544 /*
545 * Simple sanity check for superblock inode size value.
546 */
547 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
548 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
549 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
550 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
551 "invalid inode size");
552 return (EINVAL);
553 }
554 }
555
556 /* Check group descriptors */
557 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) &&
558 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) {
559 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
560 "unsupported 64bit descriptor size");
561 return (EINVAL);
562 }
563
564 fs->e2fs_bpg = le32toh(es->e2fs_bpg);
565 fs->e2fs_fpg = le32toh(es->e2fs_fpg);
566 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) {
567 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
568 "zero blocks/fragments per group");
569 return (EINVAL);
570 } else if (fs->e2fs_bpg != fs->e2fs_fpg) {
571 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
572 "blocks per group not equal fragments per group");
573 return (EINVAL);
574 }
575
576 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) {
577 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
578 "non-standard group size unsupported");
579 return (EINVAL);
580 }
581
582 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
583 if (fs->e2fs_ipb == 0 ||
584 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) {
585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
586 "bad inodes per block size");
587 return (EINVAL);
588 }
589
590 fs->e2fs_ipg = le32toh(es->e2fs_ipg);
591 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) {
592 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
593 "invalid inodes per group");
594 return (EINVAL);
595 }
596
597 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
598
599 fs->e2fs_bcount = le32toh(es->e2fs_bcount);
600 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount);
601 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount);
602 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
603 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32;
604 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32;
605 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32;
606 }
607 if (fs->e2fs_rbcount > fs->e2fs_bcount ||
608 fs->e2fs_fbcount > fs->e2fs_bcount) {
609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
610 "invalid block count");
611 return (EINVAL);
612 }
613
614 fs->e2fs_ficount = le32toh(es->e2fs_ficount);
615 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) {
616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
617 "invalid number of free inodes");
618 return (EINVAL);
619 }
620
621 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) ||
622 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) {
623 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
624 "first data block out of range");
625 return (EINVAL);
626 }
627
628 fs->e2fs_gcount = howmany(fs->e2fs_bcount -
629 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs));
630 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) {
631 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
632 "groups count too large");
633 return (EINVAL);
634 }
635
636 /* Check for extra isize in big inodes. */
637 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
638 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
639 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
640 "no space for extra inode timestamps");
641 return (EINVAL);
642 }
643
644 /* s_resuid / s_resgid ? */
645
646 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
647 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE;
648 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb);
649 } else {
650 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
651 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount,
652 fs->e2fs_bsize / sizeof(struct ext2_gd));
653 }
654 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb);
655 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize,
656 M_EXT2MNT, M_WAITOK | M_ZERO);
657 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
658 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
659
660 for (i = 0; i < fs->e2fs_gdbcount; i++) {
661 error = bread(devvp,
662 fsbtodb(fs, ext2_cg_location(fs, i)),
663 fs->e2fs_bsize, NOCRED, &bp);
664 if (error) {
665 /*
666 * fs->e2fs_gd and fs->e2fs_contigdirs
667 * will be freed later by the caller,
668 * because this function could be called from
669 * MNT_UPDATE path.
670 */
671 return (error);
672 }
673 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
674 memcpy(&fs->e2fs_gd[
675 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
676 bp->b_data, fs->e2fs_bsize);
677 } else {
678 for (j = 0; j < e2fs_descpb &&
679 g_count < fs->e2fs_gcount; j++, g_count++)
680 memcpy(&fs->e2fs_gd[g_count],
681 bp->b_data + j * E2FS_REV0_GD_SIZE,
682 E2FS_REV0_GD_SIZE);
683 }
684 brelse(bp);
685 bp = NULL;
686 }
687
688 /* Validate cgs consistency */
689 error = ext2_cg_validate(fs);
690 if (error)
691 return (error);
692
693 /* Verfy cgs csum */
694 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
695 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
696 error = ext2_gd_csum_verify(fs, devvp->v_rdev);
697 if (error)
698 return (error);
699 }
700 /* Initialization for the ext2 Orlov allocator variant. */
701 fs->e2fs_total_dir = 0;
702 for (i = 0; i < fs->e2fs_gcount; i++)
703 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]);
704
705 if (le32toh(es->e2fs_rev) == E2FS_REV0 ||
706 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
707 fs->e2fs_maxfilesize = 0x7fffffff;
708 else {
709 fs->e2fs_maxfilesize = 0xffffffffffff;
710 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
711 fs->e2fs_maxfilesize = 0x7fffffffffffffff;
712 }
713 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) {
714 fs->e2fs_uhash = 3;
715 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) {
716 #ifdef __CHAR_UNSIGNED__
717 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH);
718 fs->e2fs_uhash = 3;
719 #else
720 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH);
721 #endif
722 }
723 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
724 error = ext2_sb_csum_verify(fs);
725
726 return (error);
727 }
728
729 /*
730 * Reload all incore data for a filesystem (used after running fsck on
731 * the root filesystem and finding things to fix). The filesystem must
732 * be mounted read-only.
733 *
734 * Things to do to update the mount:
735 * 1) invalidate all cached meta-data.
736 * 2) re-read superblock from disk.
737 * 3) invalidate all cluster summary information.
738 * 4) invalidate all inactive vnodes.
739 * 5) invalidate all cached file data.
740 * 6) re-read inode data for all active vnodes.
741 * XXX we are missing some steps, in particular # 3, this has to be reviewed.
742 */
743 static int
744 ext2_reload(struct mount *mp, struct thread *td)
745 {
746 struct vnode *vp, *mvp, *devvp;
747 struct inode *ip;
748 struct buf *bp;
749 struct ext2fs *es;
750 struct m_ext2fs *fs;
751 struct csum *sump;
752 int error, i;
753 int32_t *lp;
754
755 if ((mp->mnt_flag & MNT_RDONLY) == 0)
756 return (EINVAL);
757 /*
758 * Step 1: invalidate all cached meta-data.
759 */
760 devvp = VFSTOEXT2(mp)->um_devvp;
761 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
762 if (vinvalbuf(devvp, 0, 0, 0) != 0)
763 panic("ext2_reload: dirty1");
764 VOP_UNLOCK(devvp);
765
766 /*
767 * Step 2: re-read superblock from disk.
768 * constants have been adjusted for ext2
769 */
770 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
771 return (error);
772 es = (struct ext2fs *)bp->b_data;
773 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
774 brelse(bp);
775 return (EIO); /* XXX needs translation */
776 }
777 fs = VFSTOEXT2(mp)->um_e2fs;
778 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
779
780 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) {
781 brelse(bp);
782 return (error);
783 }
784 #ifdef UNKLAR
785 if (fs->fs_sbsize < SBSIZE)
786 bp->b_flags |= B_INVAL;
787 #endif
788 brelse(bp);
789
790 /*
791 * Step 3: invalidate all cluster summary information.
792 */
793 if (fs->e2fs_contigsumsize > 0) {
794 lp = fs->e2fs_maxcluster;
795 sump = fs->e2fs_clustersum;
796 for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
797 *lp++ = fs->e2fs_contigsumsize;
798 sump->cs_init = 0;
799 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
800 }
801 }
802
803 loop:
804 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
805 /*
806 * Step 4: invalidate all cached file data.
807 */
808 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
809 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
810 goto loop;
811 }
812 if (vinvalbuf(vp, 0, 0, 0))
813 panic("ext2_reload: dirty2");
814
815 /*
816 * Step 5: re-read inode data for all active vnodes.
817 */
818 ip = VTOI(vp);
819 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
820 (int)fs->e2fs_bsize, NOCRED, &bp);
821 if (error) {
822 VOP_UNLOCK(vp);
823 vrele(vp);
824 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
825 return (error);
826 }
827
828 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
829 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
830
831 brelse(bp);
832 VOP_UNLOCK(vp);
833 vrele(vp);
834
835 if (error) {
836 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
837 return (error);
838 }
839 }
840 return (0);
841 }
842
843 /*
844 * Common code for mount and mountroot.
845 */
846 static int
847 ext2_mountfs(struct vnode *devvp, struct mount *mp)
848 {
849 struct ext2mount *ump;
850 struct buf *bp;
851 struct m_ext2fs *fs;
852 struct ext2fs *es;
853 struct cdev *dev = devvp->v_rdev;
854 struct g_consumer *cp;
855 struct bufobj *bo;
856 struct csum *sump;
857 int error;
858 int ronly;
859 int i;
860 u_long size;
861 int32_t *lp;
862 int32_t e2fs_maxcontig;
863
864 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
865 /* XXX: use VOP_ACESS to check FS perms */
866 g_topology_lock();
867 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
868 g_topology_unlock();
869 VOP_UNLOCK(devvp);
870 if (error)
871 return (error);
872
873 /* XXX: should we check for some sectorsize or 512 instead? */
874 if (((SBSIZE % cp->provider->sectorsize) != 0) ||
875 (SBSIZE < cp->provider->sectorsize)) {
876 g_topology_lock();
877 g_vfs_close(cp);
878 g_topology_unlock();
879 return (EINVAL);
880 }
881
882 bo = &devvp->v_bufobj;
883 bo->bo_private = cp;
884 bo->bo_ops = g_vfs_bufops;
885 if (devvp->v_rdev->si_iosize_max != 0)
886 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
887 if (mp->mnt_iosize_max > maxphys)
888 mp->mnt_iosize_max = maxphys;
889
890 bp = NULL;
891 ump = NULL;
892 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
893 goto out;
894 es = (struct ext2fs *)bp->b_data;
895 if (ext2_check_sb_compat(es, dev, ronly) != 0) {
896 error = EINVAL; /* XXX needs translation */
897 goto out;
898 }
899 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 ||
900 (le16toh(es->e2fs_state) & E2FS_ERRORS)) {
901 if (ronly || (mp->mnt_flag & MNT_FORCE)) {
902 printf(
903 "WARNING: Filesystem was not properly dismounted\n");
904 } else {
905 printf(
906 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n");
907 error = EPERM;
908 goto out;
909 }
910 }
911 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
912
913 /*
914 * I don't know whether this is the right strategy. Note that
915 * we dynamically allocate both an m_ext2fs and an ext2fs
916 * while Linux keeps the super block in a locked buffer.
917 */
918 ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
919 M_EXT2MNT, M_WAITOK | M_ZERO);
920 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
921 M_EXT2MNT, M_WAITOK);
922 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
923 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
924 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
925 goto out;
926
927 /*
928 * Calculate the maximum contiguous blocks and size of cluster summary
929 * array. In FFS this is done by newfs; however, the superblock
930 * in ext2fs doesn't have these variables, so we can calculate
931 * them here.
932 */
933 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize);
934 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
935 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN;
936 if (ump->um_e2fs->e2fs_contigsumsize > 0) {
937 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
938 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
939 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
940 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
941 lp = ump->um_e2fs->e2fs_maxcluster;
942 sump = ump->um_e2fs->e2fs_clustersum;
943 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
944 *lp++ = ump->um_e2fs->e2fs_contigsumsize;
945 sump->cs_init = 0;
946 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
947 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
948 }
949 }
950
951 brelse(bp);
952 bp = NULL;
953 fs = ump->um_e2fs;
954 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */
955
956 /*
957 * If the fs is not mounted read-only, make sure the super block is
958 * always written back on a sync().
959 */
960 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0;
961 if (ronly == 0) {
962 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */
963 fs->e2fs->e2fs_state =
964 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
965 }
966 mp->mnt_data = ump;
967 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
968 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
969 MNT_ILOCK(mp);
970 mp->mnt_flag |= MNT_LOCAL;
971 MNT_IUNLOCK(mp);
972 ump->um_mountp = mp;
973 ump->um_dev = dev;
974 ump->um_devvp = devvp;
975 ump->um_bo = &devvp->v_bufobj;
976 ump->um_cp = cp;
977
978 /*
979 * Setting those two parameters allowed us to use
980 * ufs_bmap w/o changse!
981 */
982 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
983 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1;
984 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
985 if (ronly == 0)
986 ext2_sbupdate(ump, MNT_WAIT);
987 /*
988 * Initialize filesystem stat information in mount struct.
989 */
990 MNT_ILOCK(mp);
991 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
992 MNTK_USES_BCACHE;
993 MNT_IUNLOCK(mp);
994 return (0);
995 out:
996 if (bp)
997 brelse(bp);
998 if (cp != NULL) {
999 g_topology_lock();
1000 g_vfs_close(cp);
1001 g_topology_unlock();
1002 }
1003 if (ump) {
1004 mtx_destroy(EXT2_MTX(ump));
1005 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
1006 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
1007 free(ump->um_e2fs->e2fs, M_EXT2MNT);
1008 free(ump->um_e2fs, M_EXT2MNT);
1009 free(ump, M_EXT2MNT);
1010 mp->mnt_data = NULL;
1011 }
1012 return (error);
1013 }
1014
1015 /*
1016 * Unmount system call.
1017 */
1018 static int
1019 ext2_unmount(struct mount *mp, int mntflags)
1020 {
1021 struct ext2mount *ump;
1022 struct m_ext2fs *fs;
1023 struct csum *sump;
1024 int error, flags, i, ronly;
1025
1026 flags = 0;
1027 if (mntflags & MNT_FORCE) {
1028 if (mp->mnt_flag & MNT_ROOTFS)
1029 return (EINVAL);
1030 flags |= FORCECLOSE;
1031 }
1032 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
1033 return (error);
1034 ump = VFSTOEXT2(mp);
1035 fs = ump->um_e2fs;
1036 ronly = fs->e2fs_ronly;
1037 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
1038 if (fs->e2fs_wasvalid)
1039 fs->e2fs->e2fs_state =
1040 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN);
1041 ext2_sbupdate(ump, MNT_WAIT);
1042 }
1043
1044 g_topology_lock();
1045 g_vfs_close(ump->um_cp);
1046 g_topology_unlock();
1047 vrele(ump->um_devvp);
1048 sump = fs->e2fs_clustersum;
1049 for (i = 0; i < fs->e2fs_gcount; i++, sump++)
1050 free(sump->cs_sum, M_EXT2MNT);
1051 free(fs->e2fs_clustersum, M_EXT2MNT);
1052 free(fs->e2fs_maxcluster, M_EXT2MNT);
1053 free(fs->e2fs_gd, M_EXT2MNT);
1054 free(fs->e2fs_contigdirs, M_EXT2MNT);
1055 free(fs->e2fs, M_EXT2MNT);
1056 free(fs, M_EXT2MNT);
1057 free(ump, M_EXT2MNT);
1058 mp->mnt_data = NULL;
1059 MNT_ILOCK(mp);
1060 mp->mnt_flag &= ~MNT_LOCAL;
1061 MNT_IUNLOCK(mp);
1062 return (error);
1063 }
1064
/*
 * Flush out all the files in a filesystem.
 *
 * Thin wrapper around vflush(); its result is returned unchanged.
 */
static int
ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
{

	return (vflush(mp, 0, flags, td));
}
1076
1077 /*
1078 * Get filesystem statistics.
1079 */
1080 int
1081 ext2_statfs(struct mount *mp, struct statfs *sbp)
1082 {
1083 struct ext2mount *ump;
1084 struct m_ext2fs *fs;
1085 uint32_t overhead, overhead_per_group, ngdb;
1086 int i, ngroups;
1087
1088 ump = VFSTOEXT2(mp);
1089 fs = ump->um_e2fs;
1090 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC)
1091 panic("ext2_statfs");
1092
1093 /*
1094 * Compute the overhead (FS structures)
1095 */
1096 overhead_per_group =
1097 1 /* block bitmap */ +
1098 1 /* inode bitmap */ +
1099 fs->e2fs_itpg;
1100 overhead = le32toh(fs->e2fs->e2fs_first_dblock) +
1101 fs->e2fs_gcount * overhead_per_group;
1102 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1103 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) {
1104 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
1105 if (ext2_cg_has_sb(fs, i))
1106 ngroups++;
1107 }
1108 } else {
1109 ngroups = fs->e2fs_gcount;
1110 }
1111 ngdb = fs->e2fs_gdbcount;
1112 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1113 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE)
1114 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb);
1115 overhead += ngroups * (1 /* superblock */ + ngdb);
1116
1117 sbp->f_bsize = EXT2_FRAG_SIZE(fs);
1118 sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
1119 sbp->f_blocks = fs->e2fs_bcount - overhead;
1120 sbp->f_bfree = fs->e2fs_fbcount;
1121 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount;
1122 sbp->f_files = le32toh(fs->e2fs->e2fs_icount);
1123 sbp->f_ffree = fs->e2fs_ficount;
1124 return (0);
1125 }
1126
1127 /*
1128 * Go through the disk queues to initiate sandbagged IO;
1129 * go through the inodes to write those that have been modified;
1130 * initiate the writing of the super block if it has been modified.
1131 *
1132 * Note: we are always called with the filesystem marked `MPBUSY'.
1133 */
1134 static int
1135 ext2_sync(struct mount *mp, int waitfor)
1136 {
1137 struct vnode *mvp, *vp;
1138 struct thread *td;
1139 struct inode *ip;
1140 struct ext2mount *ump = VFSTOEXT2(mp);
1141 struct m_ext2fs *fs;
1142 int error, allerror = 0;
1143
1144 td = curthread;
1145 fs = ump->um_e2fs;
1146 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */
1147 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt);
1148 }
1149
1150 /*
1151 * Write back each (modified) inode.
1152 */
1153 loop:
1154 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1155 if (vp->v_type == VNON) {
1156 VI_UNLOCK(vp);
1157 continue;
1158 }
1159 ip = VTOI(vp);
1160 if ((ip->i_flag &
1161 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1162 (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1163 waitfor == MNT_LAZY)) {
1164 VI_UNLOCK(vp);
1165 continue;
1166 }
1167 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1168 if (error) {
1169 if (error == ENOENT) {
1170 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1171 goto loop;
1172 }
1173 continue;
1174 }
1175 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
1176 allerror = error;
1177 VOP_UNLOCK(vp);
1178 vrele(vp);
1179 }
1180
1181 /*
1182 * Force stale filesystem control information to be flushed.
1183 */
1184 if (waitfor != MNT_LAZY) {
1185 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1186 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
1187 allerror = error;
1188 VOP_UNLOCK(ump->um_devvp);
1189 }
1190
1191 /*
1192 * Write back modified superblock.
1193 */
1194 if (fs->e2fs_fmod != 0) {
1195 fs->e2fs_fmod = 0;
1196 fs->e2fs->e2fs_wtime = htole32(time_second);
1197 if ((error = ext2_cgupdate(ump, waitfor)) != 0)
1198 allerror = error;
1199 }
1200 return (allerror);
1201 }
1202
1203 /*
1204 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
1205 * in from disk. If it is in core, wait for the lock bit to clear, then
1206 * return the inode locked. Detection and handling of mount points must be
1207 * done by the calling routine.
1208 */
1209 static int
1210 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1211 {
1212 struct m_ext2fs *fs;
1213 struct inode *ip;
1214 struct ext2mount *ump;
1215 struct buf *bp;
1216 struct vnode *vp;
1217 struct thread *td;
1218 unsigned int i, used_blocks;
1219 int error;
1220
1221 td = curthread;
1222 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
1223 if (error || *vpp != NULL)
1224 return (error);
1225
1226 ump = VFSTOEXT2(mp);
1227 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
1228
1229 /* Allocate a new vnode/inode. */
1230 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
1231 *vpp = NULL;
1232 free(ip, M_EXT2NODE);
1233 return (error);
1234 }
1235 vp->v_data = ip;
1236 ip->i_vnode = vp;
1237 ip->i_e2fs = fs = ump->um_e2fs;
1238 ip->i_ump = ump;
1239 ip->i_number = ino;
1240
1241 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1242 error = insmntque(vp, mp);
1243 if (error != 0) {
1244 free(ip, M_EXT2NODE);
1245 *vpp = NULL;
1246 return (error);
1247 }
1248 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
1249 if (error || *vpp != NULL)
1250 return (error);
1251
1252 /* Read in the disk contents for the inode, copy into the inode. */
1253 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1254 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
1255 /*
1256 * The inode does not contain anything useful, so it would
1257 * be misleading to leave it on its hash chain. With mode
1258 * still zero, it will be unlinked and returned to the free
1259 * list by vput().
1260 */
1261 brelse(bp);
1262 vput(vp);
1263 *vpp = NULL;
1264 return (error);
1265 }
1266 /* convert ext2 inode to dinode */
1267 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
1268 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip);
1269 if (error) {
1270 brelse(bp);
1271 vput(vp);
1272 *vpp = NULL;
1273 return (error);
1274 }
1275 ip->i_block_group = ino_to_cg(fs, ino);
1276 ip->i_next_alloc_block = 0;
1277 ip->i_next_alloc_goal = 0;
1278
1279 /*
1280 * Now we want to make sure that block pointers for unused
1281 * blocks are zeroed out - ext2_balloc depends on this
1282 * although for regular files and directories only
1283 *
1284 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
1285 * out because we could corrupt the extent tree.
1286 */
1287 if (!(ip->i_flag & IN_E4EXTENTS) &&
1288 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
1289 used_blocks = howmany(ip->i_size, fs->e2fs_bsize);
1290 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1291 ip->i_db[i] = 0;
1292 }
1293 #ifdef EXT2FS_PRINT_EXTENTS
1294 ext2_print_inode(ip);
1295 ext4_ext_print_extent_tree_status(ip);
1296 #endif
1297 bqrelse(bp);
1298
1299 /*
1300 * Initialize the vnode from the inode, check for aliases.
1301 * Note that the underlying vnode may have changed.
1302 */
1303 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
1304 vput(vp);
1305 *vpp = NULL;
1306 return (error);
1307 }
1308
1309 /*
1310 * Finish inode initialization.
1311 */
1312
1313 *vpp = vp;
1314 return (0);
1315 }
1316
1317 /*
1318 * File handle to vnode
1319 *
1320 * Have to be really careful about stale file handles:
1321 * - check that the inode number is valid
1322 * - call ext2_vget() to get the locked inode
1323 * - check for an unallocated inode (i_mode == 0)
1324 * - check that the given client host has export rights and return
1325 * those rights via. exflagsp and credanonp
1326 */
1327 static int
1328 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1329 {
1330 struct inode *ip;
1331 struct ufid *ufhp;
1332 struct vnode *nvp;
1333 struct m_ext2fs *fs;
1334 int error;
1335
1336 ufhp = (struct ufid *)fhp;
1337 fs = VFSTOEXT2(mp)->um_e2fs;
1338 if (ufhp->ufid_ino < EXT2_ROOTINO ||
1339 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg)
1340 return (ESTALE);
1341
1342 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1343 if (error) {
1344 *vpp = NULLVP;
1345 return (error);
1346 }
1347 ip = VTOI(nvp);
1348 if (ip->i_mode == 0 ||
1349 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1350 vput(nvp);
1351 *vpp = NULLVP;
1352 return (ESTALE);
1353 }
1354 *vpp = nvp;
1355 vnode_create_vobject(*vpp, 0, curthread);
1356 return (0);
1357 }
1358
1359 /*
1360 * Write a superblock and associated information back to disk.
1361 */
1362 static int
1363 ext2_sbupdate(struct ext2mount *mp, int waitfor)
1364 {
1365 struct m_ext2fs *fs = mp->um_e2fs;
1366 struct ext2fs *es = fs->e2fs;
1367 struct buf *bp;
1368 int error = 0;
1369
1370 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff);
1371 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff);
1372 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff);
1373 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1374 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32);
1375 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32);
1376 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32);
1377 }
1378
1379 es->e2fs_ficount = htole32(fs->e2fs_ficount);
1380
1381 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1382 ext2_sb_csum_set(fs);
1383
1384 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
1385 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs));
1386 if (waitfor == MNT_WAIT)
1387 error = bwrite(bp);
1388 else
1389 bawrite(bp);
1390
1391 /*
1392 * The buffers for group descriptors, inode bitmaps and block bitmaps
1393 * are not busy at this point and are (hopefully) written by the
1394 * usual sync mechanism. No need to write them here.
1395 */
1396 return (error);
1397 }
1398 int
1399 ext2_cgupdate(struct ext2mount *mp, int waitfor)
1400 {
1401 struct m_ext2fs *fs = mp->um_e2fs;
1402 struct buf *bp;
1403 int i, j, g_count = 0, error = 0, allerror = 0;
1404
1405 allerror = ext2_sbupdate(mp, waitfor);
1406
1407 /* Update gd csums */
1408 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1409 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1410 ext2_gd_csum_set(fs);
1411
1412 for (i = 0; i < fs->e2fs_gdbcount; i++) {
1413 bp = getblk(mp->um_devvp, fsbtodb(fs,
1414 ext2_cg_location(fs, i)),
1415 fs->e2fs_bsize, 0, 0, 0);
1416 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1417 memcpy(bp->b_data, &fs->e2fs_gd[
1418 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
1419 fs->e2fs_bsize);
1420 } else {
1421 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE &&
1422 g_count < fs->e2fs_gcount; j++, g_count++)
1423 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE,
1424 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE);
1425 }
1426 if (waitfor == MNT_WAIT)
1427 error = bwrite(bp);
1428 else
1429 bawrite(bp);
1430 }
1431
1432 if (!allerror && error)
1433 allerror = error;
1434 return (allerror);
1435 }
1436
1437 /*
1438 * Return the root of a filesystem.
1439 */
1440 static int
1441 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
1442 {
1443 struct vnode *nvp;
1444 int error;
1445
1446 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
1447 if (error)
1448 return (error);
1449 *vpp = nvp;
1450 return (0);
1451 }
Cache object: caa7ef30b5c792504051d4153b2ec4dd
|