1 /*-
2 * modified for EXT2FS support in Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 */
7 /*-
8 * SPDX-License-Identifier: BSD-3-Clause
9 *
10 * Copyright (c) 1989, 1991, 1993, 1994
11 * The Regents of the University of California. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
38 * $FreeBSD$
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/endian.h>
53 #include <sys/fcntl.h>
54 #include <sys/malloc.h>
55 #include <sys/sdt.h>
56 #include <sys/stat.h>
57 #include <sys/mutex.h>
58
59 #include <geom/geom.h>
60 #include <geom/geom_vfs.h>
61
62 #include <fs/ext2fs/fs.h>
63 #include <fs/ext2fs/ext2_mount.h>
64 #include <fs/ext2fs/inode.h>
65
66 #include <fs/ext2fs/ext2fs.h>
67 #include <fs/ext2fs/ext2_dinode.h>
68 #include <fs/ext2fs/ext2_extern.h>
69 #include <fs/ext2fs/ext2_extents.h>
70
71 SDT_PROVIDER_DECLARE(ext2fs);
72 /*
73 * ext2fs trace probe:
74 * arg0: verbosity. Higher numbers give more verbose messages
75 * arg1: Textual message
76 */
77 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*");
78 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int");
79 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*");
80
81 static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
82 static int ext2_mountfs(struct vnode *, struct mount *);
83 static int ext2_reload(struct mount *mp, struct thread *td);
84 static int ext2_sbupdate(struct ext2mount *, int);
85 static int ext2_cgupdate(struct ext2mount *, int);
86 static vfs_unmount_t ext2_unmount;
87 static vfs_root_t ext2_root;
88 static vfs_statfs_t ext2_statfs;
89 static vfs_sync_t ext2_sync;
90 static vfs_vget_t ext2_vget;
91 static vfs_fhtovp_t ext2_fhtovp;
92 static vfs_mount_t ext2_mount;
93
94 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
95 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
96
97 static struct vfsops ext2fs_vfsops = {
98 .vfs_fhtovp = ext2_fhtovp,
99 .vfs_mount = ext2_mount,
100 .vfs_root = ext2_root, /* root inode via vget */
101 .vfs_statfs = ext2_statfs,
102 .vfs_sync = ext2_sync,
103 .vfs_unmount = ext2_unmount,
104 .vfs_vget = ext2_vget,
105 };
106
107 VFS_SET(ext2fs_vfsops, ext2fs, 0);
108
109 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
110 int ronly);
111 static int ext2_compute_sb_data(struct vnode * devvp,
112 struct ext2fs * es, struct m_ext2fs * fs);
113
114 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr",
115 "noclusterw", "noexec", "export", "force", "from", "multilabel",
116 "suiddir", "nosymfollow", "sync", "union", NULL };
117
118 /*
119 * VFS Operations.
120 *
121 * mount system call
122 */
123 static int
124 ext2_mount(struct mount *mp)
125 {
126 struct vfsoptlist *opts;
127 struct vnode *devvp;
128 struct thread *td;
129 struct ext2mount *ump = NULL;
130 struct m_ext2fs *fs;
131 struct nameidata nd, *ndp = &nd;
132 accmode_t accmode;
133 char *path, *fspec;
134 int error, flags, len;
135
136 td = curthread;
137 opts = mp->mnt_optnew;
138
139 if (vfs_filteropt(opts, ext2_opts))
140 return (EINVAL);
141
142 vfs_getopt(opts, "fspath", (void **)&path, NULL);
143 /* Double-check the length of path.. */
144 if (strlen(path) >= MAXMNTLEN)
145 return (ENAMETOOLONG);
146
147 fspec = NULL;
148 error = vfs_getopt(opts, "from", (void **)&fspec, &len);
149 if (!error && fspec[len - 1] != '\0')
150 return (EINVAL);
151
152 /*
153 * If updating, check whether changing from read-only to
154 * read/write; if there is no device name, that's all we do.
155 */
156 if (mp->mnt_flag & MNT_UPDATE) {
157 ump = VFSTOEXT2(mp);
158 fs = ump->um_e2fs;
159 error = 0;
160 if (fs->e2fs_ronly == 0 &&
161 vfs_flagopt(opts, "ro", NULL, 0)) {
162 error = VFS_SYNC(mp, MNT_WAIT);
163 if (error)
164 return (error);
165 flags = WRITECLOSE;
166 if (mp->mnt_flag & MNT_FORCE)
167 flags |= FORCECLOSE;
168 error = ext2_flushfiles(mp, flags, td);
169 if (error == 0 && fs->e2fs_wasvalid &&
170 ext2_cgupdate(ump, MNT_WAIT) == 0) {
171 fs->e2fs->e2fs_state =
172 htole16((le16toh(fs->e2fs->e2fs_state) |
173 E2FS_ISCLEAN));
174 ext2_sbupdate(ump, MNT_WAIT);
175 }
176 fs->e2fs_ronly = 1;
177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
178 g_topology_lock();
179 g_access(ump->um_cp, 0, -1, 0);
180 g_topology_unlock();
181 }
182 if (!error && (mp->mnt_flag & MNT_RELOAD))
183 error = ext2_reload(mp, td);
184 if (error)
185 return (error);
186 devvp = ump->um_devvp;
187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
189 return (EPERM);
190
191 /*
192 * If upgrade to read-write by non-root, then verify
193 * that user has necessary permissions on the device.
194 */
195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
196 error = VOP_ACCESS(devvp, VREAD | VWRITE,
197 td->td_ucred, td);
198 if (error)
199 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
200 if (error) {
201 VOP_UNLOCK(devvp);
202 return (error);
203 }
204 VOP_UNLOCK(devvp);
205 g_topology_lock();
206 error = g_access(ump->um_cp, 0, 1, 0);
207 g_topology_unlock();
208 if (error)
209 return (error);
210
211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 ||
212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) {
213 if (mp->mnt_flag & MNT_FORCE) {
214 printf(
215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
216 } else {
217 printf(
218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
219 fs->e2fs_fsmnt);
220 return (EPERM);
221 }
222 }
223 fs->e2fs->e2fs_state =
224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
225 (void)ext2_cgupdate(ump, MNT_WAIT);
226 fs->e2fs_ronly = 0;
227 MNT_ILOCK(mp);
228 mp->mnt_flag &= ~MNT_RDONLY;
229 MNT_IUNLOCK(mp);
230 }
231 if (vfs_flagopt(opts, "export", NULL, 0)) {
232 /* Process export requests in vfs_mount.c. */
233 return (error);
234 }
235 }
236
237 /*
238 * Not an update, or updating the name: look up the name
239 * and verify that it refers to a sensible disk device.
240 */
241 if (fspec == NULL)
242 return (EINVAL);
243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
244 if ((error = namei(ndp)) != 0)
245 return (error);
246 NDFREE(ndp, NDF_ONLY_PNBUF);
247 devvp = ndp->ni_vp;
248
249 if (!vn_isdisk_error(devvp, &error)) {
250 vput(devvp);
251 return (error);
252 }
253
254 /*
255 * If mount by non-root, then verify that user has necessary
256 * permissions on the device.
257 *
258 * XXXRW: VOP_ACCESS() enough?
259 */
260 accmode = VREAD;
261 if ((mp->mnt_flag & MNT_RDONLY) == 0)
262 accmode |= VWRITE;
263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
264 if (error)
265 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
266 if (error) {
267 vput(devvp);
268 return (error);
269 }
270
271 if ((mp->mnt_flag & MNT_UPDATE) == 0) {
272 error = ext2_mountfs(devvp, mp);
273 } else {
274 if (devvp != ump->um_devvp) {
275 vput(devvp);
276 return (EINVAL); /* needs translation */
277 } else
278 vput(devvp);
279 }
280 if (error) {
281 vrele(devvp);
282 return (error);
283 }
284 ump = VFSTOEXT2(mp);
285 fs = ump->um_e2fs;
286
287 /*
288 * Note that this strncpy() is ok because of a check at the start
289 * of ext2_mount().
290 */
291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
293 vfs_mountedfrom(mp, fspec);
294 return (0);
295 }
296
297 static int
298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
299 {
300 uint32_t i, mask;
301
302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) {
303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC);
305 return (1);
306 }
307 if (le32toh(es->e2fs_rev) > E2FS_REV0) {
308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP);
309 if (mask) {
310 printf("WARNING: mount of %s denied due to "
311 "unsupported optional features:\n", devtoname(dev));
312 for (i = 0;
313 i < sizeof(incompat)/sizeof(struct ext2_feature);
314 i++)
315 if (mask & incompat[i].mask)
316 printf("%s ", incompat[i].name);
317 printf("\n");
318 return (1);
319 }
320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP;
321 if (!ronly && mask) {
322 printf("WARNING: R/W mount of %s denied due to "
323 "unsupported optional features:\n", devtoname(dev));
324 for (i = 0;
325 i < sizeof(ro_compat)/sizeof(struct ext2_feature);
326 i++)
327 if (mask & ro_compat[i].mask)
328 printf("%s ", ro_compat[i].name);
329 printf("\n");
330 return (1);
331 }
332 }
333 return (0);
334 }
335
336 static e4fs_daddr_t
337 ext2_cg_location(struct m_ext2fs *fs, int number)
338 {
339 int cg, descpb, logical_sb, has_super = 0;
340
341 /*
342 * Adjust logical superblock block number.
343 * Godmar thinks: if the blocksize is greater than 1024, then
344 * the superblock is logically part of block zero.
345 */
346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1;
347
348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
349 number < le32toh(fs->e2fs->e3fs_first_meta_bg))
350 return (logical_sb + number + 1);
351
352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT))
353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
354 else
355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
356
357 cg = descpb * number;
358
359 if (ext2_cg_has_sb(fs, cg))
360 has_super = 1;
361
362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) +
363 le32toh(fs->e2fs->e2fs_first_dblock));
364 }
365
366 static int
367 ext2_cg_validate(struct m_ext2fs *fs)
368 {
369 uint64_t b_bitmap;
370 uint64_t i_bitmap;
371 uint64_t i_tables;
372 uint64_t first_block, last_block, last_cg_block;
373 struct ext2_gd *gd;
374 unsigned int i, cg_count;
375
376 first_block = le32toh(fs->e2fs->e2fs_first_dblock);
377 last_cg_block = ext2_cg_number_gdb(fs, 0);
378 cg_count = fs->e2fs_gcount;
379
380 for (i = 0; i < fs->e2fs_gcount; i++) {
381 gd = &fs->e2fs_gd[i];
382
383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
384 i == fs->e2fs_gcount - 1) {
385 last_block = fs->e2fs_bcount - 1;
386 } else {
387 last_block = first_block +
388 (EXT2_BLOCKS_PER_GROUP(fs) - 1);
389 }
390
391 if ((cg_count == fs->e2fs_gcount) &&
392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED))
393 cg_count = i;
394
395 b_bitmap = e2fs_gd_get_b_bitmap(gd);
396 if (b_bitmap == 0) {
397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
398 "block bitmap is zero", i);
399 return (EINVAL);
400 }
401 if (b_bitmap <= last_cg_block) {
402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
403 "block bitmap overlaps gds", i);
404 return (EINVAL);
405 }
406 if (b_bitmap < first_block || b_bitmap > last_block) {
407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
408 "block bitmap not in group", i);
409 return (EINVAL);
410 }
411
412 i_bitmap = e2fs_gd_get_i_bitmap(gd);
413 if (i_bitmap == 0) {
414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
415 "inode bitmap is zero", i);
416 return (EINVAL);
417 }
418 if (i_bitmap <= last_cg_block) {
419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
420 "inode bitmap overlaps gds", i);
421 return (EINVAL);
422 }
423 if (i_bitmap < first_block || i_bitmap > last_block) {
424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
425 "inode bitmap not in group blk", i);
426 return (EINVAL);
427 }
428
429 i_tables = e2fs_gd_get_i_tables(gd);
430 if (i_tables == 0) {
431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
432 "inode table is zero", i);
433 return (EINVAL);
434 }
435 if (i_tables <= last_cg_block) {
436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
437 "inode tables overlaps gds", i);
438 return (EINVAL);
439 }
440 if (i_tables < first_block ||
441 i_tables + fs->e2fs_itpg - 1 > last_block) {
442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
443 "inode tables not in group blk", i);
444 return (EINVAL);
445 }
446
447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG))
448 first_block += EXT2_BLOCKS_PER_GROUP(fs);
449 }
450
451 return (0);
452 }
453
454 /*
455 * This computes the fields of the m_ext2fs structure from the
456 * data in the ext2fs structure read in.
457 */
458 static int
459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es,
460 struct m_ext2fs *fs)
461 {
462 struct buf *bp;
463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc;
464 int i, j;
465 int g_count = 0;
466 int error;
467
468 /* Check if first dblock is valid */
469 if (fs->e2fs->e2fs_bcount >= 1024 && fs->e2fs->e2fs_first_dblock) {
470 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
471 "first dblock is invalid");
472 return (EINVAL);
473 }
474
475 /* Check checksum features */
476 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) &&
477 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
478 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
479 "incorrect checksum features combination");
480 return (EINVAL);
481 }
482
483 /* Precompute checksum seed for all metadata */
484 ext2_sb_csum_set_seed(fs);
485
486 /* Verify sb csum if possible */
487 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
488 error = ext2_sb_csum_verify(fs);
489 if (error) {
490 return (error);
491 }
492 }
493
494 /* Check for block size = 1K|2K|4K */
495 if (le32toh(es->e2fs_log_bsize) > 2) {
496 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
497 "bad block size");
498 return (EINVAL);
499 }
500
501 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize);
502 fs->e2fs_bsize = 1U << fs->e2fs_bshift;
503 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1;
504 fs->e2fs_qbmask = fs->e2fs_bsize - 1;
505
506 /* Check for fragment size */
507 if (le32toh(es->e2fs_log_fsize) >
508 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) {
509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
510 "invalid log cluster size");
511 return (EINVAL);
512 }
513
514 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize);
515 if (fs->e2fs_fsize != fs->e2fs_bsize) {
516 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
517 "fragment size != block size");
518 return (EINVAL);
519 }
520
521 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;
522
523 /* Check reserved gdt blocks for future filesystem expansion */
524 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) {
525 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
526 "number of reserved GDT blocks too large");
527 return (EINVAL);
528 }
529
530 if (le32toh(es->e2fs_rev) == E2FS_REV0) {
531 fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
532 } else {
533 fs->e2fs_isize = le16toh(es->e2fs_inode_size);
534
535 /*
536 * Check first ino.
537 */
538 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) {
539 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
540 "invalid first ino");
541 return (EINVAL);
542 }
543
544 /*
545 * Simple sanity check for superblock inode size value.
546 */
547 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
548 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
549 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
550 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
551 "invalid inode size");
552 return (EINVAL);
553 }
554 }
555
556 /* Check group descriptors */
557 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) &&
558 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) {
559 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
560 "unsupported 64bit descriptor size");
561 return (EINVAL);
562 }
563
564 fs->e2fs_bpg = le32toh(es->e2fs_bpg);
565 fs->e2fs_fpg = le32toh(es->e2fs_fpg);
566 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) {
567 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
568 "zero blocks/fragments per group");
569 return (EINVAL);
570 } else if (fs->e2fs_bpg != fs->e2fs_fpg) {
571 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
572 "blocks per group not equal fragments per group");
573 return (EINVAL);
574 }
575
576 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) {
577 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
578 "non-standard group size unsupported");
579 return (EINVAL);
580 }
581
582 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
583 if (fs->e2fs_ipb == 0 ||
584 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) {
585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
586 "bad inodes per block size");
587 return (EINVAL);
588 }
589
590 fs->e2fs_ipg = le32toh(es->e2fs_ipg);
591 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) {
592 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
593 "invalid inodes per group");
594 return (EINVAL);
595 }
596
597 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
598
599 fs->e2fs_bcount = le32toh(es->e2fs_bcount);
600 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount);
601 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount);
602 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
603 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32;
604 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32;
605 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32;
606 }
607 if (fs->e2fs_rbcount > fs->e2fs_bcount ||
608 fs->e2fs_fbcount > fs->e2fs_bcount) {
609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
610 "invalid block count");
611 return (EINVAL);
612 }
613
614 fs->e2fs_ficount = le32toh(es->e2fs_ficount);
615 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) {
616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
617 "invalid number of free inodes");
618 return (EINVAL);
619 }
620
621 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) ||
622 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) {
623 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
624 "first data block out of range");
625 return (EINVAL);
626 }
627
628 fs->e2fs_gcount = howmany(fs->e2fs_bcount -
629 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs));
630 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) {
631 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
632 "groups count too large");
633 return (EINVAL);
634 }
635
636 /* Check for extra isize in big inodes. */
637 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
638 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
639 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
640 "no space for extra inode timestamps");
641 return (EINVAL);
642 }
643
644 /* s_resuid / s_resgid ? */
645
646 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
647 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE;
648 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb);
649 } else {
650 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
651 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount,
652 fs->e2fs_bsize / sizeof(struct ext2_gd));
653 }
654 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb);
655 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize,
656 M_EXT2MNT, M_WAITOK | M_ZERO);
657 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
658 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
659
660 for (i = 0; i < fs->e2fs_gdbcount; i++) {
661 error = bread(devvp,
662 fsbtodb(fs, ext2_cg_location(fs, i)),
663 fs->e2fs_bsize, NOCRED, &bp);
664 if (error) {
665 /*
666 * fs->e2fs_gd and fs->e2fs_contigdirs
667 * will be freed later by the caller,
668 * because this function could be called from
669 * MNT_UPDATE path.
670 */
671 return (error);
672 }
673 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
674 memcpy(&fs->e2fs_gd[
675 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
676 bp->b_data, fs->e2fs_bsize);
677 } else {
678 for (j = 0; j < e2fs_descpb &&
679 g_count < fs->e2fs_gcount; j++, g_count++)
680 memcpy(&fs->e2fs_gd[g_count],
681 bp->b_data + j * E2FS_REV0_GD_SIZE,
682 E2FS_REV0_GD_SIZE);
683 }
684 brelse(bp);
685 bp = NULL;
686 }
687
688 /* Validate cgs consistency */
689 error = ext2_cg_validate(fs);
690 if (error)
691 return (error);
692
693 /* Verfy cgs csum */
694 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
695 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
696 error = ext2_gd_csum_verify(fs, devvp->v_rdev);
697 if (error)
698 return (error);
699 }
700 /* Initialization for the ext2 Orlov allocator variant. */
701 fs->e2fs_total_dir = 0;
702 for (i = 0; i < fs->e2fs_gcount; i++)
703 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]);
704
705 if (le32toh(es->e2fs_rev) == E2FS_REV0 ||
706 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
707 fs->e2fs_maxfilesize = 0x7fffffff;
708 else {
709 fs->e2fs_maxfilesize = 0xffffffffffff;
710 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
711 fs->e2fs_maxfilesize = 0x7fffffffffffffff;
712 }
713 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) {
714 fs->e2fs_uhash = 3;
715 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) {
716 #ifdef __CHAR_UNSIGNED__
717 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH);
718 fs->e2fs_uhash = 3;
719 #else
720 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH);
721 #endif
722 }
723 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
724 error = ext2_sb_csum_verify(fs);
725
726 return (error);
727 }
728
729 /*
730 * Reload all incore data for a filesystem (used after running fsck on
731 * the root filesystem and finding things to fix). The filesystem must
732 * be mounted read-only.
733 *
734 * Things to do to update the mount:
735 * 1) invalidate all cached meta-data.
736 * 2) re-read superblock from disk.
737 * 3) invalidate all cluster summary information.
738 * 4) invalidate all inactive vnodes.
739 * 5) invalidate all cached file data.
740 * 6) re-read inode data for all active vnodes.
741 * XXX we are missing some steps, in particular # 3, this has to be reviewed.
742 */
743 static int
744 ext2_reload(struct mount *mp, struct thread *td)
745 {
746 struct vnode *vp, *mvp, *devvp;
747 struct inode *ip;
748 struct buf *bp;
749 struct ext2fs *es;
750 struct m_ext2fs *fs;
751 struct csum *sump;
752 int error, i;
753 int32_t *lp;
754
755 if ((mp->mnt_flag & MNT_RDONLY) == 0)
756 return (EINVAL);
757 /*
758 * Step 1: invalidate all cached meta-data.
759 */
760 devvp = VFSTOEXT2(mp)->um_devvp;
761 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
762 if (vinvalbuf(devvp, 0, 0, 0) != 0)
763 panic("ext2_reload: dirty1");
764 VOP_UNLOCK(devvp);
765
766 /*
767 * Step 2: re-read superblock from disk.
768 * constants have been adjusted for ext2
769 */
770 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
771 return (error);
772 es = (struct ext2fs *)bp->b_data;
773 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
774 brelse(bp);
775 return (EIO); /* XXX needs translation */
776 }
777 fs = VFSTOEXT2(mp)->um_e2fs;
778 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
779
780 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) {
781 brelse(bp);
782 return (error);
783 }
784 #ifdef UNKLAR
785 if (fs->fs_sbsize < SBSIZE)
786 bp->b_flags |= B_INVAL;
787 #endif
788 brelse(bp);
789
790 /*
791 * Step 3: invalidate all cluster summary information.
792 */
793 if (fs->e2fs_contigsumsize > 0) {
794 lp = fs->e2fs_maxcluster;
795 sump = fs->e2fs_clustersum;
796 for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
797 *lp++ = fs->e2fs_contigsumsize;
798 sump->cs_init = 0;
799 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
800 }
801 }
802
803 loop:
804 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
805 /*
806 * Step 4: invalidate all cached file data.
807 */
808 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
809 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
810 goto loop;
811 }
812 if (vinvalbuf(vp, 0, 0, 0))
813 panic("ext2_reload: dirty2");
814
815 /*
816 * Step 5: re-read inode data for all active vnodes.
817 */
818 ip = VTOI(vp);
819 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
820 (int)fs->e2fs_bsize, NOCRED, &bp);
821 if (error) {
822 VOP_UNLOCK(vp);
823 vrele(vp);
824 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
825 return (error);
826 }
827
828 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
829 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
830
831 brelse(bp);
832 VOP_UNLOCK(vp);
833 vrele(vp);
834
835 if (error) {
836 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
837 return (error);
838 }
839 }
840 return (0);
841 }
842
843 /*
844 * Common code for mount and mountroot.
845 */
846 static int
847 ext2_mountfs(struct vnode *devvp, struct mount *mp)
848 {
849 struct ext2mount *ump;
850 struct buf *bp;
851 struct m_ext2fs *fs;
852 struct ext2fs *es;
853 struct cdev *dev = devvp->v_rdev;
854 struct g_consumer *cp;
855 struct bufobj *bo;
856 struct csum *sump;
857 int error;
858 int ronly;
859 int i;
860 u_long size;
861 int32_t *lp;
862 int32_t e2fs_maxcontig;
863
864 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
865 /* XXX: use VOP_ACESS to check FS perms */
866 g_topology_lock();
867 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
868 g_topology_unlock();
869 VOP_UNLOCK(devvp);
870 if (error)
871 return (error);
872
873 /* XXX: should we check for some sectorsize or 512 instead? */
874 if (((SBSIZE % cp->provider->sectorsize) != 0) ||
875 (SBSIZE < cp->provider->sectorsize)) {
876 g_topology_lock();
877 g_vfs_close(cp);
878 g_topology_unlock();
879 return (EINVAL);
880 }
881
882 bo = &devvp->v_bufobj;
883 bo->bo_private = cp;
884 bo->bo_ops = g_vfs_bufops;
885 if (devvp->v_rdev->si_iosize_max != 0)
886 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
887 if (mp->mnt_iosize_max > maxphys)
888 mp->mnt_iosize_max = maxphys;
889
890 bp = NULL;
891 ump = NULL;
892 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
893 goto out;
894 es = (struct ext2fs *)bp->b_data;
895 if (ext2_check_sb_compat(es, dev, ronly) != 0) {
896 error = EINVAL; /* XXX needs translation */
897 goto out;
898 }
899 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 ||
900 (le16toh(es->e2fs_state) & E2FS_ERRORS)) {
901 if (ronly || (mp->mnt_flag & MNT_FORCE)) {
902 printf(
903 "WARNING: Filesystem was not properly dismounted\n");
904 } else {
905 printf(
906 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n");
907 error = EPERM;
908 goto out;
909 }
910 }
911 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
912
913 /*
914 * I don't know whether this is the right strategy. Note that
915 * we dynamically allocate both an m_ext2fs and an ext2fs
916 * while Linux keeps the super block in a locked buffer.
917 */
918 ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
919 M_EXT2MNT, M_WAITOK | M_ZERO);
920 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
921 M_EXT2MNT, M_WAITOK);
922 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
923 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
924 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
925 goto out;
926
927 /*
928 * Calculate the maximum contiguous blocks and size of cluster summary
929 * array. In FFS this is done by newfs; however, the superblock
930 * in ext2fs doesn't have these variables, so we can calculate
931 * them here.
932 */
933 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize);
934 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
935 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN;
936 if (ump->um_e2fs->e2fs_contigsumsize > 0) {
937 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
938 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
939 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
940 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
941 lp = ump->um_e2fs->e2fs_maxcluster;
942 sump = ump->um_e2fs->e2fs_clustersum;
943 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
944 *lp++ = ump->um_e2fs->e2fs_contigsumsize;
945 sump->cs_init = 0;
946 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
947 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
948 }
949 }
950
951 brelse(bp);
952 bp = NULL;
953 fs = ump->um_e2fs;
954 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */
955
956 /*
957 * If the fs is not mounted read-only, make sure the super block is
958 * always written back on a sync().
959 */
960 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0;
961 if (ronly == 0) {
962 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */
963 fs->e2fs->e2fs_state =
964 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
965 }
966 mp->mnt_data = ump;
967 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
968 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
969 MNT_ILOCK(mp);
970 mp->mnt_flag |= MNT_LOCAL;
971 MNT_IUNLOCK(mp);
972 ump->um_mountp = mp;
973 ump->um_dev = dev;
974 ump->um_devvp = devvp;
975 ump->um_bo = &devvp->v_bufobj;
976 ump->um_cp = cp;
977
978 /*
979 * Setting those two parameters allowed us to use
980 * ufs_bmap w/o changse!
981 */
982 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
983 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1;
984 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
985 if (ronly == 0)
986 ext2_sbupdate(ump, MNT_WAIT);
987 /*
988 * Initialize filesystem stat information in mount struct.
989 */
990 MNT_ILOCK(mp);
991 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
992 MNTK_USES_BCACHE;
993 MNT_IUNLOCK(mp);
994 return (0);
995 out:
996 if (bp)
997 brelse(bp);
998 if (cp != NULL) {
999 g_topology_lock();
1000 g_vfs_close(cp);
1001 g_topology_unlock();
1002 }
1003 if (ump) {
1004 mtx_destroy(EXT2_MTX(ump));
1005 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
1006 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
1007 free(ump->um_e2fs->e2fs, M_EXT2MNT);
1008 free(ump->um_e2fs, M_EXT2MNT);
1009 free(ump, M_EXT2MNT);
1010 mp->mnt_data = NULL;
1011 }
1012 return (error);
1013 }
1014
1015 /*
1016 * Unmount system call.
1017 */
1018 static int
1019 ext2_unmount(struct mount *mp, int mntflags)
1020 {
1021 struct ext2mount *ump;
1022 struct m_ext2fs *fs;
1023 struct csum *sump;
1024 int error, flags, i, ronly;
1025
1026 flags = 0;
1027 if (mntflags & MNT_FORCE) {
1028 if (mp->mnt_flag & MNT_ROOTFS)
1029 return (EINVAL);
1030 flags |= FORCECLOSE;
1031 }
1032 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
1033 return (error);
1034 ump = VFSTOEXT2(mp);
1035 fs = ump->um_e2fs;
1036 ronly = fs->e2fs_ronly;
1037 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
1038 if (fs->e2fs_wasvalid)
1039 fs->e2fs->e2fs_state =
1040 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN);
1041 ext2_sbupdate(ump, MNT_WAIT);
1042 }
1043
1044 g_topology_lock();
1045 g_vfs_close(ump->um_cp);
1046 g_topology_unlock();
1047 vrele(ump->um_devvp);
1048 sump = fs->e2fs_clustersum;
1049 for (i = 0; i < fs->e2fs_gcount; i++, sump++)
1050 free(sump->cs_sum, M_EXT2MNT);
1051 free(fs->e2fs_clustersum, M_EXT2MNT);
1052 free(fs->e2fs_maxcluster, M_EXT2MNT);
1053 free(fs->e2fs_gd, M_EXT2MNT);
1054 free(fs->e2fs_contigdirs, M_EXT2MNT);
1055 free(fs->e2fs, M_EXT2MNT);
1056 free(fs, M_EXT2MNT);
1057 free(ump, M_EXT2MNT);
1058 mp->mnt_data = NULL;
1059 return (error);
1060 }
1061
/*
 * Flush out all the files in a filesystem.
 *
 * Thin wrapper around vflush() with no root vnode references expected;
 * `flags' is passed through unchanged (e.g. FORCECLOSE) and the result
 * of vflush() is returned as is.
 */
static int
ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
{

	return (vflush(mp, 0, flags, td));
}
1073
1074 /*
1075 * Get filesystem statistics.
1076 */
1077 int
1078 ext2_statfs(struct mount *mp, struct statfs *sbp)
1079 {
1080 struct ext2mount *ump;
1081 struct m_ext2fs *fs;
1082 uint32_t overhead, overhead_per_group, ngdb;
1083 int i, ngroups;
1084
1085 ump = VFSTOEXT2(mp);
1086 fs = ump->um_e2fs;
1087 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC)
1088 panic("ext2_statfs");
1089
1090 /*
1091 * Compute the overhead (FS structures)
1092 */
1093 overhead_per_group =
1094 1 /* block bitmap */ +
1095 1 /* inode bitmap */ +
1096 fs->e2fs_itpg;
1097 overhead = le32toh(fs->e2fs->e2fs_first_dblock) +
1098 fs->e2fs_gcount * overhead_per_group;
1099 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1100 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) {
1101 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
1102 if (ext2_cg_has_sb(fs, i))
1103 ngroups++;
1104 }
1105 } else {
1106 ngroups = fs->e2fs_gcount;
1107 }
1108 ngdb = fs->e2fs_gdbcount;
1109 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1110 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE)
1111 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb);
1112 overhead += ngroups * (1 /* superblock */ + ngdb);
1113
1114 sbp->f_bsize = EXT2_FRAG_SIZE(fs);
1115 sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
1116 sbp->f_blocks = fs->e2fs_bcount - overhead;
1117 sbp->f_bfree = fs->e2fs_fbcount;
1118 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount;
1119 sbp->f_files = le32toh(fs->e2fs->e2fs_icount);
1120 sbp->f_ffree = fs->e2fs_ficount;
1121 return (0);
1122 }
1123
1124 /*
1125 * Go through the disk queues to initiate sandbagged IO;
1126 * go through the inodes to write those that have been modified;
1127 * initiate the writing of the super block if it has been modified.
1128 *
1129 * Note: we are always called with the filesystem marked `MPBUSY'.
1130 */
1131 static int
1132 ext2_sync(struct mount *mp, int waitfor)
1133 {
1134 struct vnode *mvp, *vp;
1135 struct thread *td;
1136 struct inode *ip;
1137 struct ext2mount *ump = VFSTOEXT2(mp);
1138 struct m_ext2fs *fs;
1139 int error, allerror = 0;
1140
1141 td = curthread;
1142 fs = ump->um_e2fs;
1143 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */
1144 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt);
1145 }
1146
1147 /*
1148 * Write back each (modified) inode.
1149 */
1150 loop:
1151 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1152 if (vp->v_type == VNON) {
1153 VI_UNLOCK(vp);
1154 continue;
1155 }
1156 ip = VTOI(vp);
1157 if ((ip->i_flag &
1158 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1159 (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1160 waitfor == MNT_LAZY)) {
1161 VI_UNLOCK(vp);
1162 continue;
1163 }
1164 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1165 if (error) {
1166 if (error == ENOENT) {
1167 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1168 goto loop;
1169 }
1170 continue;
1171 }
1172 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
1173 allerror = error;
1174 VOP_UNLOCK(vp);
1175 vrele(vp);
1176 }
1177
1178 /*
1179 * Force stale filesystem control information to be flushed.
1180 */
1181 if (waitfor != MNT_LAZY) {
1182 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1183 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
1184 allerror = error;
1185 VOP_UNLOCK(ump->um_devvp);
1186 }
1187
1188 /*
1189 * Write back modified superblock.
1190 */
1191 if (fs->e2fs_fmod != 0) {
1192 fs->e2fs_fmod = 0;
1193 fs->e2fs->e2fs_wtime = htole32(time_second);
1194 if ((error = ext2_cgupdate(ump, waitfor)) != 0)
1195 allerror = error;
1196 }
1197 return (allerror);
1198 }
1199
1200 /*
1201 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
1202 * in from disk. If it is in core, wait for the lock bit to clear, then
1203 * return the inode locked. Detection and handling of mount points must be
1204 * done by the calling routine.
1205 */
1206 static int
1207 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1208 {
1209 struct m_ext2fs *fs;
1210 struct inode *ip;
1211 struct ext2mount *ump;
1212 struct buf *bp;
1213 struct vnode *vp;
1214 struct thread *td;
1215 unsigned int i, used_blocks;
1216 int error;
1217
1218 td = curthread;
1219 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
1220 if (error || *vpp != NULL)
1221 return (error);
1222
1223 ump = VFSTOEXT2(mp);
1224 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
1225
1226 /* Allocate a new vnode/inode. */
1227 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
1228 *vpp = NULL;
1229 free(ip, M_EXT2NODE);
1230 return (error);
1231 }
1232 vp->v_data = ip;
1233 ip->i_vnode = vp;
1234 ip->i_e2fs = fs = ump->um_e2fs;
1235 ip->i_ump = ump;
1236 ip->i_number = ino;
1237
1238 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1239 error = insmntque(vp, mp);
1240 if (error != 0) {
1241 free(ip, M_EXT2NODE);
1242 *vpp = NULL;
1243 return (error);
1244 }
1245 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
1246 if (error || *vpp != NULL)
1247 return (error);
1248
1249 /* Read in the disk contents for the inode, copy into the inode. */
1250 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1251 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
1252 /*
1253 * The inode does not contain anything useful, so it would
1254 * be misleading to leave it on its hash chain. With mode
1255 * still zero, it will be unlinked and returned to the free
1256 * list by vput().
1257 */
1258 brelse(bp);
1259 vput(vp);
1260 *vpp = NULL;
1261 return (error);
1262 }
1263 /* convert ext2 inode to dinode */
1264 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
1265 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip);
1266 if (error) {
1267 brelse(bp);
1268 vput(vp);
1269 *vpp = NULL;
1270 return (error);
1271 }
1272 ip->i_block_group = ino_to_cg(fs, ino);
1273 ip->i_next_alloc_block = 0;
1274 ip->i_next_alloc_goal = 0;
1275
1276 /*
1277 * Now we want to make sure that block pointers for unused
1278 * blocks are zeroed out - ext2_balloc depends on this
1279 * although for regular files and directories only
1280 *
1281 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
1282 * out because we could corrupt the extent tree.
1283 */
1284 if (!(ip->i_flag & IN_E4EXTENTS) &&
1285 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
1286 used_blocks = howmany(ip->i_size, fs->e2fs_bsize);
1287 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1288 ip->i_db[i] = 0;
1289 }
1290 #ifdef EXT2FS_PRINT_EXTENTS
1291 ext2_print_inode(ip);
1292 ext4_ext_print_extent_tree_status(ip);
1293 #endif
1294 bqrelse(bp);
1295
1296 /*
1297 * Initialize the vnode from the inode, check for aliases.
1298 * Note that the underlying vnode may have changed.
1299 */
1300 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
1301 vput(vp);
1302 *vpp = NULL;
1303 return (error);
1304 }
1305
1306 /*
1307 * Finish inode initialization.
1308 */
1309
1310 *vpp = vp;
1311 return (0);
1312 }
1313
1314 /*
1315 * File handle to vnode
1316 *
1317 * Have to be really careful about stale file handles:
1318 * - check that the inode number is valid
1319 * - call ext2_vget() to get the locked inode
1320 * - check for an unallocated inode (i_mode == 0)
1321 * - check that the given client host has export rights and return
1322 * those rights via. exflagsp and credanonp
1323 */
1324 static int
1325 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1326 {
1327 struct inode *ip;
1328 struct ufid *ufhp;
1329 struct vnode *nvp;
1330 struct m_ext2fs *fs;
1331 int error;
1332
1333 ufhp = (struct ufid *)fhp;
1334 fs = VFSTOEXT2(mp)->um_e2fs;
1335 if (ufhp->ufid_ino < EXT2_ROOTINO ||
1336 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg)
1337 return (ESTALE);
1338
1339 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1340 if (error) {
1341 *vpp = NULLVP;
1342 return (error);
1343 }
1344 ip = VTOI(nvp);
1345 if (ip->i_mode == 0 ||
1346 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1347 vput(nvp);
1348 *vpp = NULLVP;
1349 return (ESTALE);
1350 }
1351 *vpp = nvp;
1352 vnode_create_vobject(*vpp, 0, curthread);
1353 return (0);
1354 }
1355
1356 /*
1357 * Write a superblock and associated information back to disk.
1358 */
1359 static int
1360 ext2_sbupdate(struct ext2mount *mp, int waitfor)
1361 {
1362 struct m_ext2fs *fs = mp->um_e2fs;
1363 struct ext2fs *es = fs->e2fs;
1364 struct buf *bp;
1365 int error = 0;
1366
1367 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff);
1368 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff);
1369 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff);
1370 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1371 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32);
1372 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32);
1373 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32);
1374 }
1375
1376 es->e2fs_ficount = htole32(fs->e2fs_ficount);
1377
1378 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1379 ext2_sb_csum_set(fs);
1380
1381 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
1382 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs));
1383 if (waitfor == MNT_WAIT)
1384 error = bwrite(bp);
1385 else
1386 bawrite(bp);
1387
1388 /*
1389 * The buffers for group descriptors, inode bitmaps and block bitmaps
1390 * are not busy at this point and are (hopefully) written by the
1391 * usual sync mechanism. No need to write them here.
1392 */
1393 return (error);
1394 }
1395 int
1396 ext2_cgupdate(struct ext2mount *mp, int waitfor)
1397 {
1398 struct m_ext2fs *fs = mp->um_e2fs;
1399 struct buf *bp;
1400 int i, j, g_count = 0, error = 0, allerror = 0;
1401
1402 allerror = ext2_sbupdate(mp, waitfor);
1403
1404 /* Update gd csums */
1405 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1406 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1407 ext2_gd_csum_set(fs);
1408
1409 for (i = 0; i < fs->e2fs_gdbcount; i++) {
1410 bp = getblk(mp->um_devvp, fsbtodb(fs,
1411 ext2_cg_location(fs, i)),
1412 fs->e2fs_bsize, 0, 0, 0);
1413 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1414 memcpy(bp->b_data, &fs->e2fs_gd[
1415 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
1416 fs->e2fs_bsize);
1417 } else {
1418 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE &&
1419 g_count < fs->e2fs_gcount; j++, g_count++)
1420 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE,
1421 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE);
1422 }
1423 if (waitfor == MNT_WAIT)
1424 error = bwrite(bp);
1425 else
1426 bawrite(bp);
1427 }
1428
1429 if (!allerror && error)
1430 allerror = error;
1431 return (allerror);
1432 }
1433
1434 /*
1435 * Return the root of a filesystem.
1436 */
1437 static int
1438 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
1439 {
1440 struct vnode *nvp;
1441 int error;
1442
1443 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
1444 if (error)
1445 return (error);
1446 *vpp = nvp;
1447 return (0);
1448 }
Cache object: b705bf90511635f36ad7dd384cfcff5e
|