1 /*-
2 * modified for EXT2FS support in Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 */
7 /*-
8 * SPDX-License-Identifier: BSD-3-Clause
9 *
10 * Copyright (c) 1989, 1991, 1993, 1994
11 * The Regents of the University of California. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94
38 * $FreeBSD$
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/endian.h>
53 #include <sys/fcntl.h>
54 #include <sys/malloc.h>
55 #include <sys/sdt.h>
56 #include <sys/stat.h>
57 #include <sys/mutex.h>
58
59 #include <geom/geom.h>
60 #include <geom/geom_vfs.h>
61
62 #include <fs/ext2fs/fs.h>
63 #include <fs/ext2fs/ext2_mount.h>
64 #include <fs/ext2fs/inode.h>
65
66 #include <fs/ext2fs/ext2fs.h>
67 #include <fs/ext2fs/ext2_dinode.h>
68 #include <fs/ext2fs/ext2_extern.h>
69 #include <fs/ext2fs/ext2_extents.h>
70
/* Declare the ext2fs SDT provider used by the probes defined below. */
SDT_PROVIDER_DECLARE(ext2fs);
/*
 * ext2fs trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*");
/*
 * Error probes fired from this file:
 * ext2_cg_validate_error:     arg0 is a textual reason, arg1 is the index
 *                             of the block group that failed validation.
 * ext2_compute_sb_data_error: arg0 is a textual reason.
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int");
SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*");
80
/*
 * Forward declarations: file-local helpers first, then the VFS entry
 * points that are wired into ext2fs_vfsops below.
 */
static int	ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
static int	ext2_mountfs(struct vnode *, struct mount *);
static int	ext2_reload(struct mount *mp, struct thread *td);
static int	ext2_sbupdate(struct ext2mount *, int);
static int	ext2_cgupdate(struct ext2mount *, int);
static vfs_unmount_t		ext2_unmount;
static vfs_root_t		ext2_root;
static vfs_statfs_t		ext2_statfs;
static vfs_sync_t		ext2_sync;
static vfs_vget_t		ext2_vget;
static vfs_fhtovp_t		ext2_fhtovp;
static vfs_mount_t		ext2_mount;
93
/*
 * Kernel malloc types.  M_EXT2NODE has external linkage; M_EXT2MNT is
 * private to this file and tags the per-mount allocations made here
 * (mount structure, superblock copies, group descriptors, cluster summaries).
 */
MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
96
/* VFS operations vector for ext2fs; entries not listed are left unset here. */
static struct vfsops ext2fs_vfsops = {
	.vfs_fhtovp =		ext2_fhtovp,
	.vfs_mount =		ext2_mount,
	.vfs_root =		ext2_root,	/* root inode via vget */
	.vfs_statfs =		ext2_statfs,
	.vfs_sync =		ext2_sync,
	.vfs_unmount =		ext2_unmount,
	.vfs_vget =		ext2_vget,
};

/* Register the operations vector under the filesystem name "ext2fs". */
VFS_SET(ext2fs_vfsops, ext2fs, 0);
108
/* Superblock helpers defined further down in this file. */
static int	ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
		    int ronly);
static int	ext2_compute_sb_data(struct vnode * devvp,
		    struct ext2fs * es, struct m_ext2fs * fs);

/*
 * NULL-terminated list of mount option names handed to vfs_filteropt()
 * by ext2_mount(); the mount is refused with EINVAL when that check fails.
 */
static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "multilabel",
    "suiddir", "nosymfollow", "sync", "union", NULL };
117
118 /*
119 * VFS Operations.
120 *
121 * mount system call
122 */
123 static int
124 ext2_mount(struct mount *mp)
125 {
126 struct vfsoptlist *opts;
127 struct vnode *devvp;
128 struct thread *td;
129 struct ext2mount *ump = NULL;
130 struct m_ext2fs *fs;
131 struct nameidata nd, *ndp = &nd;
132 accmode_t accmode;
133 char *path, *fspec;
134 int error, flags, len;
135
136 td = curthread;
137 opts = mp->mnt_optnew;
138
139 if (vfs_filteropt(opts, ext2_opts))
140 return (EINVAL);
141
142 vfs_getopt(opts, "fspath", (void **)&path, NULL);
143 /* Double-check the length of path.. */
144 if (strlen(path) >= MAXMNTLEN)
145 return (ENAMETOOLONG);
146
147 fspec = NULL;
148 error = vfs_getopt(opts, "from", (void **)&fspec, &len);
149 if (!error && fspec[len - 1] != '\0')
150 return (EINVAL);
151
152 /*
153 * If updating, check whether changing from read-only to
154 * read/write; if there is no device name, that's all we do.
155 */
156 if (mp->mnt_flag & MNT_UPDATE) {
157 ump = VFSTOEXT2(mp);
158 fs = ump->um_e2fs;
159 error = 0;
160 if (fs->e2fs_ronly == 0 &&
161 vfs_flagopt(opts, "ro", NULL, 0)) {
162 error = VFS_SYNC(mp, MNT_WAIT);
163 if (error)
164 return (error);
165 flags = WRITECLOSE;
166 if (mp->mnt_flag & MNT_FORCE)
167 flags |= FORCECLOSE;
168 error = ext2_flushfiles(mp, flags, td);
169 if (error == 0 && fs->e2fs_wasvalid &&
170 ext2_cgupdate(ump, MNT_WAIT) == 0) {
171 fs->e2fs->e2fs_state =
172 htole16((le16toh(fs->e2fs->e2fs_state) |
173 E2FS_ISCLEAN));
174 ext2_sbupdate(ump, MNT_WAIT);
175 }
176 fs->e2fs_ronly = 1;
177 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
178 g_topology_lock();
179 g_access(ump->um_cp, 0, -1, 0);
180 g_topology_unlock();
181 }
182 if (!error && (mp->mnt_flag & MNT_RELOAD))
183 error = ext2_reload(mp, td);
184 if (error)
185 return (error);
186 devvp = ump->um_devvp;
187 if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
188 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
189 return (EPERM);
190
191 /*
192 * If upgrade to read-write by non-root, then verify
193 * that user has necessary permissions on the device.
194 */
195 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
196 error = VOP_ACCESS(devvp, VREAD | VWRITE,
197 td->td_ucred, td);
198 if (error)
199 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
200 if (error) {
201 VOP_UNLOCK(devvp);
202 return (error);
203 }
204 VOP_UNLOCK(devvp);
205 g_topology_lock();
206 error = g_access(ump->um_cp, 0, 1, 0);
207 g_topology_unlock();
208 if (error)
209 return (error);
210
211 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 ||
212 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) {
213 if (mp->mnt_flag & MNT_FORCE) {
214 printf(
215 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
216 } else {
217 printf(
218 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
219 fs->e2fs_fsmnt);
220 return (EPERM);
221 }
222 }
223 fs->e2fs->e2fs_state =
224 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
225 (void)ext2_cgupdate(ump, MNT_WAIT);
226 fs->e2fs_ronly = 0;
227 MNT_ILOCK(mp);
228 mp->mnt_flag &= ~MNT_RDONLY;
229 MNT_IUNLOCK(mp);
230 }
231 if (vfs_flagopt(opts, "export", NULL, 0)) {
232 /* Process export requests in vfs_mount.c. */
233 return (error);
234 }
235 }
236
237 /*
238 * Not an update, or updating the name: look up the name
239 * and verify that it refers to a sensible disk device.
240 */
241 if (fspec == NULL)
242 return (EINVAL);
243 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec);
244 if ((error = namei(ndp)) != 0)
245 return (error);
246 NDFREE_PNBUF(ndp);
247 devvp = ndp->ni_vp;
248
249 if (!vn_isdisk_error(devvp, &error)) {
250 vput(devvp);
251 return (error);
252 }
253
254 /*
255 * If mount by non-root, then verify that user has necessary
256 * permissions on the device.
257 *
258 * XXXRW: VOP_ACCESS() enough?
259 */
260 accmode = VREAD;
261 if ((mp->mnt_flag & MNT_RDONLY) == 0)
262 accmode |= VWRITE;
263 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
264 if (error)
265 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
266 if (error) {
267 vput(devvp);
268 return (error);
269 }
270
271 if ((mp->mnt_flag & MNT_UPDATE) == 0) {
272 error = ext2_mountfs(devvp, mp);
273 } else {
274 if (devvp != ump->um_devvp) {
275 vput(devvp);
276 return (EINVAL); /* needs translation */
277 } else
278 vput(devvp);
279 }
280 if (error) {
281 vrele(devvp);
282 return (error);
283 }
284 ump = VFSTOEXT2(mp);
285 fs = ump->um_e2fs;
286
287 /*
288 * Note that this strncpy() is ok because of a check at the start
289 * of ext2_mount().
290 */
291 strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
292 fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
293 vfs_mountedfrom(mp, fspec);
294 return (0);
295 }
296
297 static int
298 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
299 {
300 uint32_t i, mask;
301
302 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) {
303 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
304 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC);
305 return (1);
306 }
307 if (le32toh(es->e2fs_rev) > E2FS_REV0) {
308 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP);
309 if (mask) {
310 printf("WARNING: mount of %s denied due to "
311 "unsupported optional features:\n", devtoname(dev));
312 for (i = 0;
313 i < sizeof(incompat)/sizeof(struct ext2_feature);
314 i++)
315 if (mask & incompat[i].mask)
316 printf("%s ", incompat[i].name);
317 printf("\n");
318 return (1);
319 }
320 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP;
321 if (!ronly && mask) {
322 printf("WARNING: R/W mount of %s denied due to "
323 "unsupported optional features:\n", devtoname(dev));
324 for (i = 0;
325 i < sizeof(ro_compat)/sizeof(struct ext2_feature);
326 i++)
327 if (mask & ro_compat[i].mask)
328 printf("%s ", ro_compat[i].name);
329 printf("\n");
330 return (1);
331 }
332 }
333 return (0);
334 }
335
336 static e4fs_daddr_t
337 ext2_cg_location(struct m_ext2fs *fs, int number)
338 {
339 int cg, descpb, logical_sb, has_super = 0;
340
341 /*
342 * Adjust logical superblock block number.
343 * Godmar thinks: if the blocksize is greater than 1024, then
344 * the superblock is logically part of block zero.
345 */
346 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1;
347
348 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
349 number < le32toh(fs->e2fs->e3fs_first_meta_bg))
350 return (logical_sb + number + 1);
351
352 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT))
353 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
354 else
355 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
356
357 cg = descpb * number;
358
359 if (ext2_cg_has_sb(fs, cg))
360 has_super = 1;
361
362 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) +
363 le32toh(fs->e2fs->e2fs_first_dblock));
364 }
365
366 static int
367 ext2_cg_validate(struct m_ext2fs *fs)
368 {
369 uint64_t b_bitmap;
370 uint64_t i_bitmap;
371 uint64_t i_tables;
372 uint64_t first_block, last_block, last_cg_block;
373 struct ext2_gd *gd;
374 unsigned int i, cg_count;
375
376 first_block = le32toh(fs->e2fs->e2fs_first_dblock);
377 last_cg_block = ext2_cg_number_gdb(fs, 0);
378 cg_count = fs->e2fs_gcount;
379
380 for (i = 0; i < fs->e2fs_gcount; i++) {
381 gd = &fs->e2fs_gd[i];
382
383 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
384 i == fs->e2fs_gcount - 1) {
385 last_block = fs->e2fs_bcount - 1;
386 } else {
387 last_block = first_block +
388 (EXT2_BLOCKS_PER_GROUP(fs) - 1);
389 }
390
391 if ((cg_count == fs->e2fs_gcount) &&
392 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED))
393 cg_count = i;
394
395 b_bitmap = e2fs_gd_get_b_bitmap(gd);
396 if (b_bitmap == 0) {
397 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
398 "block bitmap is zero", i);
399 return (EINVAL);
400 }
401 if (b_bitmap <= last_cg_block) {
402 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
403 "block bitmap overlaps gds", i);
404 return (EINVAL);
405 }
406 if (b_bitmap < first_block || b_bitmap > last_block) {
407 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
408 "block bitmap not in group", i);
409 return (EINVAL);
410 }
411
412 i_bitmap = e2fs_gd_get_i_bitmap(gd);
413 if (i_bitmap == 0) {
414 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
415 "inode bitmap is zero", i);
416 return (EINVAL);
417 }
418 if (i_bitmap <= last_cg_block) {
419 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
420 "inode bitmap overlaps gds", i);
421 return (EINVAL);
422 }
423 if (i_bitmap < first_block || i_bitmap > last_block) {
424 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
425 "inode bitmap not in group blk", i);
426 return (EINVAL);
427 }
428
429 i_tables = e2fs_gd_get_i_tables(gd);
430 if (i_tables == 0) {
431 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
432 "inode table is zero", i);
433 return (EINVAL);
434 }
435 if (i_tables <= last_cg_block) {
436 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
437 "inode tables overlaps gds", i);
438 return (EINVAL);
439 }
440 if (i_tables < first_block ||
441 i_tables + fs->e2fs_itpg - 1 > last_block) {
442 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
443 "inode tables not in group blk", i);
444 return (EINVAL);
445 }
446
447 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG))
448 first_block += EXT2_BLOCKS_PER_GROUP(fs);
449 }
450
451 return (0);
452 }
453
454 /*
455 * This computes the fields of the m_ext2fs structure from the
456 * data in the ext2fs structure read in.
457 */
458 static int
459 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es,
460 struct m_ext2fs *fs)
461 {
462 struct buf *bp;
463 uint32_t e2fs_descpb, e2fs_gdbcount_alloc;
464 int i, j;
465 int g_count = 0;
466 int error;
467
468 /* Check checksum features */
469 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) &&
470 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
471 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
472 "incorrect checksum features combination");
473 return (EINVAL);
474 }
475
476 /* Precompute checksum seed for all metadata */
477 ext2_sb_csum_set_seed(fs);
478
479 /* Verify sb csum if possible */
480 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
481 error = ext2_sb_csum_verify(fs);
482 if (error) {
483 return (error);
484 }
485 }
486
487 /* Check for block size = 1K|2K|4K */
488 if (le32toh(es->e2fs_log_bsize) > 2) {
489 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
490 "bad block size");
491 return (EINVAL);
492 }
493
494 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize);
495 fs->e2fs_bsize = 1U << fs->e2fs_bshift;
496 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1;
497 fs->e2fs_qbmask = fs->e2fs_bsize - 1;
498
499 /* Check for fragment size */
500 if (le32toh(es->e2fs_log_fsize) >
501 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) {
502 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
503 "invalid log cluster size");
504 return (EINVAL);
505 }
506
507 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize);
508 if (fs->e2fs_fsize != fs->e2fs_bsize) {
509 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
510 "fragment size != block size");
511 return (EINVAL);
512 }
513
514 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;
515
516 /* Check reserved gdt blocks for future filesystem expansion */
517 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) {
518 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
519 "number of reserved GDT blocks too large");
520 return (EINVAL);
521 }
522
523 if (le32toh(es->e2fs_rev) == E2FS_REV0) {
524 fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
525 } else {
526 fs->e2fs_isize = le16toh(es->e2fs_inode_size);
527
528 /*
529 * Check first ino.
530 */
531 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) {
532 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
533 "invalid first ino");
534 return (EINVAL);
535 }
536
537 /*
538 * Simple sanity check for superblock inode size value.
539 */
540 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
541 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
542 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
543 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
544 "invalid inode size");
545 return (EINVAL);
546 }
547 }
548
549 /* Check group descriptors */
550 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) &&
551 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) {
552 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
553 "unsupported 64bit descriptor size");
554 return (EINVAL);
555 }
556
557 fs->e2fs_bpg = le32toh(es->e2fs_bpg);
558 fs->e2fs_fpg = le32toh(es->e2fs_fpg);
559 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) {
560 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
561 "zero blocks/fragments per group");
562 return (EINVAL);
563 } else if (fs->e2fs_bpg != fs->e2fs_fpg) {
564 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
565 "blocks per group not equal fragments per group");
566 return (EINVAL);
567 }
568
569 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) {
570 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
571 "non-standard group size unsupported");
572 return (EINVAL);
573 }
574
575 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
576 if (fs->e2fs_ipb == 0 ||
577 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) {
578 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
579 "bad inodes per block size");
580 return (EINVAL);
581 }
582
583 fs->e2fs_ipg = le32toh(es->e2fs_ipg);
584 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) {
585 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
586 "invalid inodes per group");
587 return (EINVAL);
588 }
589
590 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
591
592 fs->e2fs_bcount = le32toh(es->e2fs_bcount);
593 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount);
594 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount);
595 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
596 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32;
597 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32;
598 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32;
599 }
600 if (fs->e2fs_rbcount > fs->e2fs_bcount ||
601 fs->e2fs_fbcount > fs->e2fs_bcount) {
602 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
603 "invalid block count");
604 return (EINVAL);
605 }
606
607 fs->e2fs_ficount = le32toh(es->e2fs_ficount);
608 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) {
609 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
610 "invalid number of free inodes");
611 return (EINVAL);
612 }
613
614 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) ||
615 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) {
616 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
617 "first data block out of range");
618 return (EINVAL);
619 }
620
621 fs->e2fs_gcount = howmany(fs->e2fs_bcount -
622 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs));
623 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) {
624 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
625 "groups count too large");
626 return (EINVAL);
627 }
628
629 /* Check for extra isize in big inodes. */
630 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
631 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
632 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
633 "no space for extra inode timestamps");
634 return (EINVAL);
635 }
636
637 /* s_resuid / s_resgid ? */
638
639 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
640 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE;
641 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb);
642 } else {
643 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
644 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount,
645 fs->e2fs_bsize / sizeof(struct ext2_gd));
646 }
647 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb);
648 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize,
649 M_EXT2MNT, M_WAITOK | M_ZERO);
650 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
651 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
652
653 for (i = 0; i < fs->e2fs_gdbcount; i++) {
654 error = bread(devvp,
655 fsbtodb(fs, ext2_cg_location(fs, i)),
656 fs->e2fs_bsize, NOCRED, &bp);
657 if (error) {
658 /*
659 * fs->e2fs_gd and fs->e2fs_contigdirs
660 * will be freed later by the caller,
661 * because this function could be called from
662 * MNT_UPDATE path.
663 */
664 return (error);
665 }
666 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
667 memcpy(&fs->e2fs_gd[
668 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
669 bp->b_data, fs->e2fs_bsize);
670 } else {
671 for (j = 0; j < e2fs_descpb &&
672 g_count < fs->e2fs_gcount; j++, g_count++)
673 memcpy(&fs->e2fs_gd[g_count],
674 bp->b_data + j * E2FS_REV0_GD_SIZE,
675 E2FS_REV0_GD_SIZE);
676 }
677 brelse(bp);
678 bp = NULL;
679 }
680
681 /* Validate cgs consistency */
682 error = ext2_cg_validate(fs);
683 if (error)
684 return (error);
685
686 /* Verfy cgs csum */
687 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
688 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
689 error = ext2_gd_csum_verify(fs, devvp->v_rdev);
690 if (error)
691 return (error);
692 }
693 /* Initialization for the ext2 Orlov allocator variant. */
694 fs->e2fs_total_dir = 0;
695 for (i = 0; i < fs->e2fs_gcount; i++)
696 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]);
697
698 if (le32toh(es->e2fs_rev) == E2FS_REV0 ||
699 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
700 fs->e2fs_maxfilesize = 0x7fffffff;
701 else {
702 fs->e2fs_maxfilesize = 0xffffffffffff;
703 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
704 fs->e2fs_maxfilesize = 0x7fffffffffffffff;
705 }
706 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) {
707 fs->e2fs_uhash = 3;
708 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) {
709 #ifdef __CHAR_UNSIGNED__
710 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH);
711 fs->e2fs_uhash = 3;
712 #else
713 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH);
714 #endif
715 }
716 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
717 error = ext2_sb_csum_verify(fs);
718
719 return (error);
720 }
721
722 /*
723 * Reload all incore data for a filesystem (used after running fsck on
724 * the root filesystem and finding things to fix). The filesystem must
725 * be mounted read-only.
726 *
727 * Things to do to update the mount:
728 * 1) invalidate all cached meta-data.
729 * 2) re-read superblock from disk.
730 * 3) invalidate all cluster summary information.
731 * 4) invalidate all inactive vnodes.
732 * 5) invalidate all cached file data.
733 * 6) re-read inode data for all active vnodes.
734 * XXX we are missing some steps, in particular # 3, this has to be reviewed.
735 */
736 static int
737 ext2_reload(struct mount *mp, struct thread *td)
738 {
739 struct vnode *vp, *mvp, *devvp;
740 struct inode *ip;
741 struct buf *bp;
742 struct ext2fs *es;
743 struct m_ext2fs *fs;
744 struct csum *sump;
745 int error, i;
746 int32_t *lp;
747
748 if ((mp->mnt_flag & MNT_RDONLY) == 0)
749 return (EINVAL);
750 /*
751 * Step 1: invalidate all cached meta-data.
752 */
753 devvp = VFSTOEXT2(mp)->um_devvp;
754 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
755 if (vinvalbuf(devvp, 0, 0, 0) != 0)
756 panic("ext2_reload: dirty1");
757 VOP_UNLOCK(devvp);
758
759 /*
760 * Step 2: re-read superblock from disk.
761 * constants have been adjusted for ext2
762 */
763 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
764 return (error);
765 es = (struct ext2fs *)bp->b_data;
766 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
767 brelse(bp);
768 return (EIO); /* XXX needs translation */
769 }
770 fs = VFSTOEXT2(mp)->um_e2fs;
771 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
772
773 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) {
774 brelse(bp);
775 return (error);
776 }
777 #ifdef UNKLAR
778 if (fs->fs_sbsize < SBSIZE)
779 bp->b_flags |= B_INVAL;
780 #endif
781 brelse(bp);
782
783 /*
784 * Step 3: invalidate all cluster summary information.
785 */
786 if (fs->e2fs_contigsumsize > 0) {
787 lp = fs->e2fs_maxcluster;
788 sump = fs->e2fs_clustersum;
789 for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
790 *lp++ = fs->e2fs_contigsumsize;
791 sump->cs_init = 0;
792 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
793 }
794 }
795
796 loop:
797 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
798 /*
799 * Step 4: invalidate all cached file data.
800 */
801 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
802 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
803 goto loop;
804 }
805 if (vinvalbuf(vp, 0, 0, 0))
806 panic("ext2_reload: dirty2");
807
808 /*
809 * Step 5: re-read inode data for all active vnodes.
810 */
811 ip = VTOI(vp);
812 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
813 (int)fs->e2fs_bsize, NOCRED, &bp);
814 if (error) {
815 vput(vp);
816 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
817 return (error);
818 }
819
820 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
821 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
822
823 brelse(bp);
824 vput(vp);
825
826 if (error) {
827 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
828 return (error);
829 }
830 }
831 return (0);
832 }
833
834 /*
835 * Common code for mount and mountroot.
836 */
837 static int
838 ext2_mountfs(struct vnode *devvp, struct mount *mp)
839 {
840 struct ext2mount *ump;
841 struct buf *bp;
842 struct m_ext2fs *fs;
843 struct ext2fs *es;
844 struct cdev *dev = devvp->v_rdev;
845 struct g_consumer *cp;
846 struct bufobj *bo;
847 struct csum *sump;
848 int error;
849 int ronly;
850 int i;
851 u_long size;
852 int32_t *lp;
853 int32_t e2fs_maxcontig;
854
855 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
856 /* XXX: use VOP_ACESS to check FS perms */
857 g_topology_lock();
858 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
859 g_topology_unlock();
860 VOP_UNLOCK(devvp);
861 if (error)
862 return (error);
863
864 /* XXX: should we check for some sectorsize or 512 instead? */
865 if (((SBSIZE % cp->provider->sectorsize) != 0) ||
866 (SBSIZE < cp->provider->sectorsize)) {
867 g_topology_lock();
868 g_vfs_close(cp);
869 g_topology_unlock();
870 return (EINVAL);
871 }
872
873 bo = &devvp->v_bufobj;
874 bo->bo_private = cp;
875 bo->bo_ops = g_vfs_bufops;
876 if (devvp->v_rdev->si_iosize_max != 0)
877 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
878 if (mp->mnt_iosize_max > maxphys)
879 mp->mnt_iosize_max = maxphys;
880
881 bp = NULL;
882 ump = NULL;
883 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
884 goto out;
885 es = (struct ext2fs *)bp->b_data;
886 if (ext2_check_sb_compat(es, dev, ronly) != 0) {
887 error = EINVAL; /* XXX needs translation */
888 goto out;
889 }
890 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 ||
891 (le16toh(es->e2fs_state) & E2FS_ERRORS)) {
892 if (ronly || (mp->mnt_flag & MNT_FORCE)) {
893 printf(
894 "WARNING: Filesystem was not properly dismounted\n");
895 } else {
896 printf(
897 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n");
898 error = EPERM;
899 goto out;
900 }
901 }
902 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
903
904 /*
905 * I don't know whether this is the right strategy. Note that
906 * we dynamically allocate both an m_ext2fs and an ext2fs
907 * while Linux keeps the super block in a locked buffer.
908 */
909 ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
910 M_EXT2MNT, M_WAITOK | M_ZERO);
911 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
912 M_EXT2MNT, M_WAITOK);
913 mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
914 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
915 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
916 goto out;
917
918 /*
919 * Calculate the maximum contiguous blocks and size of cluster summary
920 * array. In FFS this is done by newfs; however, the superblock
921 * in ext2fs doesn't have these variables, so we can calculate
922 * them here.
923 */
924 e2fs_maxcontig = MAX(1, maxphys / ump->um_e2fs->e2fs_bsize);
925 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
926 ump->um_e2fs->e2fs_maxsymlinklen = EXT2_MAXSYMLINKLEN;
927 if (ump->um_e2fs->e2fs_contigsumsize > 0) {
928 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
929 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
930 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
931 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
932 lp = ump->um_e2fs->e2fs_maxcluster;
933 sump = ump->um_e2fs->e2fs_clustersum;
934 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
935 *lp++ = ump->um_e2fs->e2fs_contigsumsize;
936 sump->cs_init = 0;
937 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
938 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
939 }
940 }
941
942 brelse(bp);
943 bp = NULL;
944 fs = ump->um_e2fs;
945 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */
946
947 /*
948 * If the fs is not mounted read-only, make sure the super block is
949 * always written back on a sync().
950 */
951 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0;
952 if (ronly == 0) {
953 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */
954 fs->e2fs->e2fs_state =
955 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
956 }
957 mp->mnt_data = ump;
958 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
959 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
960 MNT_ILOCK(mp);
961 mp->mnt_flag |= MNT_LOCAL;
962 MNT_IUNLOCK(mp);
963 ump->um_mountp = mp;
964 ump->um_dev = dev;
965 ump->um_devvp = devvp;
966 ump->um_bo = &devvp->v_bufobj;
967 ump->um_cp = cp;
968
969 /*
970 * Setting those two parameters allowed us to use
971 * ufs_bmap w/o changse!
972 */
973 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
974 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1;
975 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
976 if (ronly == 0)
977 ext2_sbupdate(ump, MNT_WAIT);
978 /*
979 * Initialize filesystem stat information in mount struct.
980 */
981 MNT_ILOCK(mp);
982 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
983 MNTK_USES_BCACHE;
984 MNT_IUNLOCK(mp);
985 return (0);
986 out:
987 if (bp)
988 brelse(bp);
989 if (cp != NULL) {
990 g_topology_lock();
991 g_vfs_close(cp);
992 g_topology_unlock();
993 }
994 if (ump) {
995 mtx_destroy(EXT2_MTX(ump));
996 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
997 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
998 free(ump->um_e2fs->e2fs, M_EXT2MNT);
999 free(ump->um_e2fs, M_EXT2MNT);
1000 free(ump, M_EXT2MNT);
1001 mp->mnt_data = NULL;
1002 }
1003 return (error);
1004 }
1005
1006 /*
1007 * Unmount system call.
1008 */
1009 static int
1010 ext2_unmount(struct mount *mp, int mntflags)
1011 {
1012 struct ext2mount *ump;
1013 struct m_ext2fs *fs;
1014 struct csum *sump;
1015 int error, flags, i, ronly;
1016
1017 flags = 0;
1018 if (mntflags & MNT_FORCE) {
1019 if (mp->mnt_flag & MNT_ROOTFS)
1020 return (EINVAL);
1021 flags |= FORCECLOSE;
1022 }
1023 if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
1024 return (error);
1025 ump = VFSTOEXT2(mp);
1026 fs = ump->um_e2fs;
1027 ronly = fs->e2fs_ronly;
1028 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
1029 if (fs->e2fs_wasvalid)
1030 fs->e2fs->e2fs_state =
1031 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN);
1032 ext2_sbupdate(ump, MNT_WAIT);
1033 }
1034
1035 g_topology_lock();
1036 g_vfs_close(ump->um_cp);
1037 g_topology_unlock();
1038 vrele(ump->um_devvp);
1039 sump = fs->e2fs_clustersum;
1040 for (i = 0; i < fs->e2fs_gcount; i++, sump++)
1041 free(sump->cs_sum, M_EXT2MNT);
1042 free(fs->e2fs_clustersum, M_EXT2MNT);
1043 free(fs->e2fs_maxcluster, M_EXT2MNT);
1044 free(fs->e2fs_gd, M_EXT2MNT);
1045 free(fs->e2fs_contigdirs, M_EXT2MNT);
1046 free(fs->e2fs, M_EXT2MNT);
1047 free(fs, M_EXT2MNT);
1048 free(ump, M_EXT2MNT);
1049 mp->mnt_data = NULL;
1050 return (error);
1051 }
1052
/*
 * Flush out all the files in a filesystem.
 *
 * Hands the mount straight to vflush(), passing 0 as its second argument
 * and the caller's flags and thread unchanged; the vflush() status is the
 * return value.
 */
static int
ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
{

	return (vflush(mp, 0, flags, td));
}
1064
1065 /*
1066 * Get filesystem statistics.
1067 */
1068 int
1069 ext2_statfs(struct mount *mp, struct statfs *sbp)
1070 {
1071 struct ext2mount *ump;
1072 struct m_ext2fs *fs;
1073 uint32_t overhead, overhead_per_group, ngdb;
1074 int i, ngroups;
1075
1076 ump = VFSTOEXT2(mp);
1077 fs = ump->um_e2fs;
1078 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC)
1079 panic("ext2_statfs");
1080
1081 /*
1082 * Compute the overhead (FS structures)
1083 */
1084 overhead_per_group =
1085 1 /* block bitmap */ +
1086 1 /* inode bitmap */ +
1087 fs->e2fs_itpg;
1088 overhead = le32toh(fs->e2fs->e2fs_first_dblock) +
1089 fs->e2fs_gcount * overhead_per_group;
1090 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1091 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) {
1092 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
1093 if (ext2_cg_has_sb(fs, i))
1094 ngroups++;
1095 }
1096 } else {
1097 ngroups = fs->e2fs_gcount;
1098 }
1099 ngdb = fs->e2fs_gdbcount;
1100 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 &&
1101 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE)
1102 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb);
1103 overhead += ngroups * (1 /* superblock */ + ngdb);
1104
1105 sbp->f_bsize = EXT2_FRAG_SIZE(fs);
1106 sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
1107 sbp->f_blocks = fs->e2fs_bcount - overhead;
1108 sbp->f_bfree = fs->e2fs_fbcount;
1109 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount;
1110 sbp->f_files = le32toh(fs->e2fs->e2fs_icount);
1111 sbp->f_ffree = fs->e2fs_ficount;
1112 return (0);
1113 }
1114
1115 /*
1116 * Go through the disk queues to initiate sandbagged IO;
1117 * go through the inodes to write those that have been modified;
1118 * initiate the writing of the super block if it has been modified.
1119 *
1120 * Note: we are always called with the filesystem marked `MPBUSY'.
1121 */
1122 static int
1123 ext2_sync(struct mount *mp, int waitfor)
1124 {
1125 struct vnode *mvp, *vp;
1126 struct thread *td;
1127 struct inode *ip;
1128 struct ext2mount *ump = VFSTOEXT2(mp);
1129 struct m_ext2fs *fs;
1130 int error, allerror = 0;
1131
1132 td = curthread;
1133 fs = ump->um_e2fs;
1134 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */
1135 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt);
1136 }
1137
1138 /*
1139 * Write back each (modified) inode.
1140 */
1141 loop:
1142 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1143 if (vp->v_type == VNON) {
1144 VI_UNLOCK(vp);
1145 continue;
1146 }
1147 ip = VTOI(vp);
1148 if ((ip->i_flag &
1149 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1150 (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1151 waitfor == MNT_LAZY)) {
1152 VI_UNLOCK(vp);
1153 continue;
1154 }
1155 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1156 if (error) {
1157 if (error == ENOENT) {
1158 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1159 goto loop;
1160 }
1161 continue;
1162 }
1163 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
1164 allerror = error;
1165 vput(vp);
1166 }
1167
1168 /*
1169 * Force stale filesystem control information to be flushed.
1170 */
1171 if (waitfor != MNT_LAZY) {
1172 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1173 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
1174 allerror = error;
1175 VOP_UNLOCK(ump->um_devvp);
1176 }
1177
1178 /*
1179 * Write back modified superblock.
1180 */
1181 if (fs->e2fs_fmod != 0) {
1182 fs->e2fs_fmod = 0;
1183 fs->e2fs->e2fs_wtime = htole32(time_second);
1184 if ((error = ext2_cgupdate(ump, waitfor)) != 0)
1185 allerror = error;
1186 }
1187 return (allerror);
1188 }
1189
1190 /*
1191 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
1192 * in from disk. If it is in core, wait for the lock bit to clear, then
1193 * return the inode locked. Detection and handling of mount points must be
1194 * done by the calling routine.
1195 */
1196 static int
1197 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1198 {
1199 struct m_ext2fs *fs;
1200 struct inode *ip;
1201 struct ext2mount *ump;
1202 struct buf *bp;
1203 struct vnode *vp;
1204 struct thread *td;
1205 unsigned int i, used_blocks;
1206 int error;
1207
1208 td = curthread;
1209 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
1210 if (error || *vpp != NULL)
1211 return (error);
1212
1213 ump = VFSTOEXT2(mp);
1214 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
1215
1216 /* Allocate a new vnode/inode. */
1217 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
1218 *vpp = NULL;
1219 free(ip, M_EXT2NODE);
1220 return (error);
1221 }
1222 vp->v_data = ip;
1223 ip->i_vnode = vp;
1224 ip->i_e2fs = fs = ump->um_e2fs;
1225 ip->i_ump = ump;
1226 ip->i_number = ino;
1227 cluster_init_vn(&ip->i_clusterw);
1228
1229 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1230 error = insmntque(vp, mp);
1231 if (error != 0) {
1232 free(ip, M_EXT2NODE);
1233 *vpp = NULL;
1234 return (error);
1235 }
1236 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
1237 if (error || *vpp != NULL)
1238 return (error);
1239
1240 /* Read in the disk contents for the inode, copy into the inode. */
1241 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1242 (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
1243 /*
1244 * The inode does not contain anything useful, so it would
1245 * be misleading to leave it on its hash chain. With mode
1246 * still zero, it will be unlinked and returned to the free
1247 * list by vput().
1248 */
1249 brelse(bp);
1250 vput(vp);
1251 *vpp = NULL;
1252 return (error);
1253 }
1254 /* convert ext2 inode to dinode */
1255 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data +
1256 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip);
1257 if (error) {
1258 brelse(bp);
1259 vput(vp);
1260 *vpp = NULL;
1261 return (error);
1262 }
1263 ip->i_block_group = ino_to_cg(fs, ino);
1264 ip->i_next_alloc_block = 0;
1265 ip->i_next_alloc_goal = 0;
1266
1267 /*
1268 * Now we want to make sure that block pointers for unused
1269 * blocks are zeroed out - ext2_balloc depends on this
1270 * although for regular files and directories only
1271 *
1272 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
1273 * out because we could corrupt the extent tree.
1274 */
1275 if (!(ip->i_flag & IN_E4EXTENTS) &&
1276 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
1277 used_blocks = howmany(ip->i_size, fs->e2fs_bsize);
1278 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1279 ip->i_db[i] = 0;
1280 }
1281
1282 bqrelse(bp);
1283
1284 #ifdef EXT2FS_PRINT_EXTENTS
1285 ext2_print_inode(ip);
1286 error = ext4_ext_walk(ip);
1287 if (error) {
1288 vput(vp);
1289 *vpp = NULL;
1290 return (error);
1291 }
1292 #endif
1293
1294 /*
1295 * Initialize the vnode from the inode, check for aliases.
1296 * Note that the underlying vnode may have changed.
1297 */
1298 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
1299 vput(vp);
1300 *vpp = NULL;
1301 return (error);
1302 }
1303
1304 /*
1305 * Finish inode initialization.
1306 */
1307
1308 vn_set_state(vp, VSTATE_CONSTRUCTED);
1309 *vpp = vp;
1310 return (0);
1311 }
1312
1313 /*
1314 * File handle to vnode
1315 *
1316 * Have to be really careful about stale file handles:
1317 * - check that the inode number is valid
1318 * - call ext2_vget() to get the locked inode
1319 * - check for an unallocated inode (i_mode == 0)
1320 * - check that the given client host has export rights and return
1321 * those rights via. exflagsp and credanonp
1322 */
1323 static int
1324 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1325 {
1326 struct inode *ip;
1327 struct ufid *ufhp;
1328 struct vnode *nvp;
1329 struct m_ext2fs *fs;
1330 int error;
1331
1332 ufhp = (struct ufid *)fhp;
1333 fs = VFSTOEXT2(mp)->um_e2fs;
1334 if (ufhp->ufid_ino < EXT2_ROOTINO ||
1335 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg)
1336 return (ESTALE);
1337
1338 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1339 if (error) {
1340 *vpp = NULLVP;
1341 return (error);
1342 }
1343 ip = VTOI(nvp);
1344 if (ip->i_mode == 0 ||
1345 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1346 vput(nvp);
1347 *vpp = NULLVP;
1348 return (ESTALE);
1349 }
1350 *vpp = nvp;
1351 vnode_create_vobject(*vpp, 0, curthread);
1352 return (0);
1353 }
1354
1355 /*
1356 * Write a superblock and associated information back to disk.
1357 */
1358 static int
1359 ext2_sbupdate(struct ext2mount *mp, int waitfor)
1360 {
1361 struct m_ext2fs *fs = mp->um_e2fs;
1362 struct ext2fs *es = fs->e2fs;
1363 struct buf *bp;
1364 int error = 0;
1365
1366 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff);
1367 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff);
1368 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff);
1369 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1370 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32);
1371 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32);
1372 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32);
1373 }
1374
1375 es->e2fs_ficount = htole32(fs->e2fs_ficount);
1376
1377 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1378 ext2_sb_csum_set(fs);
1379
1380 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
1381 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs));
1382 if (waitfor == MNT_WAIT)
1383 error = bwrite(bp);
1384 else
1385 bawrite(bp);
1386
1387 /*
1388 * The buffers for group descriptors, inode bitmaps and block bitmaps
1389 * are not busy at this point and are (hopefully) written by the
1390 * usual sync mechanism. No need to write them here.
1391 */
1392 return (error);
1393 }
1394 int
1395 ext2_cgupdate(struct ext2mount *mp, int waitfor)
1396 {
1397 struct m_ext2fs *fs = mp->um_e2fs;
1398 struct buf *bp;
1399 int i, j, g_count = 0, error = 0, allerror = 0;
1400
1401 allerror = ext2_sbupdate(mp, waitfor);
1402
1403 /* Update gd csums */
1404 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1405 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
1406 ext2_gd_csum_set(fs);
1407
1408 for (i = 0; i < fs->e2fs_gdbcount; i++) {
1409 bp = getblk(mp->um_devvp, fsbtodb(fs,
1410 ext2_cg_location(fs, i)),
1411 fs->e2fs_bsize, 0, 0, 0);
1412 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
1413 memcpy(bp->b_data, &fs->e2fs_gd[
1414 i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
1415 fs->e2fs_bsize);
1416 } else {
1417 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE &&
1418 g_count < fs->e2fs_gcount; j++, g_count++)
1419 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE,
1420 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE);
1421 }
1422 if (waitfor == MNT_WAIT)
1423 error = bwrite(bp);
1424 else
1425 bawrite(bp);
1426 }
1427
1428 if (!allerror && error)
1429 allerror = error;
1430 return (allerror);
1431 }
1432
1433 /*
1434 * Return the root of a filesystem.
1435 */
1436 static int
1437 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
1438 {
1439 struct vnode *nvp;
1440 int error;
1441
1442 error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
1443 if (error)
1444 return (error);
1445 *vpp = nvp;
1446 return (0);
1447 }
Cache object: b63729b788b36b8f24d498f8da2fdbcc
|