/*-
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_quota.h"
#include "opt_ufs.h"
#include "opt_ffs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/gjournal.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/vm_page.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

#include <ddb/ddb.h>

static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static int	ffs_sync_lazy(struct mount *mp);

static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;

static struct vfsops ufs_vfsops = {
	.vfs_extattrctl = ffs_extattrctl,
	.vfs_fhtovp = ffs_fhtovp,
	.vfs_init = ffs_init,
	.vfs_mount = ffs_mount,
	.vfs_cmount = ffs_cmount,
	.vfs_quotactl = ufs_quotactl,
	.vfs_root = ufs_root,
	.vfs_statfs = ffs_statfs,
	.vfs_sync = ffs_sync,
	.vfs_uninit = ffs_uninit,
	.vfs_unmount = ffs_unmount,
	.vfs_vget = ffs_vget,
	.vfs_susp_clean = process_deferred_inactive,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);

static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

static struct buf_ops ffs_ops = {
	.bop_name = "FFS",
	.bop_write = ffs_bufwrite,
	.bop_strategy = ffs_geom_strategy,
	.bop_sync = bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush = bufbdflush,
#else
	.bop_bdflush = ffs_bdflush,
#endif
};
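
/*
 * In outline, the buf_ops table above takes effect once ffs_mountfs()
 * installs it on the device vnode ("devvp->v_bufobj.bo_ops = &ffs_ops"),
 * after which buffer I/O on that vnode dispatches through FFS, roughly:
 *
 *	bwrite(bp);	resolves via bp->b_bufobj->bo_ops to ffs_bufwrite()
 *	bstrategy(bp);	likewise resolves to ffs_geom_strategy()
 *
 * letting snapshots and soft updates intercept every device-level write.
 */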

/*
 * Note that userquota and groupquota options are not currently used
 * by UFS/FFS code and generally mount(8) does not pass those options
 * from userland, but they can be passed by loader(8) via
 * vfs.root.mountfrom.options.
 */
static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "groupquota",
    "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
    "nosymfollow", "sync", "union", "userquota", NULL };

static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = NULL;
	struct fs *fs;
	pid_t fsckpid = 0;
	int error, error1, flags;
	uint64_t mntorflags, saved_mnt_flag;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	vfs_deleteopt(mp->mnt_optnew, "groupquota");
	vfs_deleteopt(mp->mnt_optnew, "userquota");

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
	    vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
		/*
		 * Once we have set the restricted PID, do not
		 * persist "fsckpid" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "fsckpid");
		vfs_deleteopt(mp->mnt_opt, "fsckpid");
		if (mp->mnt_flag & MNT_UPDATE) {
			if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
			    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
				vfs_mount_error(mp,
				    "Checker enable: Must be read-only");
				return (EINVAL);
			}
		} else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
			vfs_mount_error(mp,
			    "Checker enable: Must be read-only");
			return (EINVAL);
		}
		/* Set to -1 if we are done */
		if (fsckpid == 0)
			fsckpid = -1;
	}
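
	/*
	 * To illustrate the contract enforced above (the mount invocation
	 * shown is hypothetical): a checker such as fsck_ffs(8) is expected
	 * to update a read-only mount with something like
	 *
	 *	mount -u -o ro,fsckpid=<its pid> /dev/ada0p2 /mnt
	 *
	 * and to signal completion by passing fsckpid=0, which the code
	 * above turns into the sentinel value -1 handled in the MNT_UPDATE
	 * branch below.
	 */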

	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
			vfs_mount_error(mp,
			    "\"acls\" and \"nfsv4acls\" options "
			    "are mutually exclusive");
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

	MNT_ILOCK(mp);
	mp->mnt_flag |= mntorflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		if (fsckpid == -1 && ump->um_fsckpid > 0) {
			if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
			    (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
				return (error);
			g_topology_lock();
			/*
			 * Return to normal read-only mode.
			 */
			error = g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			ump->um_fsckpid = 0;
		}
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (MOUNTEDSOFTDEP(mp)) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("WARNING: %s Update error: blocks %jd "
				    "files %d\n", fs->fs_fsmnt,
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (MOUNTEDSOFTDEP(mp))
				softdep_unmount(mp);
			g_topology_lock();
			/*
			 * Drop our write and exclusive access.
			 */
			g_access(ump->um_cp, 0, -1, -1);
			g_topology_unlock();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow writers to note that the filesystem
			 * is read-only now.
			 */
			vfs_write_resume(mp, 0);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td, 0)) != 0)
			return (error);
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If we are running a checker, do not allow upgrade.
			 */
			if (ump->um_fsckpid > 0) {
				vfs_mount_error(mp,
				    "Active checker, cannot upgrade to write");
				return (EINVAL);
			}
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags &
				    (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
				    (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not properly "
					    "dismounted\n", fs->fs_fsmnt);
				} else {
					vfs_mount_error(mp,
					    "R/W mount of %s denied. %s.%s",
					    fs->fs_fsmnt,
					    "Filesystem is not clean - run fsck",
					    (fs->fs_flags & FS_SUJ) == 0 ? "" :
					    " Forced mount will invalidate"
					    " journal contents");
					return (EPERM);
				}
			}
			g_topology_lock();
			/*
			 * Request exclusive write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 1);
			g_topology_unlock();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			saved_mnt_flag = MNT_RDONLY;
			if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag &
			    MNT_ASYNC) != 0)
				saved_mnt_flag |= MNT_ASYNC;
			mp->mnt_flag &= ~saved_mnt_flag;
			MNT_IUNLOCK(mp);
			fs->fs_mtime = time_second;
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))) {
				fs->fs_ronly = 1;
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
				return (error);
			}
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 1;
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vfs_write_resume(mp, 0);
		}
		/*
		 * Soft updates are incompatible with "async", so if
		 * we are doing soft updates, stop the user from setting
		 * the async flag in an update. Softdep_mount() clears it
		 * in an initial mount or ro->rw remount.
		 */
		if (MOUNTEDSOFTDEP(mp)) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}
		/*
		 * If this is a request from fsck to clean up the filesystem,
		 * then allow the specified pid to proceed.
		 */
		if (fsckpid > 0) {
			if (ump->um_fsckpid != 0) {
				vfs_mount_error(mp,
				    "Active checker already running on %s",
				    fs->fs_fsmnt);
				return (EINVAL);
			}
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
				vfs_mount_error(mp,
				    "Checker activation failed on %s",
				    fs->fs_fsmnt);
				return (error);
			}
			ump->um_fsckpid = fsckpid;
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			fs->fs_mtime = time_second;
			fs->fs_fmod = 1;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT, 0);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));

		/*
		 * Must not call namei() while owning busy ref.
		 */
		vfs_unbusy(mp);
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	error = namei(&ndp);
	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
		/*
		 * Unmount does not start if MNT_UPDATE is set.  Mount
		 * update busies mp before setting MNT_UPDATE.  We
		 * must be able to retain our busy ref successfully,
		 * without sleep.
		 */
		error1 = vfs_busy(mp, MBF_NOWAIT);
		MPASS(error1 == 0);
	}
	if (error != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it is not the same vnode, or at least the same
		 * underlying device, then the update is invalid.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount_alloc() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
		if (fsckpid > 0) {
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			ump = VFSTOUFS(mp);
			fs = ump->um_fs;
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
				printf("WARNING: %s: Checker activation "
				    "failed\n", fs->fs_fsmnt);
			} else {
				ump->um_fsckpid = fsckpid;
				if (fs->fs_snapinum[0] != 0)
					ffs_snapshot_mount(mp);
				fs->fs_mtime = time_second;
				fs->fs_clean = 0;
				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
			}
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with old mount system call.
 */

static int
ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{
	struct ufs_args args;
	struct export_args exp;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);
	vfs_oexport_conv(&args.export, &exp);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &exp, sizeof(exp));
	error = kernel_mount(ma, flags);

	return (error);
}
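
/*
 * In other words, ffs_cmount() only translates the historical mount(2)
 * ABI (a struct ufs_args copied in from userland) into the name/value
 * option list that nmount(2) builds and ffs_mount() consumes above; no
 * FFS-specific work happens here.
 */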

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). If the 'force' flag
 * is 0, the filesystem must be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
 *	   writers, if requested.
 *	6) invalidate all cached file data.
 *	7) re-read inode data for all active vnodes.
 */
int
ffs_reload(struct mount *mp, struct thread *td, int flags)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, error;
	u_long size;
	int32_t *lp;

	ump = VFSTOUFS(mp);

	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
		MNT_IUNLOCK(mp);
		return (EINVAL);
	}
	MNT_IUNLOCK(mp);

	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	    newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (e.g., fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	newfs->fs_ronly = fs->fs_ronly;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: reload pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	free(fs->fs_csp, M_UFSMNT);
	space = malloc(size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	if ((flags & FFSR_UNSUSPEND) != 0) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
		wakeup(&mp->mnt_flag);
		MNT_IUNLOCK(mp);
	}

loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Skip syncer vnode.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Step 6: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 7: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}
	return (0);
}
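
/*
 * A minimal sketch of how the reload path above is reached from userland,
 * assuming a read-only root filesystem:
 *
 *	mount -u -o reload /
 *
 * mount(8) turns the generic "reload" option into MNT_RELOAD, and
 * ffs_mount() invokes ffs_reload(mp, td, 0) while the filesystem is
 * still read-only.
 */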

/*
 * Possible superblock locations ordered from most to least likely.
 */
static int sblock_try[] = SBLOCKSEARCH;
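
/*
 * SBLOCKSEARCH comes from <ufs/ffs/fs.h>; at this vintage it expands,
 * approximately, to
 *
 *	{ SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
 *
 * i.e. byte offsets 65536, 8192, 0 and 262144, with -1 terminating the
 * probe loop in ffs_mountfs() below.
 */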

/*
 * Common code for mount and mountroot
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, len, ronly;
	u_long size;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	bp = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
	dev = devvp->v_rdev;
	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
	    (uintptr_t)mp) == 0) {
		VOP_UNLOCK(devvp, 0);
		return (EBUSY);
	}
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
	g_topology_unlock();
	if (error != 0) {
		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
		VOP_UNLOCK(devvp, 0);
		return (error);
	}
	dev_ref(dev);
	devvp->v_bufobj.bo_ops = &ffs_ops;
	VOP_UNLOCK(devvp, 0);
	if (dev->si_iosize_max != 0)
		mp->mnt_iosize_max = dev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		    (fs->fs_magic == FS_UFS2_MAGIC &&
		    (fs->fs_sblockloc == sblockloc ||
		    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
		    (fs->fs_flags & FS_DOSOFTDEP))) {
			printf("WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			vfs_mount_error(mp, "R/W mount of %s denied. %s%s",
			    fs->fs_fsmnt, "Filesystem is not clean - run fsck.",
			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
			    " Forced mount will invalidate journal contents");
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("WARNING: %s: lost blocks %jd files %d\n",
			    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: mount pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		len = 1024;
		mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &len,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf("WARNING: %s: GJOURNAL flag on fs "
			    "but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf("WARNING: %s: GJOURNAL flag on fs but no "
		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	ump->um_snapgone = ffs_snapgone;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = malloc(size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: multilabel flag on fs but "
		    "no MAC support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_NFS4ACLS)
			printf("WARNING: %s: ACLs flag on fs conflicts with "
			    "\"nfsv4acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_NFS4ACLS;
		mp->mnt_flag |= MNT_ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_ACLS)
			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
			    "with \"acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_ACLS;
		mp->mnt_flag |= MNT_NFS4ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
		    "ACLs support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_TRIM) != 0) {
		len = sizeof(int);
		if (g_io_getattr("GEOM::candelete", cp, &len,
		    &ump->um_candelete) == 0) {
			if (!ump->um_candelete)
				printf("WARNING: %s: TRIM flag on fs but disk "
				    "does not support TRIM\n",
				    mp->mnt_stat.f_mntonname);
		} else {
			printf("WARNING: %s: TRIM flag on fs but disk does "
			    "not confirm that it supports TRIM\n",
			    mp->mnt_stat.f_mntonname);
			ump->um_candelete = 0;
		}
		if (ump->um_candelete) {
			ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
			    taskqueue_thread_enqueue, &ump->um_trim_tq);
			taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
			    "%s trim", mp->mnt_stat.f_mntonname);
		}
	}

	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
	mp->mnt_stat.f_iosize = fs->fs_bsize;

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * This will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		fs->fs_mtime = time_second;
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			ffs_flushfiles(mp, FORCECLOSE, td);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem state information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 *	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
	dev_rel(dev);
	return (error);
}

#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
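
/*
 * Illustrative use from a debugging session (not normal operation):
 *
 *	sysctl debug.bigcgs=1
 *
 * makes ffs_oldfscompat_read() below substitute fs_bsize for fs_cgsize,
 * and ffs_oldfscompat_write() restores the saved value before the
 * superblock goes back to disk.
 */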

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}

/*
 * unmount system call
 */
static int
ffs_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	susp = fs->fs_ronly == 0;
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("WARNING: unmount %s: ufs_extattr_stop "
			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
			    error);
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		error = vfs_write_suspend_umnt(mp);
		if (error != 0)
			goto fail1;
	}
	if (MOUNTEDSOFTDEP(mp))
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	if (error != 0 && error != ENXIO)
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: unmount %s: pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (MOUNTEDSOFTDEP(mp))
		softdep_unmount(mp);
	if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error && error != ENXIO) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
	if (ump->um_trim_tq != NULL) {
		while (ump->um_trim_inflight != 0)
			pause("ufsutr", hz);
		taskqueue_drain_all(ump->um_trim_tq);
		taskqueue_free(ump->um_trim_tq);
	}
	g_topology_lock();
	if (ump->um_fsckpid > 0) {
		/*
		 * Return to normal read-only mode.
		 */
		error = g_access(ump->um_cp, 0, -1, 0);
		ump->um_fsckpid = 0;
	}
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
	vrele(ump->um_devvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if (td->td_su == mp) {
		td->td_su = NULL;
		vfs_rel(mp);
	}
	return (error);

fail:
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
fail1:
#ifdef UFS_EXTATTR
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int qerror, error;

	ump = VFSTOUFS(mp);
	qerror = 0;
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			error = quotaoff(td, mp, i);
			if (error != 0) {
				if ((flags & EARLYFLUSH) == 0)
					return (error);
				else
					qerror = error;
			}
		}

		/*
		 * Here we fall through to vflush again to ensure that
		 * we have gotten rid of all the system vnodes, unless
		 * quotas must not be closed.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}

	/*
	 * Do not close system files if quotas were not closed, to be
	 * able to sync the remaining dquots. The freeblks softupdate
	 * workitems might hold a reference on a dquot, preventing
	 * quotaoff() from completing. Next round of
	 * softdep_flushworklist() iteration should process the
	 * blockers, allowing the next run of quotaoff() to finally
	 * flush held dquots.
	 *
	 * Otherwise, flush all the files.
	 */
	if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
		return (error);

	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}

/*
 * Get filesystem statistics.
 */
static int
ffs_statfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}
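
/*
 * Rough correspondence with df(1), for orientation only: f_blocks,
 * f_bfree and f_bavail are reported in f_bsize units, which ffs_statfs()
 * sets to the fragment size (fs_fsize); blocks and inodes still pending
 * deallocation by soft updates are credited back as free above.
 */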

static bool
sync_doupdate(struct inode *ip)
{

	return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
	    IN_UPDATE)) != 0);
}
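
/*
 * sync_doupdate() is the cheap "does this inode have metadata to push?"
 * test used twice by ffs_sync_lazy() below: any pending IN_* timestamp
 * or change flag means ffs_update() has work to do.
 */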
1466
1467 /*
1468 * For a lazy sync, we only care about access times, quotas and the
1469 * superblock. Other filesystem changes are already converted to
1470 * cylinder group blocks or inode blocks updates and are written to
1471 * disk by syncer.
1472 */
1473 static int
1474 ffs_sync_lazy(mp)
1475 struct mount *mp;
1476 {
1477 struct vnode *mvp, *vp;
1478 struct inode *ip;
1479 struct thread *td;
1480 int allerror, error;
1481
1482 allerror = 0;
1483 td = curthread;
1484 if ((mp->mnt_flag & MNT_NOATIME) != 0) {
1485 #ifdef QUOTA
1486 qsync(mp);
1487 #endif
1488 goto sbupdate;
1489 }
1490 MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
1491 if (vp->v_type == VNON) {
1492 VI_UNLOCK(vp);
1493 continue;
1494 }
1495 ip = VTOI(vp);
1496
1497 /*
1498 * The IN_ACCESS flag is converted to IN_MODIFIED by
1499 * ufs_close() and ufs_getattr() by the calls to
1500 * ufs_itimes_locked(), without subsequent UFS_UPDATE().
1501 * Test also all the other timestamp flags too, to pick up
1502 * any other cases that could be missed.
1503 */
1504 if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
1505 VI_UNLOCK(vp);
1506 continue;
1507 }
1508 if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
1509 td)) != 0)
1510 continue;
1511 #ifdef QUOTA
1512 qsyncvp(vp);
1513 #endif
1514 if (sync_doupdate(ip))
1515 error = ffs_update(vp, 0);
1516 if (error != 0)
1517 allerror = error;
1518 vput(vp);
1519 }
1520 sbupdate:
1521 if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
1522 (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
1523 allerror = error;
1524 return (allerror);
1525 }
1526
1527 /*
1528 * Go through the disk queues to initiate sandbagged IO;
1529 * go through the inodes to write those that have been modified;
1530 * initiate the writing of the super block if it has been modified.
1531 *
1532 * Note: we are always called with the filesystem marked busy using
1533 * vfs_busy().
1534 */
1535 static int
1536 ffs_sync(mp, waitfor)
1537 struct mount *mp;
1538 int waitfor;
1539 {
1540 struct vnode *mvp, *vp, *devvp;
1541 struct thread *td;
1542 struct inode *ip;
1543 struct ufsmount *ump = VFSTOUFS(mp);
1544 struct fs *fs;
1545 int error, count, lockreq, allerror = 0;
1546 int suspend;
1547 int suspended;
1548 int secondary_writes;
1549 int secondary_accwrites;
1550 int softdep_deps;
1551 int softdep_accdeps;
1552 struct bufobj *bo;
1553
1554 suspend = 0;
1555 suspended = 0;
1556 td = curthread;
1557 fs = ump->um_fs;
1558 if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
1559 panic("%s: ffs_sync: modification on read-only filesystem",
1560 fs->fs_fsmnt);
1561 if (waitfor == MNT_LAZY) {
1562 if (!rebooting)
1563 return (ffs_sync_lazy(mp));
1564 waitfor = MNT_NOWAIT;
1565 }
1566
1567 /*
1568 * Write back each (modified) inode.
1569 */
1570 lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1571 if (waitfor == MNT_SUSPEND) {
1572 suspend = 1;
1573 waitfor = MNT_WAIT;
1574 }
1575 if (waitfor == MNT_WAIT)
1576 lockreq = LK_EXCLUSIVE;
1577 lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1578 loop:
1579 /* Grab snapshot of secondary write counts */
1580 MNT_ILOCK(mp);
1581 secondary_writes = mp->mnt_secondary_writes;
1582 secondary_accwrites = mp->mnt_secondary_accwrites;
1583 MNT_IUNLOCK(mp);
1584
1585 /* Grab snapshot of softdep dependency counts */
1586 softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1587
1588 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1589 /*
1590 * Depend on the vnode interlock to keep things stable enough
1591 * for a quick test. Since there might be hundreds of
1592 * thousands of vnodes, we cannot afford even a subroutine
1593 * call unless there's a good chance that we have work to do.
1594 */
1595 if (vp->v_type == VNON) {
1596 VI_UNLOCK(vp);
1597 continue;
1598 }
1599 ip = VTOI(vp);
1600 if ((ip->i_flag &
1601 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1602 vp->v_bufobj.bo_dirty.bv_cnt == 0) {
1603 VI_UNLOCK(vp);
1604 continue;
1605 }
1606 if ((error = vget(vp, lockreq, td)) != 0) {
1607 if (error == ENOENT || error == ENOLCK) {
1608 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1609 goto loop;
1610 }
1611 continue;
1612 }
1613 #ifdef QUOTA
1614 qsyncvp(vp);
1615 #endif
1616 if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
1617 allerror = error;
1618 vput(vp);
1619 }
1620 /*
1621 * Force stale filesystem control information to be flushed.
1622 */
1623 if (waitfor == MNT_WAIT || rebooting) {
1624 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1625 allerror = error;
1626 /* Flushed work items may create new vnodes to clean */
1627 if (allerror == 0 && count)
1628 goto loop;
1629 }
1630
1631 devvp = ump->um_devvp;
1632 bo = &devvp->v_bufobj;
1633 BO_LOCK(bo);
1634 if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
1635 BO_UNLOCK(bo);
1636 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1637 error = VOP_FSYNC(devvp, waitfor, td);
1638 VOP_UNLOCK(devvp, 0);
1639 if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
1640 error = ffs_sbupdate(ump, waitfor, 0);
1641 if (error != 0)
1642 allerror = error;
1643 if (allerror == 0 && waitfor == MNT_WAIT)
1644 goto loop;
1645 } else if (suspend != 0) {
1646 if (softdep_check_suspend(mp,
1647 devvp,
1648 softdep_deps,
1649 softdep_accdeps,
1650 secondary_writes,
1651 secondary_accwrites) != 0) {
1652 MNT_IUNLOCK(mp);
1653 goto loop; /* More work needed */
1654 }
1655 mtx_assert(MNT_MTX(mp), MA_OWNED);
1656 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1657 MNT_IUNLOCK(mp);
1658 suspended = 1;
1659 } else
1660 BO_UNLOCK(bo);
1661 /*
1662 * Write back modified superblock.
1663 */
1664 if (fs->fs_fmod != 0 &&
1665 (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1666 allerror = error;
1667 return (allerror);
1668 }
1669
1670 int
1671 ffs_vget(mp, ino, flags, vpp)
1672 struct mount *mp;
1673 ino_t ino;
1674 int flags;
1675 struct vnode **vpp;
1676 {
1677 return (ffs_vgetf(mp, ino, flags, vpp, 0));
1678 }
1679
1680 int
1681 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1682 struct mount *mp;
1683 ino_t ino;
1684 int flags;
1685 struct vnode **vpp;
1686 int ffs_flags;
1687 {
1688 struct fs *fs;
1689 struct inode *ip;
1690 struct ufsmount *ump;
1691 struct buf *bp;
1692 struct vnode *vp;
1693 int error;
1694
1695 error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1696 if (error || *vpp != NULL)
1697 return (error);
1698
1699 /*
1700 * We must promote to an exclusive lock for vnode creation. This
1701 * can happen if lookup is passed LOCKSHARED.
1702 */
1703 if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1704 flags &= ~LK_TYPE_MASK;
1705 flags |= LK_EXCLUSIVE;
1706 }
1707
1708 /*
1709 * We do not lock vnode creation as it is believed to be too
1710 * expensive for such rare case as simultaneous creation of vnode
1711 * for same ino by different processes. We just allow them to race
1712 * and check later to decide who wins. Let the race begin!
1713 */
1714
1715 ump = VFSTOUFS(mp);
1716 fs = ump->um_fs;
1717 ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1718
1719 /* Allocate a new vnode/inode. */
1720 error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
1721 &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
1722 if (error) {
1723 *vpp = NULL;
1724 uma_zfree(uma_inode, ip);
1725 return (error);
1726 }
1727 /*
1728 * FFS supports recursive locking.
1729 */
1730 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1731 VN_LOCK_AREC(vp);
1732 vp->v_data = ip;
1733 vp->v_bufobj.bo_bsize = fs->fs_bsize;
1734 ip->i_vnode = vp;
1735 ip->i_ump = ump;
1736 ip->i_number = ino;
1737 ip->i_ea_refs = 0;
1738 ip->i_nextclustercg = -1;
1739 ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
1740 #ifdef QUOTA
1741 {
1742 int i;
1743 for (i = 0; i < MAXQUOTAS; i++)
1744 ip->i_dquot[i] = NODQUOT;
1745 }
1746 #endif
1747
1748 if (ffs_flags & FFSV_FORCEINSMQ)
1749 vp->v_vflag |= VV_FORCEINSMQ;
1750 error = insmntque(vp, mp);
1751 if (error != 0) {
1752 uma_zfree(uma_inode, ip);
1753 *vpp = NULL;
1754 return (error);
1755 }
1756 vp->v_vflag &= ~VV_FORCEINSMQ;
1757 error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1758 if (error || *vpp != NULL)
1759 return (error);
1760
1761 /* Read in the disk contents for the inode, copy into the inode. */
1762 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1763 (int)fs->fs_bsize, NOCRED, &bp);
1764 if (error) {
1765 /*
1766 * The inode does not contain anything useful, so it would
1767 * be misleading to leave it on its hash chain. With mode
1768 * still zero, it will be unlinked and returned to the free
1769 * list by vput().
1770 */
1771 brelse(bp);
1772 vput(vp);
1773 *vpp = NULL;
1774 return (error);
1775 }
1776 if (I_IS_UFS1(ip))
1777 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1778 else
1779 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1780 ffs_load_inode(bp, ip, fs, ino);
1781 if (DOINGSOFTDEP(vp))
1782 softdep_load_inodeblock(ip);
1783 else
1784 ip->i_effnlink = ip->i_nlink;
1785 bqrelse(bp);
1786
1787 /*
1788 * Initialize the vnode from the inode, check for aliases.
1789 * Note that the underlying vnode may have changed.
1790 */
1791 error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
1792 &vp);
1793 if (error) {
1794 vput(vp);
1795 *vpp = NULL;
1796 return (error);
1797 }
1798
1799 /*
1800 * Finish inode initialization.
1801 */
1802 if (vp->v_type != VFIFO) {
1803 /* FFS supports shared locking for all files except fifos. */
1804 VN_LOCK_ASHARE(vp);
1805 }
1806
1807 /*
1808 * Set up a generation number for this inode if it does not
1809 * already have one. This should only happen on old filesystems.
1810 */
1811 if (ip->i_gen == 0) {
1812 while (ip->i_gen == 0)
1813 ip->i_gen = arc4random();
1814 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1815 ip->i_flag |= IN_MODIFIED;
1816 DIP_SET(ip, i_gen, ip->i_gen);
1817 }
1818 }
1819 #ifdef MAC
1820 if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1821 /*
1822 * If this vnode is already allocated, and we're running
1823 * multi-label, attempt to perform a label association
1824 * from the extended attributes on the inode.
1825 */
1826 error = mac_vnode_associate_extattr(mp, vp);
1827 if (error) {
1828 /* ufs_inactive will release ip->i_devvp ref. */
1829 vput(vp);
1830 *vpp = NULL;
1831 return (error);
1832 }
1833 }
1834 #endif
1835
1836 *vpp = vp;
1837 return (0);
1838 }
1839
1840 /*
1841 * File handle to vnode
1842 *
1843 * Have to be really careful about stale file handles:
1844 * - check that the inode number is valid
1845 * - for UFS2 check that the inode number is initialized
1846 * - call ffs_vget() to get the locked inode
1847 * - check for an unallocated inode (i_mode == 0)
1848 * - check that the given client host has export rights and return
1849 * those rights via. exflagsp and credanonp
1850 */
1851 static int
1852 ffs_fhtovp(mp, fhp, flags, vpp)
1853 struct mount *mp;
1854 struct fid *fhp;
1855 int flags;
1856 struct vnode **vpp;
1857 {
1858 struct ufid *ufhp;
1859 struct ufsmount *ump;
1860 struct fs *fs;
1861 struct cg *cgp;
1862 struct buf *bp;
1863 ino_t ino;
1864 u_int cg;
1865 int error;
1866
1867 ufhp = (struct ufid *)fhp;
1868 ino = ufhp->ufid_ino;
1869 ump = VFSTOUFS(mp);
1870 fs = ump->um_fs;
1871 if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1872 return (ESTALE);
1873 /*
1874 * Need to check if inode is initialized because UFS2 does lazy
1875 * initialization and nfs_fhtovp can offer arbitrary inode numbers.
1876 */
1877 if (fs->fs_magic != FS_UFS2_MAGIC)
1878 return (ufs_fhtovp(mp, ufhp, flags, vpp));
1879 cg = ino_to_cg(fs, ino);
1880 error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)),
1881 (int)fs->fs_cgsize, NOCRED, &bp);
1882 if (error)
1883 return (error);
1884 cgp = (struct cg *)bp->b_data;
1885 if (!cg_chkmagic(cgp) || ino >= cg * fs->fs_ipg + cgp->cg_initediblk) {
1886 brelse(bp);
1887 return (ESTALE);
1888 }
1889 brelse(bp);
1890 return (ufs_fhtovp(mp, ufhp, flags, vpp));
1891 }
1892
1893 /*
1894 * Initialize the filesystem.
1895 */
1896 static int
1897 ffs_init(vfsp)
1898 struct vfsconf *vfsp;
1899 {
1900
1901 ffs_susp_initialize();
1902 softdep_initialize();
1903 return (ufs_init(vfsp));
1904 }
1905
1906 /*
1907 * Undo the work of ffs_init().
1908 */
1909 static int
1910 ffs_uninit(vfsp)
1911 struct vfsconf *vfsp;
1912 {
1913 int ret;
1914
1915 ret = ufs_uninit(vfsp);
1916 softdep_uninitialize();
1917 ffs_susp_uninitialize();
1918 return (ret);
1919 }
1920
1921 /*
1922 * Write a superblock and associated information back to disk.
1923 */
1924 int
1925 ffs_sbupdate(ump, waitfor, suspended)
1926 struct ufsmount *ump;
1927 int waitfor;
1928 int suspended;
1929 {
1930 struct fs *fs = ump->um_fs;
1931 struct buf *sbbp;
1932 struct buf *bp;
1933 int blks;
1934 void *space;
1935 int i, size, error, allerror = 0;
1936
1937 if (fs->fs_ronly == 1 &&
1938 (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1939 (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
1940 panic("ffs_sbupdate: write read-only filesystem");
1941 /*
1942 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1943 */
1944 sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
1945 (int)fs->fs_sbsize, 0, 0, 0);
1946 /*
1947 * First write back the summary information.
1948 */
1949 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1950 space = fs->fs_csp;
1951 for (i = 0; i < blks; i += fs->fs_frag) {
1952 size = fs->fs_bsize;
1953 if (i + fs->fs_frag > blks)
1954 size = (blks - i) * fs->fs_fsize;
1955 bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1956 size, 0, 0, 0);
1957 bcopy(space, bp->b_data, (u_int)size);
1958 space = (char *)space + size;
1959 if (suspended)
1960 bp->b_flags |= B_VALIDSUSPWRT;
1961 if (waitfor != MNT_WAIT)
1962 bawrite(bp);
1963 else if ((error = bwrite(bp)) != 0)
1964 allerror = error;
1965 }
1966 /*
1967 * Now write back the superblock itself. If any errors occurred
1968 * up to this point, then fail so that the superblock avoids
1969 * being written out as clean.
1970 */
1971 if (allerror) {
1972 brelse(sbbp);
1973 return (allerror);
1974 }
1975 bp = sbbp;
        if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
            (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
                printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
                    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
                fs->fs_sblockloc = SBLOCK_UFS1;
        }
        if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
            (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
                printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
                    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
                fs->fs_sblockloc = SBLOCK_UFS2;
        }
        fs->fs_fmod = 0;
        fs->fs_time = time_second;
        if (MOUNTEDSOFTDEP(ump->um_mountp))
                softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
        bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
        ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
        if (suspended)
                bp->b_flags |= B_VALIDSUSPWRT;
        if (waitfor != MNT_WAIT)
                bawrite(bp);
        else if ((error = bwrite(bp)) != 0)
                allerror = error;
        return (allerror);
}

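/*
 * Control extended attributes: with UFS_EXTATTR configured the request
 * is handled by the UFS extended attribute code, otherwise it falls
 * through to the default VFS handler.
 */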
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

#ifdef UFS_EXTATTR
        return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname));
#else
        return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
            attrname));
#endif
}

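/*
 * Return an inode and its on-disk dinode to their UMA zones.
 */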
static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

        if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
                uma_zfree(uma_ufs1, ip->i_din1);
        else if (ip->i_din2 != NULL)
                uma_zfree(uma_ufs2, ip->i_din2);
        uma_zfree(uma_inode, ip);
}

static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");

/*
 * Complete a background write started from bwrite.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
        struct bufobj *bufobj;
        struct buf *origbp;

        /*
         * Find the original buffer that we are writing.
         */
        bufobj = bp->b_bufobj;
        BO_LOCK(bufobj);
        if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
                panic("backgroundwritedone: lost buffer");

        /*
         * If the write failed, mark the cylinder group buffer origbp
         * as having an error so that the failed write is not lost.
         */
        if ((bp->b_ioflags & BIO_ERROR) != 0)
                origbp->b_vflags |= BV_BKGRDERR;
        BO_UNLOCK(bufobj);
        /*
         * Process dependencies, then return any unfinished ones.
         */
        if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
                buf_complete(bp);
#ifdef SOFTUPDATES
        if (!LIST_EMPTY(&bp->b_dep))
                softdep_move_dependencies(bp, origbp);
#endif
        /*
         * This buffer is marked B_NOCACHE so when it is released
         * by biodone it will be tossed.
         */
        bp->b_flags |= B_NOCACHE;
        bp->b_flags &= ~B_CACHE;
        pbrelvp(bp);

        /*
         * Prevent brelse() from trying to keep and re-dirty bp on
         * errors: that would dereference b_bufobj in
         * bdirty()/reassignbuf(), and b_bufobj was cleared by
         * pbrelvp() above.
         */
        if ((bp->b_ioflags & BIO_ERROR) != 0)
                bp->b_flags |= B_INVAL;
        bufdone(bp);
        BO_LOCK(bufobj);
        /*
         * Clear the BV_BKGRDINPROG flag in the original buffer
         * and awaken it if it is waiting for the write to complete.
         * If BV_BKGRDINPROG is not set in the original buffer it must
         * have been released and re-instantiated, which is not legal.
         */
        KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
            ("backgroundwritedone: lost buffer2"));
        origbp->b_vflags &= ~BV_BKGRDINPROG;
        if (origbp->b_vflags & BV_BKGRDWAIT) {
                origbp->b_vflags &= ~BV_BKGRDWAIT;
                wakeup(&origbp->b_xflags);
        }
        BO_UNLOCK(bufobj);
}

/*
 * Write, release buffer on completion. (Done by iodone
 * if async). Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that the buffer is
 * fully valid and thus cacheable. This is true even of NFS
 * now, so we set it generally. This could be set either here
 * or in biodone() since the I/O is synchronous. We put it
 * here.
 */
static int
ffs_bufwrite(struct buf *bp)
{
        struct buf *newbp;

        CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
        if (bp->b_flags & B_INVAL) {
                brelse(bp);
                return (0);
        }

        if (!BUF_ISLOCKED(bp))
                panic("bufwrite: buffer is not busy???");
        /*
         * If a background write is already in progress, delay
         * writing this block if it is asynchronous. Otherwise
         * wait for the background write to complete.
         */
        BO_LOCK(bp->b_bufobj);
        if (bp->b_vflags & BV_BKGRDINPROG) {
                if (bp->b_flags & B_ASYNC) {
                        BO_UNLOCK(bp->b_bufobj);
                        bdwrite(bp);
                        return (0);
                }
                bp->b_vflags |= BV_BKGRDWAIT;
                msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
                    "bwrbg", 0);
                if (bp->b_vflags & BV_BKGRDINPROG)
                        panic("bufwrite: still writing");
        }
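        /*
         * Clear any error recorded by a previously failed background
         * write of this block; we are about to write it again.
         */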
        bp->b_vflags &= ~BV_BKGRDERR;
        BO_UNLOCK(bp->b_bufobj);

        /*
         * If this buffer is marked for background writing and we
         * do not have to wait for it, make a copy and write the
         * copy so as to leave this buffer ready for further use.
         *
         * This optimization eats a lot of memory. If we have a page
         * or buffer shortfall, we can't do it.
         */
        if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
            (bp->b_flags & B_ASYNC) &&
            !vm_page_count_severe() &&
            !buf_dirty_count_severe()) {
                KASSERT(bp->b_iodone == NULL,
                    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

                /* Get a new block. */
                newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
                if (newbp == NULL)
                        goto normal_write;

                KASSERT(buf_mapped(bp), ("Unmapped cg"));
                memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
                BO_LOCK(bp->b_bufobj);
                bp->b_vflags |= BV_BKGRDINPROG;
                BO_UNLOCK(bp->b_bufobj);
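                /*
                 * The copy shadows the original buffer's block
                 * assignment and completes through
                 * ffs_backgroundwritedone().
                 */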
                newbp->b_xflags |= BX_BKGRDMARKER;
                newbp->b_lblkno = bp->b_lblkno;
                newbp->b_blkno = bp->b_blkno;
                newbp->b_offset = bp->b_offset;
                newbp->b_iodone = ffs_backgroundwritedone;
                newbp->b_flags |= B_ASYNC;
                newbp->b_flags &= ~B_INVAL;
                pbgetvp(bp->b_vp, newbp);

#ifdef SOFTUPDATES
                /*
                 * Move over the dependencies. If there are rollbacks,
                 * leave the parent buffer dirtied as it will need to
                 * be written again.
                 */
                if (LIST_EMPTY(&bp->b_dep) ||
                    softdep_move_dependencies(bp, newbp) == 0)
                        bundirty(bp);
#else
                bundirty(bp);
#endif

                /*
                 * Initiate write on the copy, release the original. The
                 * BKGRDINPROG flag prevents it from going away until
                 * the background write completes.
                 */
                bqrelse(bp);
                bp = newbp;
        } else
                /* Mark the buffer clean. */
                bundirty(bp);

        /* Let the normal bufwrite do the rest for us. */
normal_write:
        return (bufwrite(bp));
}

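/*
 * Strategy routine for FFS buffers: enforce the write-suspension
 * protocol, perform snapshot copy-on-write, and start any soft
 * dependency processing before handing the buffer to GEOM.
 */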
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
        struct vnode *vp;
        int error;
        struct buf *tbp;
        int nocopy;

        vp = bo->__bo_vnode;
        if (bp->b_iocmd == BIO_WRITE) {
                if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
                    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
                    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
                        panic("ffs_geom_strategy: bad I/O");
                nocopy = bp->b_flags & B_NOCOPY;
                bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
                if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
                    vp->v_rdev->si_snapdata != NULL) {
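                        /*
                         * A snapshot is active, so pass each buffer (or
                         * each member of a cluster) to ffs_copyonwrite()
                         * to preserve the old contents before they are
                         * overwritten.
                         */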
                        if ((bp->b_flags & B_CLUSTER) != 0) {
                                runningbufwakeup(bp);
                                TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
                                    b_cluster.cluster_entry) {
                                        error = ffs_copyonwrite(vp, tbp);
                                        if (error != 0 &&
                                            error != EOPNOTSUPP) {
                                                bp->b_error = error;
                                                bp->b_ioflags |= BIO_ERROR;
                                                bufdone(bp);
                                                return;
                                        }
                                }
                                bp->b_runningbufspace = bp->b_bufsize;
                                atomic_add_long(&runningbufspace,
                                    bp->b_runningbufspace);
                        } else {
                                error = ffs_copyonwrite(vp, bp);
                                if (error != 0 && error != EOPNOTSUPP) {
                                        bp->b_error = error;
                                        bp->b_ioflags |= BIO_ERROR;
                                        bufdone(bp);
                                        return;
                                }
                        }
                }
#ifdef SOFTUPDATES
                if ((bp->b_flags & B_CLUSTER) != 0) {
                        TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
                            b_cluster.cluster_entry) {
                                if (!LIST_EMPTY(&tbp->b_dep))
                                        buf_start(tbp);
                        }
                } else {
                        if (!LIST_EMPTY(&bp->b_dep))
                                buf_start(bp);
                }
#endif
        }
        g_vfs_strategy(bo, bp);
}

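/*
 * Return non-zero if the given mount point is serviced by this
 * filesystem's vfsops.
 */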
int
ffs_own_mount(const struct mount *mp)
{

        if (mp->mnt_op == &ufs_vfsops)
                return (1);
        return (0);
}

#ifdef DDB
#ifdef SOFTUPDATES

/* defined in ffs_softdep.c */
extern void db_print_ffs(struct ufsmount *ump);

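/*
 * DDB "show ffs" command: with an address, dump the state of that
 * ufsmount; otherwise walk the mount list and dump every mounted
 * UFS filesystem, e.g.:
 *
 *      db> show ffs
 */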
DB_SHOW_COMMAND(ffs, db_show_ffs)
{
        struct mount *mp;
        struct ufsmount *ump;

        if (have_addr) {
                ump = VFSTOUFS((struct mount *)addr);
                db_print_ffs(ump);
                return;
        }

        TAILQ_FOREACH(mp, &mountlist, mnt_list) {
                if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
                        db_print_ffs(VFSTOUFS(mp));
        }
}

#endif /* SOFTUPDATES */
#endif /* DDB */