FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c
1 /*-
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: releng/7.4/sys/kern/vfs_mount.c 198244 2009-10-19 19:11:00Z gallatin $");
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/clock.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/libkern.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/filedesc.h>
53 #include <sys/reboot.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysproto.h>
56 #include <sys/sx.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61 #include <vm/uma.h>
62
63 #include <geom/geom.h>
64
65 #include <machine/stdarg.h>
66
67 #include <security/audit/audit.h>
68 #include <security/mac/mac_framework.h>
69
70 #include "opt_rootdevname.h"
71 #include "opt_ddb.h"
72 #include "opt_mac.h"
73
74 #ifdef DDB
75 #include <ddb/ddb.h>
76 #endif
77
#define	ROOTNAME		"root_device"
/*
 * Limit applied by vfs_buildopts() to each option name length, each option
 * value length and the running total of memory used by one option list.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)

/* Forward declarations of file-local helpers. */
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static void	free_mntarg(struct mntarg *ma);
static int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);

/*
 * vfs.usermount: when zero, vfs_domount() and unmount() insist on the
 * PRIV_VFS_MOUNT / PRIV_VFS_UNMOUNT privilege.  Jailed callers are always
 * subject to that privilege check regardless of this setting.
 */
static int	usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* M_MOUNT backs the mount option lists built and freed in this file. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
/* UMA zone that vfs_mount_alloc()/vfs_mount_destroy() use for struct mount. */
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;
108
109 /*
110 * The root filesystem is detailed in the kernel environment variable
111 * vfs.root.mountfrom, which is expected to be in the general format
112 *
113 * <vfsname>:[<path>]
114 * vfsname := the name of a VFS known to the kernel and capable
115 * of being mounted as root
116 * path := disk device name or other data used by the filesystem
117 * to locate its physical store
118 */
119
/*
 * Global opts, taken by all filesystems.
 * The table is terminated by a NULL entry.
 */
static const char *global_opts[] = {
	"errmsg",
	"fstype",
	"fspath",
	"ro",
	"rw",
	"nosuid",
	"noexec",
	NULL
};

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * Each entry uses the <vfsname>:<path> form; the table is NULL-terminated.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};

/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
/*
 * Compile-time root device name.  ROOTDEVNAME is presumably supplied by
 * opt_rootdevname.h when configured (TODO confirm); otherwise it falls
 * back to NULL here.
 */
#ifndef ROOTDEVNAME
#  define ROOTDEVNAME NULL
#endif
static const char	*ctrootdevname = ROOTDEVNAME;
149
150 /*
151 * ---------------------------------------------------------------------
152 * Functions for building and sanitizing the mount options
153 */
154
155 /* Remove one mount option. */
156 static void
157 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
158 {
159
160 TAILQ_REMOVE(opts, opt, link);
161 free(opt->name, M_MOUNT);
162 if (opt->value != NULL)
163 free(opt->value, M_MOUNT);
164 #ifdef INVARIANTS
165 else if (opt->len != 0)
166 panic("%s: mount option with NULL value but length != 0",
167 __func__);
168 #endif
169 free(opt, M_MOUNT);
170 }
171
172 /* Release all resources related to the mount options. */
173 void
174 vfs_freeopts(struct vfsoptlist *opts)
175 {
176 struct vfsopt *opt;
177
178 while (!TAILQ_EMPTY(opts)) {
179 opt = TAILQ_FIRST(opts);
180 vfs_freeopt(opts, opt);
181 }
182 free(opts, M_MOUNT);
183 }
184
185 void
186 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
187 {
188 struct vfsopt *opt, *temp;
189
190 if (opts == NULL)
191 return;
192 TAILQ_FOREACH_SAFE(opt, opts, link, temp) {
193 if (strcmp(opt->name, name) == 0)
194 vfs_freeopt(opts, opt);
195 }
196 }
197
/*
 * Decide whether two option names refer to the same option.  Names match
 * when they are identical, or when one of them is the other with a "no"
 * prefix.  Returns 1 on a match and 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	int neg1, neg2;

	/* Same spelling on both sides. */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	neg1 = (strncmp(opt1, "no", 2) == 0);
	neg2 = (strncmp(opt2, "no", 2) == 0);

	/* opt1 is the negated form of opt2. */
	if (neg1 && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* opt2 is the negated form of opt1. */
	if (neg2 && strcmp(opt1, opt2 + 2) == 0)
		return (1);
	return (0);
}
216
217 /*
218 * If a mount option is specified several times,
219 * (with or without the "no" prefix) only keep
220 * the last occurence of it.
221 */
222 static void
223 vfs_sanitizeopts(struct vfsoptlist *opts)
224 {
225 struct vfsopt *opt, *opt2, *tmp;
226
227 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
228 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
229 while (opt2 != NULL) {
230 if (vfs_equalopts(opt->name, opt2->name)) {
231 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
232 vfs_freeopt(opts, opt2);
233 opt2 = tmp;
234 } else {
235 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
236 }
237 }
238 }
239 }
240
241 /*
242 * Build a linked list of mount options from a struct uio.
243 */
244 static int
245 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
246 {
247 struct vfsoptlist *opts;
248 struct vfsopt *opt;
249 size_t memused;
250 unsigned int i, iovcnt;
251 int error, namelen, optlen;
252
253 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
254 TAILQ_INIT(opts);
255 memused = 0;
256 iovcnt = auio->uio_iovcnt;
257 for (i = 0; i < iovcnt; i += 2) {
258 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
259 namelen = auio->uio_iov[i].iov_len;
260 optlen = auio->uio_iov[i + 1].iov_len;
261 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
262 opt->value = NULL;
263 opt->len = 0;
264
265 /*
266 * Do this early, so jumps to "bad" will free the current
267 * option.
268 */
269 TAILQ_INSERT_TAIL(opts, opt, link);
270 memused += sizeof(struct vfsopt) + optlen + namelen;
271
272 /*
273 * Avoid consuming too much memory, and attempts to overflow
274 * memused.
275 */
276 if (memused > VFS_MOUNTARG_SIZE_MAX ||
277 optlen > VFS_MOUNTARG_SIZE_MAX ||
278 namelen > VFS_MOUNTARG_SIZE_MAX) {
279 error = EINVAL;
280 goto bad;
281 }
282
283 if (auio->uio_segflg == UIO_SYSSPACE) {
284 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
285 } else {
286 error = copyin(auio->uio_iov[i].iov_base, opt->name,
287 namelen);
288 if (error)
289 goto bad;
290 }
291 /* Ensure names are null-terminated strings. */
292 if (opt->name[namelen - 1] != '\0') {
293 error = EINVAL;
294 goto bad;
295 }
296 if (optlen != 0) {
297 opt->len = optlen;
298 opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
299 if (auio->uio_segflg == UIO_SYSSPACE) {
300 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
301 optlen);
302 } else {
303 error = copyin(auio->uio_iov[i + 1].iov_base,
304 opt->value, optlen);
305 if (error)
306 goto bad;
307 }
308 }
309 }
310 vfs_sanitizeopts(opts);
311 *options = opts;
312 return (0);
313 bad:
314 vfs_freeopts(opts);
315 return (error);
316 }
317
318 /*
319 * Merge the old mount options with the new ones passed
320 * in the MNT_UPDATE case.
321 */
322 static void
323 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
324 {
325 struct vfsopt *opt, *opt2, *new;
326
327 TAILQ_FOREACH(opt, opts, link) {
328 /*
329 * Check that this option hasn't been redefined
330 * nor cancelled with a "no" mount option.
331 */
332 opt2 = TAILQ_FIRST(toopts);
333 while (opt2 != NULL) {
334 if (strcmp(opt2->name, opt->name) == 0)
335 goto next;
336 if (strncmp(opt2->name, "no", 2) == 0 &&
337 strcmp(opt2->name + 2, opt->name) == 0) {
338 vfs_freeopt(toopts, opt2);
339 goto next;
340 }
341 opt2 = TAILQ_NEXT(opt2, link);
342 }
343 /* We want this option, duplicate it. */
344 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
345 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
346 strcpy(new->name, opt->name);
347 if (opt->len != 0) {
348 new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
349 bcopy(opt->value, new->value, opt->len);
350 } else {
351 new->value = NULL;
352 }
353 new->len = opt->len;
354 TAILQ_INSERT_TAIL(toopts, new, link);
355 next:
356 continue;
357 }
358 }
359
360 /*
361 * Mount a filesystem.
362 */
363 int
364 nmount(td, uap)
365 struct thread *td;
366 struct nmount_args /* {
367 struct iovec *iovp;
368 unsigned int iovcnt;
369 int flags;
370 } */ *uap;
371 {
372 struct uio *auio;
373 struct iovec *iov;
374 unsigned int i;
375 int error;
376 u_int iovcnt;
377
378 AUDIT_ARG(fflags, uap->flags);
379
380 /*
381 * Filter out MNT_ROOTFS. We do not want clients of nmount() in
382 * userspace to set this flag, but we must filter it out if we want
383 * MNT_UPDATE on the root file system to work.
384 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
385 */
386 uap->flags &= ~MNT_ROOTFS;
387
388 iovcnt = uap->iovcnt;
389 /*
390 * Check that we have an even number of iovec's
391 * and that we have at least two options.
392 */
393 if ((iovcnt & 1) || (iovcnt < 4))
394 return (EINVAL);
395
396 error = copyinuio(uap->iovp, iovcnt, &auio);
397 if (error)
398 return (error);
399 iov = auio->uio_iov;
400 for (i = 0; i < iovcnt; i++) {
401 if (iov->iov_len > MMAXOPTIONLEN) {
402 free(auio, M_IOV);
403 return (EINVAL);
404 }
405 iov++;
406 }
407 error = vfs_donmount(td, uap->flags, auio);
408
409 free(auio, M_IOV);
410 return (error);
411 }
412
413 /*
414 * ---------------------------------------------------------------------
415 * Various utility functions
416 */
417
/*
 * Apply MNT_REF() to the mount point while holding the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK).
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
426
/*
 * Apply MNT_REL() to the mount point while holding the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK).  Counterpart of vfs_ref().
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
435
436 static int
437 mount_init(void *mem, int size, int flags)
438 {
439 struct mount *mp;
440
441 mp = (struct mount *)mem;
442 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
443 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
444 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
445 return (0);
446 }
447
448 static void
449 mount_fini(void *mem, int size)
450 {
451 struct mount *mp;
452
453 mp = (struct mount *)mem;
454 lockdestroy(&mp->mnt_explock);
455 lockdestroy(&mp->mnt_lock);
456 mtx_destroy(&mp->mnt_mtx);
457 }
458
/*
 * Allocate and initialize the mount point struct.
 *
 * vp is recorded as the covered vnode, vfsp supplies the operations
 * vector, type number and type name, fspath is copied into f_mntonname,
 * and the credentials of td are duplicated into the mount and give it its
 * owner uid.  vfs_busy() is called on the new mount before it is returned
 * (its result is ignored), and vfsp's reference count is bumped.
 */
struct mount *
vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
    const char *fspath, struct thread *td)
{
	struct mount *mp;

	mp = uma_zalloc(mount_zone, M_WAITOK);
	/* Only the mnt_startzero..mnt_endzero range is cleared. */
	bzero(&mp->mnt_startzero,
	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
	TAILQ_INIT(&mp->mnt_nvnodelist);
	mp->mnt_nvnodelistsize = 0;
	mp->mnt_ref = 0;
	(void) vfs_busy(mp, LK_NOWAIT, 0, td);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;	/* XXX Unlocked */
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	/*
	 * The generation is only ever incremented here, never assigned.
	 * NOTE(review): presumably it lives outside the zeroed range so
	 * that it survives reuse of the structure -- confirm.
	 */
	mp->mnt_gen++;
	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_cred = crdup(td->td_ucred);
	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
	mp->mnt_iosize_max = DFLTPHYS;
#ifdef MAC
	mac_init_mount(mp);
	mac_create_mount(td->td_ucred, mp);
#endif
	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
	return (mp);
}
493
/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
 *
 * Steps, in order:
 *  - wait up to three one-second sleeps for mnt_ref to reach zero;
 *  - wait, without a timeout, for mnt_holdcnt, mnt_writeopcount and
 *    mnt_secondary_writes to drain;
 *  - drop the reference on the filesystem type;
 *  - panic if vnodes are still on the mount's list or a counter is
 *    nonzero;
 *  - free the option list and credentials and return the structure to
 *    mount_zone.
 */
void
vfs_mount_destroy(struct mount *mp)
{
	int i;

	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_REFEXPIRE;
	/* Release anybody sleeping on the mount with MNTK_MWAIT set. */
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}
	/* Bounded wait: at most three sleeps of hz ticks each. */
	for (i = 0; mp->mnt_ref && i < 3; i++)
		msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
	/*
	 * This will always cause a 3 second delay in rebooting due to
	 * refs on the root mountpoint that never go away.  Most of these
	 * are held by init which never exits.
	 */
	if (i == 3 && (!rebooting || bootverbose))
		printf("Mount point %s had %d dangling refs\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_ref);
	/* Unbounded wait until nobody holds the mount point. */
	if (mp->mnt_holdcnt != 0) {
		printf("Waiting for mount point to be unheld\n");
		while (mp->mnt_holdcnt != 0) {
			mp->mnt_holdcntwaiters++;
			msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
			       PZERO, "mntdestroy", 0);
			mp->mnt_holdcntwaiters--;
		}
		printf("mount point unheld\n");
	}
	/* Unbounded wait for primary write operations to finish. */
	if (mp->mnt_writeopcount > 0) {
		printf("Waiting for mount point write ops\n");
		while (mp->mnt_writeopcount > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_writeopcount,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy2", 0);
		}
		printf("mount point write ops completed\n");
	}
	/* Unbounded wait for secondary write operations to finish. */
	if (mp->mnt_secondary_writes > 0) {
		printf("Waiting for mount point secondary write ops\n");
		while (mp->mnt_secondary_writes > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_secondary_writes,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy3", 0);
		}
		printf("mount point secondary write ops completed\n");
	}
	MNT_IUNLOCK(mp);
	/* Like the increment in vfs_mount_alloc(), this is done unlocked. */
	mp->mnt_vfc->vfc_refcount--;
	/* Any vnode still attached is fatal; print them all first. */
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
		struct vnode *vp;

		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
			vprint("", vp);
		panic("unmount: dangling vnode");
	}
	MNT_ILOCK(mp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	if (mp->mnt_writeopcount != 0)
		panic("vfs_mount_destroy: nonzero writeopcount");
	if (mp->mnt_secondary_writes != 0)
		panic("vfs_mount_destroy: nonzero secondary_writes");
	if (mp->mnt_nvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero nvnodelistsize");
	/*
	 * Leave recognisable negative values in the counters.
	 * NOTE(review): presumably a poison to expose use after destroy.
	 */
	mp->mnt_writeopcount = -1000;
	mp->mnt_nvnodelistsize = -1000;
	mp->mnt_secondary_writes = -1000;
	MNT_IUNLOCK(mp);
#ifdef MAC
	mac_destroy_mount(mp);
#endif
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	crfree(mp->mnt_cred);
	uma_zfree(mount_zone, mp);
}
578
579 int
580 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
581 {
582 struct vfsoptlist *optlist;
583 struct vfsopt *opt, *noro_opt, *tmp_opt;
584 char *fstype, *fspath, *errmsg;
585 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
586 int has_rw, has_noro;
587
588 errmsg = NULL;
589 errmsg_len = 0;
590 errmsg_pos = -1;
591 has_rw = 0;
592 has_noro = 0;
593
594 error = vfs_buildopts(fsoptions, &optlist);
595 if (error)
596 return (error);
597
598 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
599 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
600
601 /*
602 * We need these two options before the others,
603 * and they are mandatory for any filesystem.
604 * Ensure they are NUL terminated as well.
605 */
606 fstypelen = 0;
607 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
608 if (error || fstype[fstypelen - 1] != '\0') {
609 error = EINVAL;
610 if (errmsg != NULL)
611 strncpy(errmsg, "Invalid fstype", errmsg_len);
612 goto bail;
613 }
614 fspathlen = 0;
615 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
616 if (error || fspath[fspathlen - 1] != '\0') {
617 error = EINVAL;
618 if (errmsg != NULL)
619 strncpy(errmsg, "Invalid fspath", errmsg_len);
620 goto bail;
621 }
622
623 /*
624 * We need to see if we have the "update" option
625 * before we call vfs_domount(), since vfs_domount() has special
626 * logic based on MNT_UPDATE. This is very important
627 * when we want to update the root filesystem.
628 */
629 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
630 if (strcmp(opt->name, "update") == 0) {
631 fsflags |= MNT_UPDATE;
632 vfs_freeopt(optlist, opt);
633 }
634 else if (strcmp(opt->name, "async") == 0)
635 fsflags |= MNT_ASYNC;
636 else if (strcmp(opt->name, "force") == 0) {
637 fsflags |= MNT_FORCE;
638 vfs_freeopt(optlist, opt);
639 }
640 else if (strcmp(opt->name, "reload") == 0) {
641 fsflags |= MNT_RELOAD;
642 vfs_freeopt(optlist, opt);
643 }
644 else if (strcmp(opt->name, "multilabel") == 0)
645 fsflags |= MNT_MULTILABEL;
646 else if (strcmp(opt->name, "noasync") == 0)
647 fsflags &= ~MNT_ASYNC;
648 else if (strcmp(opt->name, "noatime") == 0)
649 fsflags |= MNT_NOATIME;
650 else if (strcmp(opt->name, "atime") == 0) {
651 free(opt->name, M_MOUNT);
652 opt->name = strdup("nonoatime", M_MOUNT);
653 }
654 else if (strcmp(opt->name, "noclusterr") == 0)
655 fsflags |= MNT_NOCLUSTERR;
656 else if (strcmp(opt->name, "clusterr") == 0) {
657 free(opt->name, M_MOUNT);
658 opt->name = strdup("nonoclusterr", M_MOUNT);
659 }
660 else if (strcmp(opt->name, "noclusterw") == 0)
661 fsflags |= MNT_NOCLUSTERW;
662 else if (strcmp(opt->name, "clusterw") == 0) {
663 free(opt->name, M_MOUNT);
664 opt->name = strdup("nonoclusterw", M_MOUNT);
665 }
666 else if (strcmp(opt->name, "noexec") == 0)
667 fsflags |= MNT_NOEXEC;
668 else if (strcmp(opt->name, "exec") == 0) {
669 free(opt->name, M_MOUNT);
670 opt->name = strdup("nonoexec", M_MOUNT);
671 }
672 else if (strcmp(opt->name, "nosuid") == 0)
673 fsflags |= MNT_NOSUID;
674 else if (strcmp(opt->name, "suid") == 0) {
675 free(opt->name, M_MOUNT);
676 opt->name = strdup("nonosuid", M_MOUNT);
677 }
678 else if (strcmp(opt->name, "nosymfollow") == 0)
679 fsflags |= MNT_NOSYMFOLLOW;
680 else if (strcmp(opt->name, "symfollow") == 0) {
681 free(opt->name, M_MOUNT);
682 opt->name = strdup("nonosymfollow", M_MOUNT);
683 }
684 else if (strcmp(opt->name, "noro") == 0) {
685 fsflags &= ~MNT_RDONLY;
686 has_noro = 1;
687 }
688 else if (strcmp(opt->name, "rw") == 0) {
689 fsflags &= ~MNT_RDONLY;
690 has_rw = 1;
691 }
692 else if (strcmp(opt->name, "ro") == 0)
693 fsflags |= MNT_RDONLY;
694 else if (strcmp(opt->name, "rdonly") == 0) {
695 free(opt->name, M_MOUNT);
696 opt->name = strdup("ro", M_MOUNT);
697 fsflags |= MNT_RDONLY;
698 }
699 else if (strcmp(opt->name, "suiddir") == 0)
700 fsflags |= MNT_SUIDDIR;
701 else if (strcmp(opt->name, "sync") == 0)
702 fsflags |= MNT_SYNCHRONOUS;
703 else if (strcmp(opt->name, "union") == 0)
704 fsflags |= MNT_UNION;
705 }
706
707 /*
708 * If "rw" was specified as a mount option, and we
709 * are trying to update a mount-point from "ro" to "rw",
710 * we need a mount option "noro", since in vfs_mergeopts(),
711 * "noro" will cancel "ro", but "rw" will not do anything.
712 */
713 if (has_rw && !has_noro) {
714 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
715 noro_opt->name = strdup("noro", M_MOUNT);
716 noro_opt->value = NULL;
717 noro_opt->len = 0;
718 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
719 }
720
721 /*
722 * Be ultra-paranoid about making sure the type and fspath
723 * variables will fit in our mp buffers, including the
724 * terminating NUL.
725 */
726 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
727 error = ENAMETOOLONG;
728 goto bail;
729 }
730
731 mtx_lock(&Giant);
732 error = vfs_domount(td, fstype, fspath, fsflags, optlist);
733 mtx_unlock(&Giant);
734 bail:
735 /* copyout the errmsg */
736 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
737 && errmsg_len > 0 && errmsg != NULL) {
738 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
739 bcopy(errmsg,
740 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
741 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
742 } else {
743 copyout(errmsg,
744 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
745 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
746 }
747 }
748
749 if (error != 0)
750 vfs_freeopts(optlist);
751 return (error);
752 }
753
754 /*
755 * Old mount API.
756 */
757 #ifndef _SYS_SYSPROTO_H_
758 struct mount_args {
759 char *type;
760 char *path;
761 int flags;
762 caddr_t data;
763 };
764 #endif
765 /* ARGSUSED */
766 int
767 mount(td, uap)
768 struct thread *td;
769 struct mount_args /* {
770 char *type;
771 char *path;
772 int flags;
773 caddr_t data;
774 } */ *uap;
775 {
776 char *fstype;
777 struct vfsconf *vfsp = NULL;
778 struct mntarg *ma = NULL;
779 int error;
780
781 AUDIT_ARG(fflags, uap->flags);
782
783 /*
784 * Filter out MNT_ROOTFS. We do not want clients of mount() in
785 * userspace to set this flag, but we must filter it out if we want
786 * MNT_UPDATE on the root file system to work.
787 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
788 */
789 uap->flags &= ~MNT_ROOTFS;
790
791 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
792 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
793 if (error) {
794 free(fstype, M_TEMP);
795 return (error);
796 }
797
798 AUDIT_ARG(text, fstype);
799 mtx_lock(&Giant);
800 vfsp = vfs_byname_kld(fstype, td, &error);
801 free(fstype, M_TEMP);
802 if (vfsp == NULL) {
803 mtx_unlock(&Giant);
804 return (ENOENT);
805 }
806 if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
807 mtx_unlock(&Giant);
808 return (EOPNOTSUPP);
809 }
810
811 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
812 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
813 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
814 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
815 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
816
817 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
818 mtx_unlock(&Giant);
819 return (error);
820 }
821
822
/*
 * vfs_domount(): actually attempt a filesystem mount.
 *
 * Must be called with Giant held.  fsdata is the option list; it is
 * stored in mp->mnt_optnew for the duration of VFS_MOUNT() and becomes
 * mp->mnt_opt when the mount or update succeeds.
 *
 * Two paths share this function:
 *  - MNT_UPDATE set: fspath must name the root vnode of an existing
 *    mount; that mount is busied, updated in place and has its previous
 *    flags restored if the update fails.
 *  - MNT_UPDATE clear: a new struct mount is allocated over the directory
 *    named by fspath and, on success, appended to mountlist.
 *
 * Returns 0 on success or an errno value.
 */
static int
vfs_domount(
	struct thread *td,	/* Calling thread. */
	const char *fstype,	/* Filesystem type. */
	char *fspath,		/* Mount path. */
	int fsflags,		/* Flags common to all filesystems. */
	void *fsdata		/* Options local to the filesystem. */
	)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsconf *vfsp;
	struct export_args export;
	int error, flag = 0;
	struct vattr va;
	struct nameidata nd;

	mtx_assert(&Giant, MA_OWNED);
	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		return (ENAMETOOLONG);

	/* Jailed callers, and everybody when usermount is off, need privilege. */
	if (jailed(td->td_ucred) || usermount == 0) {
		if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
			return (error);
	}

	/*
	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
	 */
	if (fsflags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
		if (error)
			return (error);
	}
	if (fsflags & MNT_SUIDDIR) {
		error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
	 */
	if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
		if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
			fsflags |= MNT_NOSUID | MNT_USER;
	}

	/* Load KLDs before we lock the covered vnode to avoid reversals. */
	vfsp = NULL;
	if ((fsflags & MNT_UPDATE) == 0) {
		/* Don't try to load KLDs if we're mounting the root. */
		if (fsflags & MNT_ROOTFS)
			vfsp = vfs_byname(fstype);
		else
			vfsp = vfs_byname_kld(fstype, td, &error);
		if (vfsp == NULL)
			return (ENODEV);
		/* Inside a jail only filesystems flagged VFCF_JAIL may be mounted. */
		if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
			return (EPERM);
	}
	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
	    fspath, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (fsflags & MNT_UPDATE) {
		/* An update must be addressed to the root of a mount. */
		if ((vp->v_vflag & VV_ROOT) == 0) {
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		MNT_ILOCK(mp);
		/* Remember the current flags so a failed update can be undone. */
		flag = mp->mnt_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((fsflags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			MNT_IUNLOCK(mp);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		MNT_IUNLOCK(mp);
		/*
		 * Only privileged root, or (if MNT_USER is set) the user that
		 * did the original mount is permitted to update it.
		 */
		error = vfs_suser(mp, td);
		if (error) {
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
			vput(vp);
			return (EBUSY);
		}
		/* VI_MOUNT marks the vnode as having a mount operation in progress. */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			VI_UNLOCK(vp);
			vfs_unbusy(mp, td);
			vput(vp);
			return (EBUSY);
		}
		vp->v_iflag |= VI_MOUNT;
		VI_UNLOCK(vp);
		MNT_ILOCK(mp);
		mp->mnt_flag |= fsflags &
		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
		MNT_IUNLOCK(mp);
		VOP_UNLOCK(vp, 0, td);
		/* Carry over the existing options that the new list does not override. */
		mp->mnt_optnew = fsdata;
		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
	} else {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
		if (error) {
			vput(vp);
			return (error);
		}
		if (va.va_uid != td->td_ucred->cr_uid) {
			error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
			    0);
			if (error) {
				vput(vp);
				return (error);
			}
		}
		error = vinvalbuf(vp, V_SAVE, td, 0, 0);
		if (error != 0) {
			vput(vp);
			return (error);
		}
		if (vp->v_type != VDIR) {
			vput(vp);
			return (ENOTDIR);
		}
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			VI_UNLOCK(vp);
			vput(vp);
			return (EBUSY);
		}
		vp->v_iflag |= VI_MOUNT;
		VI_UNLOCK(vp);

		/*
		 * Allocate and initialize the filesystem.
		 */
		mp = vfs_mount_alloc(vp, vfsp, fspath, td);
		VOP_UNLOCK(vp, 0, td);

		/* XXXMAC: pass to vfs_mount_alloc? */
		mp->mnt_optnew = fsdata;
	}

	/*
	 * Set the mount level flags.
	 */
	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
		(fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
			    MNT_RDONLY));
	if ((mp->mnt_flag & MNT_ASYNC) == 0)
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.  No freeing of cn_pnbuf.
	 */
	error = VFS_MOUNT(mp, td);

	/*
	 * Process the export option only if we are
	 * updating mount options.
	 */
	if (!error && (fsflags & MNT_UPDATE)) {
		if (vfs_copyopt(mp->mnt_optnew, "export", &export,
		    sizeof(export)) == 0)
			error = vfs_export(mp, &export);
	}

	/* On success the new option list replaces the old one. */
	if (!error) {
		if (mp->mnt_opt != NULL)
			vfs_freeopts(mp->mnt_opt);
		mp->mnt_opt = mp->mnt_optnew;
		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
	}
	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		/*
		 * On failure restore the saved flags, keeping only the
		 * current MNT_QUOTA bit; on success clear the flags that
		 * were only meaningful for this update.
		 */
		if (error)
			mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
				(flag & ~MNT_QUOTA);
		else
			mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
					  MNT_FORCE | MNT_SNAPSHOT);
		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
			mp->mnt_kern_flag |= MNTK_ASYNC;
		else
			mp->mnt_kern_flag &= ~MNTK_ASYNC;
		MNT_IUNLOCK(mp);
		/*
		 * A read-write mount needs a syncer vnode, a read-only one
		 * does not.
		 * NOTE(review): an error from vfs_allocate_syncvnode() is
		 * returned after mnt_opt may already have been set to
		 * fsdata above.  vfs_donmount() frees its option list on
		 * error, which would leave mnt_opt dangling -- confirm
		 * whether vfs_allocate_syncvnode() can actually fail.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			if (mp->mnt_syncer == NULL)
				error = vfs_allocate_syncvnode(mp);
		} else {
			if (mp->mnt_syncer != NULL)
				vrele(mp->mnt_syncer);
			mp->mnt_syncer = NULL;
		}
		vfs_unbusy(mp, td);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vrele(vp);
		return (error);
	}
	/* New mount: settle the async state, then relock the covered vnode. */
	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
		mp->mnt_kern_flag |= MNTK_ASYNC;
	else
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	if (!error) {
		struct vnode *newdp;

		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vp->v_mountedhere = mp;
		mtx_lock(&mountlist_mtx);
		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
		mtx_unlock(&mountlist_mtx);
		vfs_event_signal(NULL, VQ_MOUNT, 0);
		if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
			panic("mount: lost mount");
		VOP_UNLOCK(newdp, 0, td);
		VOP_UNLOCK(vp, 0, td);
		mountcheckdirs(vp, newdp);
		vrele(newdp);
		/*
		 * NOTE(review): the same concern as on the update path
		 * applies here; in addition the mount is already on
		 * mountlist when this error is returned.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp, td);
		if (error)
			vrele(vp);
	} else {
		/* The mount failed: undo the marking and discard the mount. */
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vfs_unbusy(mp, td);
		vfs_mount_destroy(mp);
		vput(vp);
	}
	return (error);
}
1105
1106 /*
1107 * Unmount a filesystem.
1108 *
1109 * Note: unmount takes a path to the vnode mounted on as argument, not
1110 * special file (as before).
1111 */
1112 #ifndef _SYS_SYSPROTO_H_
1113 struct unmount_args {
1114 char *path;
1115 int flags;
1116 };
1117 #endif
1118 /* ARGSUSED */
1119 int
1120 unmount(td, uap)
1121 struct thread *td;
1122 register struct unmount_args /* {
1123 char *path;
1124 int flags;
1125 } */ *uap;
1126 {
1127 struct mount *mp;
1128 char *pathbuf;
1129 int error, id0, id1;
1130
1131 if (jailed(td->td_ucred) || usermount == 0) {
1132 error = priv_check(td, PRIV_VFS_UNMOUNT);
1133 if (error)
1134 return (error);
1135 }
1136
1137 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1138 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
1139 if (error) {
1140 free(pathbuf, M_TEMP);
1141 return (error);
1142 }
1143 AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
1144 mtx_lock(&Giant);
1145 if (uap->flags & MNT_BYFSID) {
1146 /* Decode the filesystem ID. */
1147 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
1148 mtx_unlock(&Giant);
1149 free(pathbuf, M_TEMP);
1150 return (EINVAL);
1151 }
1152
1153 mtx_lock(&mountlist_mtx);
1154 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1155 if (mp->mnt_stat.f_fsid.val[0] == id0 &&
1156 mp->mnt_stat.f_fsid.val[1] == id1)
1157 break;
1158 }
1159 mtx_unlock(&mountlist_mtx);
1160 } else {
1161 mtx_lock(&mountlist_mtx);
1162 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1163 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
1164 break;
1165 }
1166 mtx_unlock(&mountlist_mtx);
1167 }
1168 free(pathbuf, M_TEMP);
1169 if (mp == NULL) {
1170 /*
1171 * Previously we returned ENOENT for a nonexistent path and
1172 * EINVAL for a non-mountpoint. We cannot tell these apart
1173 * now, so in the !MNT_BYFSID case return the more likely
1174 * EINVAL for compatibility.
1175 */
1176 mtx_unlock(&Giant);
1177 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
1178 }
1179
1180 /*
1181 * Don't allow unmounting the root filesystem.
1182 */
1183 if (mp->mnt_flag & MNT_ROOTFS) {
1184 mtx_unlock(&Giant);
1185 return (EINVAL);
1186 }
1187 error = dounmount(mp, uap->flags, td);
1188 mtx_unlock(&Giant);
1189 return (error);
1190 }
1191
1192 /*
1193 * Do the actual filesystem unmount.
1194 */
1195 int
1196 dounmount(mp, flags, td)
1197 struct mount *mp;
1198 int flags;
1199 struct thread *td;
1200 {
1201 struct vnode *coveredvp, *fsrootvp;
1202 int error;
1203 int async_flag;
1204 int mnt_gen_r;
1205
1206 mtx_assert(&Giant, MA_OWNED);
1207
1208 if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
1209 mnt_gen_r = mp->mnt_gen;
1210 VI_LOCK(coveredvp);
1211 vholdl(coveredvp);
1212 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
1213 vdrop(coveredvp);
1214 /*
1215 * Check for mp being unmounted while waiting for the
1216 * covered vnode lock.
1217 */
1218 if (coveredvp->v_mountedhere != mp ||
1219 coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
1220 VOP_UNLOCK(coveredvp, 0, td);
1221 return (EBUSY);
1222 }
1223 }
1224 /*
1225 * Only privileged root, or (if MNT_USER is set) the user that did the
1226 * original mount is permitted to unmount this filesystem.
1227 */
1228 error = vfs_suser(mp, td);
1229 if (error) {
1230 if (coveredvp)
1231 VOP_UNLOCK(coveredvp, 0, td);
1232 return (error);
1233 }
1234
1235 MNT_ILOCK(mp);
1236 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1237 MNT_IUNLOCK(mp);
1238 if (coveredvp)
1239 VOP_UNLOCK(coveredvp, 0, td);
1240 return (EBUSY);
1241 }
1242 mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
1243 /* Allow filesystems to detect that a forced unmount is in progress. */
1244 if (flags & MNT_FORCE)
1245 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1246 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1247 ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
1248 if (error) {
1249 MNT_ILOCK(mp);
1250 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
1251 MNTK_UNMOUNTF);
1252 if (mp->mnt_kern_flag & MNTK_MWAIT)
1253 wakeup(mp);
1254 MNT_IUNLOCK(mp);
1255 if (coveredvp)
1256 VOP_UNLOCK(coveredvp, 0, td);
1257 return (error);
1258 }
1259 vn_start_write(NULL, &mp, V_WAIT);
1260
1261 if (mp->mnt_flag & MNT_EXPUBLIC)
1262 vfs_setpublicfs(NULL, NULL, NULL);
1263
1264 vfs_msync(mp, MNT_WAIT);
1265 MNT_ILOCK(mp);
1266 async_flag = mp->mnt_flag & MNT_ASYNC;
1267 mp->mnt_flag &= ~MNT_ASYNC;
1268 mp->mnt_kern_flag &= ~MNTK_ASYNC;
1269 MNT_IUNLOCK(mp);
1270 cache_purgevfs(mp); /* remove cache entries for this file sys */
1271 if (mp->mnt_syncer != NULL)
1272 vrele(mp->mnt_syncer);
1273 /*
1274 * For forced unmounts, move process cdir/rdir refs on the fs root
1275 * vnode to the covered vnode. For non-forced unmounts we want
1276 * such references to cause an EBUSY error.
1277 */
1278 if ((flags & MNT_FORCE) &&
1279 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
1280 if (mp->mnt_vnodecovered != NULL)
1281 mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
1282 if (fsrootvp == rootvnode) {
1283 vrele(rootvnode);
1284 rootvnode = NULL;
1285 }
1286 vput(fsrootvp);
1287 }
1288 if (((mp->mnt_flag & MNT_RDONLY) ||
1289 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
1290 (flags & MNT_FORCE)) {
1291 error = VFS_UNMOUNT(mp, flags, td);
1292 }
1293 vn_finished_write(mp);
1294 /*
1295 * If we failed to flush the dirty blocks for this mount point,
1296 * undo all the cdir/rdir and rootvnode changes we made above.
1297 * Unless we failed to do so because the device is reporting that
1298 * it doesn't exist anymore.
1299 */
1300 if (error && error != ENXIO) {
1301 if ((flags & MNT_FORCE) &&
1302 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
1303 if (mp->mnt_vnodecovered != NULL)
1304 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
1305 if (rootvnode == NULL) {
1306 rootvnode = fsrootvp;
1307 vref(rootvnode);
1308 }
1309 vput(fsrootvp);
1310 }
1311 MNT_ILOCK(mp);
1312 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
1313 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
1314 MNT_IUNLOCK(mp);
1315 (void) vfs_allocate_syncvnode(mp);
1316 MNT_ILOCK(mp);
1317 }
1318 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1319 mp->mnt_flag |= async_flag;
1320 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
1321 mp->mnt_kern_flag |= MNTK_ASYNC;
1322 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
1323 if (mp->mnt_kern_flag & MNTK_MWAIT)
1324 wakeup(mp);
1325 MNT_IUNLOCK(mp);
1326 if (coveredvp)
1327 VOP_UNLOCK(coveredvp, 0, td);
1328 return (error);
1329 }
1330 mtx_lock(&mountlist_mtx);
1331 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1332 mtx_unlock(&mountlist_mtx);
1333 if (coveredvp != NULL) {
1334 coveredvp->v_mountedhere = NULL;
1335 vput(coveredvp);
1336 }
1337 vfs_event_signal(NULL, VQ_UNMOUNT, 0);
1338 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
1339 vfs_mount_destroy(mp);
1340 return (0);
1341 }
1342
1343 /*
1344 * ---------------------------------------------------------------------
1345 * Mounting of root filesystem
1346 *
1347 */
1348
1349 struct root_hold_token {
1350 const char *who;
1351 LIST_ENTRY(root_hold_token) list;
1352 };
1353
1354 static LIST_HEAD(, root_hold_token) root_holds =
1355 LIST_HEAD_INITIALIZER(&root_holds);
1356
1357 static int root_mount_complete;
1358
1359 /*
1360 * Hold root mount.
1361 */
1362 struct root_hold_token *
1363 root_mount_hold(const char *identifier)
1364 {
1365 struct root_hold_token *h;
1366
1367 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
1368 h->who = identifier;
1369 mtx_lock(&mountlist_mtx);
1370 LIST_INSERT_HEAD(&root_holds, h, list);
1371 mtx_unlock(&mountlist_mtx);
1372 return (h);
1373 }
1374
1375 /*
1376 * Release root mount.
1377 */
1378 void
1379 root_mount_rel(struct root_hold_token *h)
1380 {
1381
1382 mtx_lock(&mountlist_mtx);
1383 LIST_REMOVE(h, list);
1384 wakeup(&root_holds);
1385 mtx_unlock(&mountlist_mtx);
1386 free(h, M_DEVBUF);
1387 }
1388
1389 /*
1390 * Wait for all subsystems to release root mount.
1391 */
1392 static void
1393 root_mount_prepare(void)
1394 {
1395 struct root_hold_token *h;
1396
1397 for (;;) {
1398 DROP_GIANT();
1399 g_waitidle();
1400 PICKUP_GIANT();
1401 mtx_lock(&mountlist_mtx);
1402 if (LIST_EMPTY(&root_holds)) {
1403 mtx_unlock(&mountlist_mtx);
1404 break;
1405 }
1406 printf("Root mount waiting for:");
1407 LIST_FOREACH(h, &root_holds, list)
1408 printf(" %s", h->who);
1409 printf("\n");
1410 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
1411 hz);
1412 }
1413 }
1414
1415 /*
1416 * Root was mounted, share the good news.
1417 */
1418 static void
1419 root_mount_done(void)
1420 {
1421
1422 /*
1423 * Use a mutex to prevent the wakeup being missed and waiting for
1424 * an extra 1 second sleep.
1425 */
1426 mtx_lock(&mountlist_mtx);
1427 root_mount_complete = 1;
1428 wakeup(&root_mount_complete);
1429 mtx_unlock(&mountlist_mtx);
1430 }
1431
1432 /*
1433 * Return true if root is already mounted.
1434 */
1435 int
1436 root_mounted(void)
1437 {
1438
1439 /* No mutex is acquired here because int stores are atomic. */
1440 return (root_mount_complete);
1441 }
1442
1443 /*
1444 * Wait until root is mounted.
1445 */
1446 void
1447 root_mount_wait(void)
1448 {
1449
1450 /*
1451 * Panic on an obvious deadlock - the function can't be called from
1452 * a thread which is doing the whole SYSINIT stuff.
1453 */
1454 KASSERT(curthread->td_proc->p_pid != 0,
1455 ("root_mount_wait: cannot be called from the swapper thread"));
1456 mtx_lock(&mountlist_mtx);
1457 while (!root_mount_complete) {
1458 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
1459 hz);
1460 }
1461 mtx_unlock(&mountlist_mtx);
1462 }
1463
1464 static void
1465 set_rootvnode(struct thread *td)
1466 {
1467 struct proc *p;
1468
1469 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
1470 panic("Cannot find root vnode");
1471
1472 VOP_UNLOCK(rootvnode, 0, td);
1473
1474 p = td->td_proc;
1475 FILEDESC_XLOCK(p->p_fd);
1476
1477 if (p->p_fd->fd_cdir != NULL)
1478 vrele(p->p_fd->fd_cdir);
1479 p->p_fd->fd_cdir = rootvnode;
1480 VREF(rootvnode);
1481
1482 if (p->p_fd->fd_rdir != NULL)
1483 vrele(p->p_fd->fd_rdir);
1484 p->p_fd->fd_rdir = rootvnode;
1485 VREF(rootvnode);
1486
1487 FILEDESC_XUNLOCK(p->p_fd);
1488
1489 EVENTHANDLER_INVOKE(mountroot);
1490 }
1491
1492 /*
1493 * Mount /devfs as our root filesystem, but do not put it on the mountlist
1494 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup.
1495 */
1496
1497 static void
1498 devfs_first(void)
1499 {
1500 struct thread *td = curthread;
1501 struct vfsoptlist *opts;
1502 struct vfsconf *vfsp;
1503 struct mount *mp = NULL;
1504 int error;
1505
1506 vfsp = vfs_byname("devfs");
1507 KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1508 if (vfsp == NULL)
1509 return;
1510
1511 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
1512
1513 error = VFS_MOUNT(mp, td);
1514 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1515 if (error)
1516 return;
1517
1518 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
1519 TAILQ_INIT(opts);
1520 mp->mnt_opt = opts;
1521
1522 mtx_lock(&mountlist_mtx);
1523 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1524 mtx_unlock(&mountlist_mtx);
1525
1526 set_rootvnode(td);
1527
1528 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1529 if (error)
1530 printf("kern_symlink /dev -> / returns %d\n", error);
1531 }
1532
1533 /*
1534 * Surgically move our devfs to be mounted on /dev.
1535 */
1536
1537 static void
1538 devfs_fixup(struct thread *td)
1539 {
1540 struct nameidata nd;
1541 int error;
1542 struct vnode *vp, *dvp;
1543 struct mount *mp;
1544
1545 /* Remove our devfs mount from the mountlist and purge the cache */
1546 mtx_lock(&mountlist_mtx);
1547 mp = TAILQ_FIRST(&mountlist);
1548 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1549 mtx_unlock(&mountlist_mtx);
1550 cache_purgevfs(mp);
1551
1552 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
1553 VI_LOCK(dvp);
1554 dvp->v_iflag &= ~VI_MOUNT;
1555 VI_UNLOCK(dvp);
1556 dvp->v_mountedhere = NULL;
1557
1558 /* Set up the real rootvnode, and purge the cache */
1559 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1560 set_rootvnode(td);
1561 cache_purgevfs(rootvnode->v_mount);
1562
1563 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1564 error = namei(&nd);
1565 if (error) {
1566 printf("Lookup of /dev for devfs, error: %d\n", error);
1567 return;
1568 }
1569 NDFREE(&nd, NDF_ONLY_PNBUF);
1570 vp = nd.ni_vp;
1571 if (vp->v_type != VDIR) {
1572 vput(vp);
1573 }
1574 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
1575 if (error) {
1576 vput(vp);
1577 }
1578 cache_purge(vp);
1579 mp->mnt_vnodecovered = vp;
1580 vp->v_mountedhere = mp;
1581 mtx_lock(&mountlist_mtx);
1582 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1583 mtx_unlock(&mountlist_mtx);
1584 VOP_UNLOCK(vp, 0, td);
1585 vput(dvp);
1586 vfs_unbusy(mp, td);
1587
1588 /* Unlink the no longer needed /dev/dev -> / symlink */
1589 kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1590 }
1591
1592 /*
1593 * Report errors during filesystem mounting.
1594 */
1595 void
1596 vfs_mount_error(struct mount *mp, const char *fmt, ...)
1597 {
1598 struct vfsoptlist *moptlist = mp->mnt_optnew;
1599 va_list ap;
1600 int error, len;
1601 char *errmsg;
1602
1603 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
1604 if (error || errmsg == NULL || len <= 0)
1605 return;
1606
1607 va_start(ap, fmt);
1608 vsnprintf(errmsg, (size_t)len, fmt, ap);
1609 va_end(ap);
1610 }
1611
1612 /*
1613 * Find and mount the root filesystem
1614 */
1615 void
1616 vfs_mountroot(void)
1617 {
1618 char *cp;
1619 int error, i, asked = 0;
1620
1621 root_mount_prepare();
1622
1623 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
1624 NULL, NULL, mount_init, mount_fini,
1625 UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1626 devfs_first();
1627
1628 /*
1629 * We are booted with instructions to prompt for the root filesystem.
1630 */
1631 if (boothowto & RB_ASKNAME) {
1632 if (!vfs_mountroot_ask())
1633 goto mounted;
1634 asked = 1;
1635 }
1636
1637 /*
1638 * The root filesystem information is compiled in, and we are
1639 * booted with instructions to use it.
1640 */
1641 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1642 if (!vfs_mountroot_try(ctrootdevname))
1643 goto mounted;
1644 ctrootdevname = NULL;
1645 }
1646
1647 /*
1648 * We've been given the generic "use CDROM as root" flag. This is
1649 * necessary because one media may be used in many different
1650 * devices, so we need to search for them.
1651 */
1652 if (boothowto & RB_CDROM) {
1653 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1654 if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1655 goto mounted;
1656 }
1657 }
1658
1659 /*
1660 * Try to use the value read by the loader from /etc/fstab, or
1661 * supplied via some other means. This is the preferred
1662 * mechanism.
1663 */
1664 cp = getenv("vfs.root.mountfrom");
1665 if (cp != NULL) {
1666 error = vfs_mountroot_try(cp);
1667 freeenv(cp);
1668 if (!error)
1669 goto mounted;
1670 }
1671
1672 /*
1673 * Try values that may have been computed by code during boot
1674 */
1675 if (!vfs_mountroot_try(rootdevnames[0]))
1676 goto mounted;
1677 if (!vfs_mountroot_try(rootdevnames[1]))
1678 goto mounted;
1679
1680 /*
1681 * If we (still) have a compiled-in default, try it.
1682 */
1683 if (ctrootdevname != NULL)
1684 if (!vfs_mountroot_try(ctrootdevname))
1685 goto mounted;
1686 /*
1687 * Everything so far has failed, prompt on the console if we haven't
1688 * already tried that.
1689 */
1690 if (!asked)
1691 if (!vfs_mountroot_ask())
1692 goto mounted;
1693
1694 panic("Root mount failed, startup aborted.");
1695
1696 mounted:
1697 root_mount_done();
1698 }
1699
1700 /*
1701 * Mount (mountfrom) as the root filesystem.
1702 */
1703 static int
1704 vfs_mountroot_try(const char *mountfrom)
1705 {
1706 struct mount *mp;
1707 char *vfsname, *path;
1708 time_t timebase;
1709 int error;
1710 char patt[32];
1711
1712 vfsname = NULL;
1713 path = NULL;
1714 mp = NULL;
1715 error = EINVAL;
1716
1717 if (mountfrom == NULL)
1718 return (error); /* don't complain */
1719 printf("Trying to mount root from %s\n", mountfrom);
1720
1721 /* parse vfs name and path */
1722 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1723 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1724 vfsname[0] = path[0] = 0;
1725 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1726 if (sscanf(mountfrom, patt, vfsname, path) < 1)
1727 goto out;
1728
1729 if (path[0] == '\0')
1730 strcpy(path, ROOTNAME);
1731
1732 error = kernel_vmount(
1733 MNT_RDONLY | MNT_ROOTFS,
1734 "fstype", vfsname,
1735 "fspath", "/",
1736 "from", path,
1737 NULL);
1738 if (error == 0) {
1739 /*
1740 * We mount devfs prior to mounting the / FS, so the first
1741 * entry will typically be devfs.
1742 */
1743 mp = TAILQ_FIRST(&mountlist);
1744 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
1745
1746 /*
1747 * Iterate over all currently mounted file systems and use
1748 * the time stamp found to check and/or initialize the RTC.
1749 * Typically devfs has no time stamp and the only other FS
1750 * is the actual / FS.
1751 * Call inittodr() only once and pass it the largest of the
1752 * timestamps we encounter.
1753 */
1754 timebase = 0;
1755 do {
1756 if (mp->mnt_time > timebase)
1757 timebase = mp->mnt_time;
1758 mp = TAILQ_NEXT(mp, mnt_list);
1759 } while (mp != NULL);
1760 inittodr(timebase);
1761
1762 devfs_fixup(curthread);
1763 }
1764 out:
1765 free(path, M_MOUNT);
1766 free(vfsname, M_MOUNT);
1767 return (error);
1768 }
1769
/*
 * ---------------------------------------------------------------------
 * Interactive root filesystem selection code.
 */

/*
 * Prompt on the console for a root filesystem specification until one
 * mounts (returns 0) or an empty line is entered (returns 1).  Entering
 * "?" lists the GEOM managed disk devices and prompts again.
 */
static int
vfs_mountroot_ask(void)
{
	char name[128];

	for (;;) {
		printf("\nManual root filesystem specification:\n");
		printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n");
#if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
		printf(" eg. ufs:da0s1a\n");
#else
		printf(" eg. ufs:/dev/da0a\n");
#endif
		printf(" ? List valid disk boot devices\n");
		printf(" <empty line> Abort manual input\n");
		printf("\nmountroot> ");
		gets(name, sizeof(name), 1);

		switch (name[0]) {
		case '\0':
			/* Empty line: the operator gave up. */
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n ");
			g_dev_print();
			break;
		default:
			if (vfs_mountroot_try(name) == 0)
				return (0);
			break;
		}
	}
}
1803
1804 /*
1805 * ---------------------------------------------------------------------
1806 * Functions for querying mount options/arguments from filesystems.
1807 */
1808
1809 /*
1810 * Check that no unknown options are given
1811 */
1812 int
1813 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1814 {
1815 struct vfsopt *opt;
1816 char errmsg[255];
1817 const char **t, *p, *q;
1818 int ret = 0;
1819
1820 TAILQ_FOREACH(opt, opts, link) {
1821 p = opt->name;
1822 q = NULL;
1823 if (p[0] == 'n' && p[1] == 'o')
1824 q = p + 2;
1825 for(t = global_opts; *t != NULL; t++) {
1826 if (strcmp(*t, p) == 0)
1827 break;
1828 if (q != NULL) {
1829 if (strcmp(*t, q) == 0)
1830 break;
1831 }
1832 }
1833 if (*t != NULL)
1834 continue;
1835 for(t = legal; *t != NULL; t++) {
1836 if (strcmp(*t, p) == 0)
1837 break;
1838 if (q != NULL) {
1839 if (strcmp(*t, q) == 0)
1840 break;
1841 }
1842 }
1843 if (*t != NULL)
1844 continue;
1845 snprintf(errmsg, sizeof(errmsg),
1846 "mount option <%s> is unknown", p);
1847 printf("%s\n", errmsg);
1848 ret = EINVAL;
1849 }
1850 if (ret != 0) {
1851 TAILQ_FOREACH(opt, opts, link) {
1852 if (strcmp(opt->name, "errmsg") == 0) {
1853 strncpy((char *)opt->value, errmsg, opt->len);
1854 }
1855 }
1856 }
1857 return (ret);
1858 }
1859
1860 /*
1861 * Get a mount option by its name.
1862 *
1863 * Return 0 if the option was found, ENOENT otherwise.
1864 * If len is non-NULL it will be filled with the length
1865 * of the option. If buf is non-NULL, it will be filled
1866 * with the address of the option.
1867 */
1868 int
1869 vfs_getopt(opts, name, buf, len)
1870 struct vfsoptlist *opts;
1871 const char *name;
1872 void **buf;
1873 int *len;
1874 {
1875 struct vfsopt *opt;
1876
1877 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1878
1879 TAILQ_FOREACH(opt, opts, link) {
1880 if (strcmp(name, opt->name) == 0) {
1881 if (len != NULL)
1882 *len = opt->len;
1883 if (buf != NULL)
1884 *buf = opt->value;
1885 return (0);
1886 }
1887 }
1888 return (ENOENT);
1889 }
1890
1891 static int
1892 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
1893 {
1894 struct vfsopt *opt;
1895 int i;
1896
1897 if (opts == NULL)
1898 return (-1);
1899
1900 i = 0;
1901 TAILQ_FOREACH(opt, opts, link) {
1902 if (strcmp(name, opt->name) == 0)
1903 return (i);
1904 ++i;
1905 }
1906 return (-1);
1907 }
1908
1909 char *
1910 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1911 {
1912 struct vfsopt *opt;
1913
1914 *error = 0;
1915 TAILQ_FOREACH(opt, opts, link) {
1916 if (strcmp(name, opt->name) != 0)
1917 continue;
1918 if (((char *)opt->value)[opt->len - 1] != '\0') {
1919 *error = EINVAL;
1920 return (NULL);
1921 }
1922 return (opt->value);
1923 }
1924 *error = ENOENT;
1925 return (NULL);
1926 }
1927
1928 int
1929 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1930 {
1931 struct vfsopt *opt;
1932
1933 TAILQ_FOREACH(opt, opts, link) {
1934 if (strcmp(name, opt->name) == 0) {
1935 if (w != NULL)
1936 *w |= val;
1937 return (1);
1938 }
1939 }
1940 if (w != NULL)
1941 *w &= ~val;
1942 return (0);
1943 }
1944
1945 int
1946 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1947 {
1948 va_list ap;
1949 struct vfsopt *opt;
1950 int ret;
1951
1952 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1953
1954 TAILQ_FOREACH(opt, opts, link) {
1955 if (strcmp(name, opt->name) != 0)
1956 continue;
1957 if (opt->len == 0 || opt->value == NULL)
1958 return (0);
1959 if (((char *)opt->value)[opt->len - 1] != '\0')
1960 return (0);
1961 va_start(ap, fmt);
1962 ret = vsscanf(opt->value, fmt, ap);
1963 va_end(ap);
1964 return (ret);
1965 }
1966 return (0);
1967 }
1968
1969 /*
1970 * Find and copy a mount option.
1971 *
1972 * The size of the buffer has to be specified
1973 * in len, if it is not the same length as the
1974 * mount option, EINVAL is returned.
1975 * Returns ENOENT if the option is not found.
1976 */
1977 int
1978 vfs_copyopt(opts, name, dest, len)
1979 struct vfsoptlist *opts;
1980 const char *name;
1981 void *dest;
1982 int len;
1983 {
1984 struct vfsopt *opt;
1985
1986 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1987
1988 TAILQ_FOREACH(opt, opts, link) {
1989 if (strcmp(name, opt->name) == 0) {
1990 if (len != opt->len)
1991 return (EINVAL);
1992 bcopy(opt->value, dest, opt->len);
1993 return (0);
1994 }
1995 }
1996 return (ENOENT);
1997 }
1998
1999 /*
2000 * This is a helper function for filesystems to traverse their
2001 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h
2002 */
2003
2004 struct vnode *
2005 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
2006 {
2007 struct vnode *vp;
2008
2009 mtx_assert(MNT_MTX(mp), MA_OWNED);
2010
2011 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2012 if ((*mvp)->v_yield++ == 500) {
2013 MNT_IUNLOCK(mp);
2014 (*mvp)->v_yield = 0;
2015 uio_yield();
2016 MNT_ILOCK(mp);
2017 }
2018 vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
2019 while (vp != NULL && vp->v_type == VMARKER)
2020 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2021
2022 /* Check if we are done */
2023 if (vp == NULL) {
2024 __mnt_vnode_markerfree(mvp, mp);
2025 return (NULL);
2026 }
2027 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2028 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2029 return (vp);
2030 }
2031
2032 struct vnode *
2033 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
2034 {
2035 struct vnode *vp;
2036
2037 mtx_assert(MNT_MTX(mp), MA_OWNED);
2038
2039 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2040 while (vp != NULL && vp->v_type == VMARKER)
2041 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2042
2043 /* Check if we are done */
2044 if (vp == NULL) {
2045 *mvp = NULL;
2046 return (NULL);
2047 }
2048 mp->mnt_holdcnt++;
2049 MNT_IUNLOCK(mp);
2050 *mvp = (struct vnode *) malloc(sizeof(struct vnode),
2051 M_VNODE_MARKER,
2052 M_WAITOK | M_ZERO);
2053 MNT_ILOCK(mp);
2054 (*mvp)->v_type = VMARKER;
2055
2056 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2057 while (vp != NULL && vp->v_type == VMARKER)
2058 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2059
2060 /* Check if we are done */
2061 if (vp == NULL) {
2062 MNT_IUNLOCK(mp);
2063 free(*mvp, M_VNODE_MARKER);
2064 MNT_ILOCK(mp);
2065 *mvp = NULL;
2066 mp->mnt_holdcnt--;
2067 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2068 wakeup(&mp->mnt_holdcnt);
2069 return (NULL);
2070 }
2071 mp->mnt_markercnt++;
2072 (*mvp)->v_mount = mp;
2073 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2074 return (vp);
2075 }
2076
2077
2078 void
2079 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
2080 {
2081
2082 if (*mvp == NULL)
2083 return;
2084
2085 mtx_assert(MNT_MTX(mp), MA_OWNED);
2086
2087 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2088 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2089 MNT_IUNLOCK(mp);
2090 free(*mvp, M_VNODE_MARKER);
2091 MNT_ILOCK(mp);
2092 *mvp = NULL;
2093
2094 mp->mnt_markercnt--;
2095 mp->mnt_holdcnt--;
2096 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2097 wakeup(&mp->mnt_holdcnt);
2098 }
2099
2100
2101 int
2102 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
2103 {
2104 int error;
2105
2106 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
2107 if (sbp != &mp->mnt_stat)
2108 *sbp = mp->mnt_stat;
2109 return (error);
2110 }
2111
2112 void
2113 vfs_mountedfrom(struct mount *mp, const char *from)
2114 {
2115
2116 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
2117 strlcpy(mp->mnt_stat.f_mntfromname, from,
2118 sizeof mp->mnt_stat.f_mntfromname);
2119 }
2120
2121 /*
2122 * ---------------------------------------------------------------------
2123 * This is the api for building mount args and mounting filesystems from
2124 * inside the kernel.
2125 *
2126 * The API works by accumulation of individual args. First error is
2127 * latched.
2128 *
2129 * XXX: should be documented in new manpage kernel_mount(9)
2130 */
2131
2132 /* A memory allocation which must be freed when we are done */
2133 struct mntaarg {
2134 SLIST_ENTRY(mntaarg) next;
2135 };
2136
2137 /* The header for the mount arguments */
2138 struct mntarg {
2139 struct iovec *v;
2140 int len;
2141 int error;
2142 SLIST_HEAD(, mntaarg) list;
2143 };
2144
2145 /*
2146 * Add a boolean argument.
2147 *
2148 * flag is the boolean value.
2149 * name must start with "no".
2150 */
2151 struct mntarg *
2152 mount_argb(struct mntarg *ma, int flag, const char *name)
2153 {
2154
2155 KASSERT(name[0] == 'n' && name[1] == 'o',
2156 ("mount_argb(...,%s): name must start with 'no'", name));
2157
2158 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
2159 }
2160
2161 /*
2162 * Add an argument printf style
2163 */
2164 struct mntarg *
2165 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
2166 {
2167 va_list ap;
2168 struct mntaarg *maa;
2169 struct sbuf *sb;
2170 int len;
2171
2172 if (ma == NULL) {
2173 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2174 SLIST_INIT(&ma->list);
2175 }
2176 if (ma->error)
2177 return (ma);
2178
2179 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2180 M_MOUNT, M_WAITOK);
2181 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2182 ma->v[ma->len].iov_len = strlen(name) + 1;
2183 ma->len++;
2184
2185 sb = sbuf_new_auto();
2186 va_start(ap, fmt);
2187 sbuf_vprintf(sb, fmt, ap);
2188 va_end(ap);
2189 sbuf_finish(sb);
2190 len = sbuf_len(sb) + 1;
2191 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2192 SLIST_INSERT_HEAD(&ma->list, maa, next);
2193 bcopy(sbuf_data(sb), maa + 1, len);
2194 sbuf_delete(sb);
2195
2196 ma->v[ma->len].iov_base = maa + 1;
2197 ma->v[ma->len].iov_len = len;
2198 ma->len++;
2199
2200 return (ma);
2201 }
2202
2203 /*
2204 * Add an argument which is a userland string.
2205 */
2206 struct mntarg *
2207 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
2208 {
2209 struct mntaarg *maa;
2210 char *tbuf;
2211
2212 if (val == NULL)
2213 return (ma);
2214 if (ma == NULL) {
2215 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2216 SLIST_INIT(&ma->list);
2217 }
2218 if (ma->error)
2219 return (ma);
2220 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2221 SLIST_INSERT_HEAD(&ma->list, maa, next);
2222 tbuf = (void *)(maa + 1);
2223 ma->error = copyinstr(val, tbuf, len, NULL);
2224 return (mount_arg(ma, name, tbuf, -1));
2225 }
2226
2227 /*
2228 * Plain argument.
2229 *
2230 * If length is -1, use printf.
2231 */
2232 struct mntarg *
2233 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
2234 {
2235
2236 if (ma == NULL) {
2237 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2238 SLIST_INIT(&ma->list);
2239 }
2240 if (ma->error)
2241 return (ma);
2242
2243 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2244 M_MOUNT, M_WAITOK);
2245 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2246 ma->v[ma->len].iov_len = strlen(name) + 1;
2247 ma->len++;
2248
2249 ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
2250 if (len < 0)
2251 ma->v[ma->len].iov_len = strlen(val) + 1;
2252 else
2253 ma->v[ma->len].iov_len = len;
2254 ma->len++;
2255 return (ma);
2256 }
2257
2258 /*
2259 * Free a mntarg structure
2260 */
2261 static void
2262 free_mntarg(struct mntarg *ma)
2263 {
2264 struct mntaarg *maa;
2265
2266 while (!SLIST_EMPTY(&ma->list)) {
2267 maa = SLIST_FIRST(&ma->list);
2268 SLIST_REMOVE_HEAD(&ma->list, next);
2269 free(maa, M_MOUNT);
2270 }
2271 free(ma->v, M_MOUNT);
2272 free(ma, M_MOUNT);
2273 }
2274
2275 /*
2276 * Mount a filesystem
2277 */
2278 int
2279 kernel_mount(struct mntarg *ma, int flags)
2280 {
2281 struct uio auio;
2282 int error;
2283
2284 KASSERT(ma != NULL, ("kernel_mount NULL ma"));
2285 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
2286 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
2287
2288 auio.uio_iov = ma->v;
2289 auio.uio_iovcnt = ma->len;
2290 auio.uio_segflg = UIO_SYSSPACE;
2291
2292 error = ma->error;
2293 if (!error)
2294 error = vfs_donmount(curthread, flags, &auio);
2295 free_mntarg(ma);
2296 return (error);
2297 }
2298
2299 /*
2300 * A printflike function to mount a filesystem.
2301 */
2302 int
2303 kernel_vmount(int flags, ...)
2304 {
2305 struct mntarg *ma = NULL;
2306 va_list ap;
2307 const char *cp;
2308 const void *vp;
2309 int error;
2310
2311 va_start(ap, flags);
2312 for (;;) {
2313 cp = va_arg(ap, const char *);
2314 if (cp == NULL)
2315 break;
2316 vp = va_arg(ap, const void *);
2317 ma = mount_arg(ma, cp, vp, -1);
2318 }
2319 va_end(ap);
2320
2321 error = kernel_mount(ma, flags);
2322 return (error);
2323 }
Cache object: 4707be5c7fcbccb7d1d312bb8597b47b
|