FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c
1 /*-
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/clock.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/libkern.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/filedesc.h>
53 #include <sys/reboot.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysproto.h>
56 #include <sys/sx.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61 #include <vm/uma.h>
62
63 #include <geom/geom.h>
64
65 #include <machine/stdarg.h>
66
67 #include <security/audit/audit.h>
68 #include <security/mac/mac_framework.h>
69
70 #include "opt_rootdevname.h"
71 #include "opt_ddb.h"
72 #include "opt_mac.h"
73
74 #ifdef DDB
75 #include <ddb/ddb.h>
76 #endif
77
/* Name string for the root device (not referenced in this part of the file). */
#define	ROOTNAME		"root_device"
/*
 * Upper bound, in bytes, that vfs_buildopts() applies to a single option
 * name, a single option value, and the running total for one option list.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
80
/* Forward declarations of file-local helpers. */
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static void	free_mntarg(struct mntarg *ma);
static int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
87
88 static int usermount = 0;
89 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
90 "Unprivileged users may mount and unmount file systems");
91
92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
93 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
94 static uma_zone_t mount_zone;
95
96 /* List of mounted filesystems. */
97 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
98
99 /* For any iteration/modification of mountlist */
100 struct mtx mountlist_mtx;
101 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
102
103 TAILQ_HEAD(vfsoptlist, vfsopt);
104 struct vfsopt {
105 TAILQ_ENTRY(vfsopt) link;
106 char *name;
107 void *value;
108 int len;
109 };
110
/*
 * Vnode of the system root directory, i.e. "/" as seen when no chroot
 * is active.
 */
struct vnode *rootvnode;
116
117 /*
118 * The root filesystem is detailed in the kernel environment variable
119 * vfs.root.mountfrom, which is expected to be in the general format
120 *
121 * <vfsname>:[<path>]
122 * vfsname := the name of a VFS known to the kernel and capable
123 * of being mounted as root
124 * path := disk device name or other data used by the filesystem
125 * to locate its physical store
126 */
127
128 /*
129 * Global opts, taken by all filesystems
130 */
131 static const char *global_opts[] = {
132 "errmsg",
133 "fstype",
134 "fspath",
135 "ro",
136 "rw",
137 "nosuid",
138 "noexec",
139 NULL
140 };
141
142 /*
143 * The root specifiers we will try if RB_CDROM is specified.
144 */
145 static char *cdrom_rootdevnames[] = {
146 "cd9660:cd0",
147 "cd9660:acd0",
148 NULL
149 };
150
151 /* legacy find-root code */
152 char *rootdevnames[2] = {NULL, NULL};
153 #ifndef ROOTDEVNAME
154 # define ROOTDEVNAME NULL
155 #endif
156 static const char *ctrootdevname = ROOTDEVNAME;
157
158 /*
159 * ---------------------------------------------------------------------
160 * Functions for building and sanitizing the mount options
161 */
162
163 /* Remove one mount option. */
164 static void
165 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
166 {
167
168 TAILQ_REMOVE(opts, opt, link);
169 free(opt->name, M_MOUNT);
170 if (opt->value != NULL)
171 free(opt->value, M_MOUNT);
172 #ifdef INVARIANTS
173 else if (opt->len != 0)
174 panic("%s: mount option with NULL value but length != 0",
175 __func__);
176 #endif
177 free(opt, M_MOUNT);
178 }
179
180 /* Release all resources related to the mount options. */
181 void
182 vfs_freeopts(struct vfsoptlist *opts)
183 {
184 struct vfsopt *opt;
185
186 while (!TAILQ_EMPTY(opts)) {
187 opt = TAILQ_FIRST(opts);
188 vfs_freeopt(opts, opt);
189 }
190 free(opts, M_MOUNT);
191 }
192
193 void
194 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
195 {
196 struct vfsopt *opt, *temp;
197
198 if (opts == NULL)
199 return;
200 TAILQ_FOREACH_SAFE(opt, opts, link, temp) {
201 if (strcmp(opt->name, name) == 0)
202 vfs_freeopt(opts, opt);
203 }
204 }
205
/*
 * Report whether two option names refer to the same option, ignoring a
 * "no" prefix on either one.  Returns 1 for "opt"/"opt", "noopt"/"noopt",
 * "noopt"/"opt" and "opt"/"noopt"; returns 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{

	return (strcmp(opt1, opt2) == 0 ||
	    (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) ||
	    (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0));
}
224
225 /*
226 * If a mount option is specified several times,
227 * (with or without the "no" prefix) only keep
228 * the last occurence of it.
229 */
230 static void
231 vfs_sanitizeopts(struct vfsoptlist *opts)
232 {
233 struct vfsopt *opt, *opt2, *tmp;
234
235 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
236 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
237 while (opt2 != NULL) {
238 if (vfs_equalopts(opt->name, opt2->name)) {
239 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
240 vfs_freeopt(opts, opt2);
241 opt2 = tmp;
242 } else {
243 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
244 }
245 }
246 }
247 }
248
249 /*
250 * Build a linked list of mount options from a struct uio.
251 */
252 static int
253 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
254 {
255 struct vfsoptlist *opts;
256 struct vfsopt *opt;
257 size_t memused;
258 unsigned int i, iovcnt;
259 int error, namelen, optlen;
260
261 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
262 TAILQ_INIT(opts);
263 memused = 0;
264 iovcnt = auio->uio_iovcnt;
265 for (i = 0; i < iovcnt; i += 2) {
266 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
267 namelen = auio->uio_iov[i].iov_len;
268 optlen = auio->uio_iov[i + 1].iov_len;
269 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
270 opt->value = NULL;
271 opt->len = 0;
272
273 /*
274 * Do this early, so jumps to "bad" will free the current
275 * option.
276 */
277 TAILQ_INSERT_TAIL(opts, opt, link);
278 memused += sizeof(struct vfsopt) + optlen + namelen;
279
280 /*
281 * Avoid consuming too much memory, and attempts to overflow
282 * memused.
283 */
284 if (memused > VFS_MOUNTARG_SIZE_MAX ||
285 optlen > VFS_MOUNTARG_SIZE_MAX ||
286 namelen > VFS_MOUNTARG_SIZE_MAX) {
287 error = EINVAL;
288 goto bad;
289 }
290
291 if (auio->uio_segflg == UIO_SYSSPACE) {
292 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
293 } else {
294 error = copyin(auio->uio_iov[i].iov_base, opt->name,
295 namelen);
296 if (error)
297 goto bad;
298 }
299 /* Ensure names are null-terminated strings. */
300 if (opt->name[namelen - 1] != '\0') {
301 error = EINVAL;
302 goto bad;
303 }
304 if (optlen != 0) {
305 opt->len = optlen;
306 opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
307 if (auio->uio_segflg == UIO_SYSSPACE) {
308 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
309 optlen);
310 } else {
311 error = copyin(auio->uio_iov[i + 1].iov_base,
312 opt->value, optlen);
313 if (error)
314 goto bad;
315 }
316 }
317 }
318 vfs_sanitizeopts(opts);
319 *options = opts;
320 return (0);
321 bad:
322 vfs_freeopts(opts);
323 return (error);
324 }
325
326 /*
327 * Merge the old mount options with the new ones passed
328 * in the MNT_UPDATE case.
329 */
330 static void
331 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
332 {
333 struct vfsopt *opt, *opt2, *new;
334
335 TAILQ_FOREACH(opt, opts, link) {
336 /*
337 * Check that this option hasn't been redefined
338 * nor cancelled with a "no" mount option.
339 */
340 opt2 = TAILQ_FIRST(toopts);
341 while (opt2 != NULL) {
342 if (strcmp(opt2->name, opt->name) == 0)
343 goto next;
344 if (strncmp(opt2->name, "no", 2) == 0 &&
345 strcmp(opt2->name + 2, opt->name) == 0) {
346 vfs_freeopt(toopts, opt2);
347 goto next;
348 }
349 opt2 = TAILQ_NEXT(opt2, link);
350 }
351 /* We want this option, duplicate it. */
352 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
353 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
354 strcpy(new->name, opt->name);
355 if (opt->len != 0) {
356 new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
357 bcopy(opt->value, new->value, opt->len);
358 } else {
359 new->value = NULL;
360 }
361 new->len = opt->len;
362 TAILQ_INSERT_TAIL(toopts, new, link);
363 next:
364 continue;
365 }
366 }
367
368 /*
369 * Mount a filesystem.
370 */
371 int
372 nmount(td, uap)
373 struct thread *td;
374 struct nmount_args /* {
375 struct iovec *iovp;
376 unsigned int iovcnt;
377 int flags;
378 } */ *uap;
379 {
380 struct uio *auio;
381 struct iovec *iov;
382 unsigned int i;
383 int error;
384 u_int iovcnt;
385
386 AUDIT_ARG(fflags, uap->flags);
387
388 /*
389 * Filter out MNT_ROOTFS. We do not want clients of nmount() in
390 * userspace to set this flag, but we must filter it out if we want
391 * MNT_UPDATE on the root file system to work.
392 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
393 */
394 uap->flags &= ~MNT_ROOTFS;
395
396 iovcnt = uap->iovcnt;
397 /*
398 * Check that we have an even number of iovec's
399 * and that we have at least two options.
400 */
401 if ((iovcnt & 1) || (iovcnt < 4))
402 return (EINVAL);
403
404 error = copyinuio(uap->iovp, iovcnt, &auio);
405 if (error)
406 return (error);
407 iov = auio->uio_iov;
408 for (i = 0; i < iovcnt; i++) {
409 if (iov->iov_len > MMAXOPTIONLEN) {
410 free(auio, M_IOV);
411 return (EINVAL);
412 }
413 iov++;
414 }
415 error = vfs_donmount(td, uap->flags, auio);
416
417 free(auio, M_IOV);
418 return (error);
419 }
420
421 /*
422 * ---------------------------------------------------------------------
423 * Various utility functions
424 */
425
/*
 * Apply MNT_REF() to the mount point with the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK) held around it.
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
434
/*
 * Apply MNT_REL() to the mount point with the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK) held around it.
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
443
444 static int
445 mount_init(void *mem, int size, int flags)
446 {
447 struct mount *mp;
448
449 mp = (struct mount *)mem;
450 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
451 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
452 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
453 return (0);
454 }
455
456 static void
457 mount_fini(void *mem, int size)
458 {
459 struct mount *mp;
460
461 mp = (struct mount *)mem;
462 lockdestroy(&mp->mnt_explock);
463 lockdestroy(&mp->mnt_lock);
464 mtx_destroy(&mp->mnt_mtx);
465 }
466
467 /*
468 * Allocate and initialize the mount point struct.
469 */
470 struct mount *
471 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
472 const char *fspath, struct thread *td)
473 {
474 struct mount *mp;
475
476 mp = uma_zalloc(mount_zone, M_WAITOK);
477 bzero(&mp->mnt_startzero,
478 __rangeof(struct mount, mnt_startzero, mnt_endzero));
479 TAILQ_INIT(&mp->mnt_nvnodelist);
480 mp->mnt_nvnodelistsize = 0;
481 mp->mnt_ref = 0;
482 (void) vfs_busy(mp, LK_NOWAIT, 0, td);
483 mp->mnt_op = vfsp->vfc_vfsops;
484 mp->mnt_vfc = vfsp;
485 vfsp->vfc_refcount++; /* XXX Unlocked */
486 mp->mnt_stat.f_type = vfsp->vfc_typenum;
487 mp->mnt_gen++;
488 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
489 mp->mnt_vnodecovered = vp;
490 mp->mnt_cred = crdup(td->td_ucred);
491 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
492 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
493 mp->mnt_iosize_max = DFLTPHYS;
494 #ifdef MAC
495 mac_init_mount(mp);
496 mac_create_mount(td->td_ucred, mp);
497 #endif
498 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
499 return (mp);
500 }
501
502 /*
503 * Destroy the mount struct previously allocated by vfs_mount_alloc().
504 */
505 void
506 vfs_mount_destroy(struct mount *mp)
507 {
508 int i;
509
510 MNT_ILOCK(mp);
511 for (i = 0; mp->mnt_ref && i < 3; i++)
512 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
513 /*
514 * This will always cause a 3 second delay in rebooting due to
515 * refs on the root mountpoint that never go away. Most of these
516 * are held by init which never exits.
517 */
518 if (i == 3 && (!rebooting || bootverbose))
519 printf("Mount point %s had %d dangling refs\n",
520 mp->mnt_stat.f_mntonname, mp->mnt_ref);
521 if (mp->mnt_holdcnt != 0) {
522 printf("Waiting for mount point to be unheld\n");
523 while (mp->mnt_holdcnt != 0) {
524 mp->mnt_holdcntwaiters++;
525 msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
526 PZERO, "mntdestroy", 0);
527 mp->mnt_holdcntwaiters--;
528 }
529 printf("mount point unheld\n");
530 }
531 if (mp->mnt_writeopcount > 0) {
532 printf("Waiting for mount point write ops\n");
533 while (mp->mnt_writeopcount > 0) {
534 mp->mnt_kern_flag |= MNTK_SUSPEND;
535 msleep(&mp->mnt_writeopcount,
536 MNT_MTX(mp),
537 PZERO, "mntdestroy2", 0);
538 }
539 printf("mount point write ops completed\n");
540 }
541 if (mp->mnt_secondary_writes > 0) {
542 printf("Waiting for mount point secondary write ops\n");
543 while (mp->mnt_secondary_writes > 0) {
544 mp->mnt_kern_flag |= MNTK_SUSPEND;
545 msleep(&mp->mnt_secondary_writes,
546 MNT_MTX(mp),
547 PZERO, "mntdestroy3", 0);
548 }
549 printf("mount point secondary write ops completed\n");
550 }
551 MNT_IUNLOCK(mp);
552 mp->mnt_vfc->vfc_refcount--;
553 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
554 struct vnode *vp;
555
556 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
557 vprint("", vp);
558 panic("unmount: dangling vnode");
559 }
560 MNT_ILOCK(mp);
561 if (mp->mnt_kern_flag & MNTK_MWAIT)
562 wakeup(mp);
563 if (mp->mnt_writeopcount != 0)
564 panic("vfs_mount_destroy: nonzero writeopcount");
565 if (mp->mnt_secondary_writes != 0)
566 panic("vfs_mount_destroy: nonzero secondary_writes");
567 if (mp->mnt_nvnodelistsize != 0)
568 panic("vfs_mount_destroy: nonzero nvnodelistsize");
569 mp->mnt_writeopcount = -1000;
570 mp->mnt_nvnodelistsize = -1000;
571 mp->mnt_secondary_writes = -1000;
572 MNT_IUNLOCK(mp);
573 #ifdef MAC
574 mac_destroy_mount(mp);
575 #endif
576 if (mp->mnt_opt != NULL)
577 vfs_freeopts(mp->mnt_opt);
578 crfree(mp->mnt_cred);
579 uma_zfree(mount_zone, mp);
580 }
581
582 int
583 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
584 {
585 struct vfsoptlist *optlist;
586 struct vfsopt *opt, *noro_opt, *tmp_opt;
587 char *fstype, *fspath, *errmsg;
588 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
589 int has_rw, has_noro;
590
591 errmsg = NULL;
592 errmsg_len = 0;
593 errmsg_pos = -1;
594 has_rw = 0;
595 has_noro = 0;
596
597 error = vfs_buildopts(fsoptions, &optlist);
598 if (error)
599 return (error);
600
601 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
602 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
603
604 /*
605 * We need these two options before the others,
606 * and they are mandatory for any filesystem.
607 * Ensure they are NUL terminated as well.
608 */
609 fstypelen = 0;
610 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
611 if (error || fstype[fstypelen - 1] != '\0') {
612 error = EINVAL;
613 if (errmsg != NULL)
614 strncpy(errmsg, "Invalid fstype", errmsg_len);
615 goto bail;
616 }
617 fspathlen = 0;
618 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
619 if (error || fspath[fspathlen - 1] != '\0') {
620 error = EINVAL;
621 if (errmsg != NULL)
622 strncpy(errmsg, "Invalid fspath", errmsg_len);
623 goto bail;
624 }
625
626 /*
627 * We need to see if we have the "update" option
628 * before we call vfs_domount(), since vfs_domount() has special
629 * logic based on MNT_UPDATE. This is very important
630 * when we want to update the root filesystem.
631 */
632 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
633 if (strcmp(opt->name, "update") == 0) {
634 fsflags |= MNT_UPDATE;
635 vfs_freeopt(optlist, opt);
636 }
637 else if (strcmp(opt->name, "async") == 0)
638 fsflags |= MNT_ASYNC;
639 else if (strcmp(opt->name, "force") == 0) {
640 fsflags |= MNT_FORCE;
641 vfs_freeopt(optlist, opt);
642 }
643 else if (strcmp(opt->name, "reload") == 0) {
644 fsflags |= MNT_RELOAD;
645 vfs_freeopt(optlist, opt);
646 }
647 else if (strcmp(opt->name, "multilabel") == 0)
648 fsflags |= MNT_MULTILABEL;
649 else if (strcmp(opt->name, "noasync") == 0)
650 fsflags &= ~MNT_ASYNC;
651 else if (strcmp(opt->name, "noatime") == 0)
652 fsflags |= MNT_NOATIME;
653 else if (strcmp(opt->name, "atime") == 0) {
654 free(opt->name, M_MOUNT);
655 opt->name = strdup("nonoatime", M_MOUNT);
656 }
657 else if (strcmp(opt->name, "noclusterr") == 0)
658 fsflags |= MNT_NOCLUSTERR;
659 else if (strcmp(opt->name, "clusterr") == 0) {
660 free(opt->name, M_MOUNT);
661 opt->name = strdup("nonoclusterr", M_MOUNT);
662 }
663 else if (strcmp(opt->name, "noclusterw") == 0)
664 fsflags |= MNT_NOCLUSTERW;
665 else if (strcmp(opt->name, "clusterw") == 0) {
666 free(opt->name, M_MOUNT);
667 opt->name = strdup("nonoclusterw", M_MOUNT);
668 }
669 else if (strcmp(opt->name, "noexec") == 0)
670 fsflags |= MNT_NOEXEC;
671 else if (strcmp(opt->name, "exec") == 0) {
672 free(opt->name, M_MOUNT);
673 opt->name = strdup("nonoexec", M_MOUNT);
674 }
675 else if (strcmp(opt->name, "nosuid") == 0)
676 fsflags |= MNT_NOSUID;
677 else if (strcmp(opt->name, "suid") == 0) {
678 free(opt->name, M_MOUNT);
679 opt->name = strdup("nonosuid", M_MOUNT);
680 }
681 else if (strcmp(opt->name, "nosymfollow") == 0)
682 fsflags |= MNT_NOSYMFOLLOW;
683 else if (strcmp(opt->name, "symfollow") == 0) {
684 free(opt->name, M_MOUNT);
685 opt->name = strdup("nonosymfollow", M_MOUNT);
686 }
687 else if (strcmp(opt->name, "noro") == 0) {
688 fsflags &= ~MNT_RDONLY;
689 has_noro = 1;
690 }
691 else if (strcmp(opt->name, "rw") == 0) {
692 fsflags &= ~MNT_RDONLY;
693 has_rw = 1;
694 }
695 else if (strcmp(opt->name, "ro") == 0)
696 fsflags |= MNT_RDONLY;
697 else if (strcmp(opt->name, "rdonly") == 0) {
698 free(opt->name, M_MOUNT);
699 opt->name = strdup("ro", M_MOUNT);
700 fsflags |= MNT_RDONLY;
701 }
702 else if (strcmp(opt->name, "suiddir") == 0)
703 fsflags |= MNT_SUIDDIR;
704 else if (strcmp(opt->name, "sync") == 0)
705 fsflags |= MNT_SYNCHRONOUS;
706 else if (strcmp(opt->name, "union") == 0)
707 fsflags |= MNT_UNION;
708 }
709
710 /*
711 * If "rw" was specified as a mount option, and we
712 * are trying to update a mount-point from "ro" to "rw",
713 * we need a mount option "noro", since in vfs_mergeopts(),
714 * "noro" will cancel "ro", but "rw" will not do anything.
715 */
716 if (has_rw && !has_noro) {
717 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
718 noro_opt->name = strdup("noro", M_MOUNT);
719 noro_opt->value = NULL;
720 noro_opt->len = 0;
721 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
722 }
723
724 /*
725 * Be ultra-paranoid about making sure the type and fspath
726 * variables will fit in our mp buffers, including the
727 * terminating NUL.
728 */
729 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
730 error = ENAMETOOLONG;
731 goto bail;
732 }
733
734 mtx_lock(&Giant);
735 error = vfs_domount(td, fstype, fspath, fsflags, optlist);
736 mtx_unlock(&Giant);
737 bail:
738 /* copyout the errmsg */
739 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
740 && errmsg_len > 0 && errmsg != NULL) {
741 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
742 bcopy(errmsg,
743 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
744 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
745 } else {
746 copyout(errmsg,
747 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
748 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
749 }
750 }
751
752 if (error != 0)
753 vfs_freeopts(optlist);
754 return (error);
755 }
756
757 /*
758 * Old mount API.
759 */
760 #ifndef _SYS_SYSPROTO_H_
761 struct mount_args {
762 char *type;
763 char *path;
764 int flags;
765 caddr_t data;
766 };
767 #endif
768 /* ARGSUSED */
769 int
770 mount(td, uap)
771 struct thread *td;
772 struct mount_args /* {
773 char *type;
774 char *path;
775 int flags;
776 caddr_t data;
777 } */ *uap;
778 {
779 char *fstype;
780 struct vfsconf *vfsp = NULL;
781 struct mntarg *ma = NULL;
782 int error;
783
784 AUDIT_ARG(fflags, uap->flags);
785
786 /*
787 * Filter out MNT_ROOTFS. We do not want clients of mount() in
788 * userspace to set this flag, but we must filter it out if we want
789 * MNT_UPDATE on the root file system to work.
790 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
791 */
792 uap->flags &= ~MNT_ROOTFS;
793
794 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
795 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
796 if (error) {
797 free(fstype, M_TEMP);
798 return (error);
799 }
800
801 AUDIT_ARG(text, fstype);
802 mtx_lock(&Giant);
803 vfsp = vfs_byname_kld(fstype, td, &error);
804 free(fstype, M_TEMP);
805 if (vfsp == NULL) {
806 mtx_unlock(&Giant);
807 return (ENOENT);
808 }
809 if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
810 mtx_unlock(&Giant);
811 return (EOPNOTSUPP);
812 }
813
814 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
815 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
816 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
817 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
818 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
819
820 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
821 mtx_unlock(&Giant);
822 return (error);
823 }
824
825
826 /*
827 * vfs_domount(): actually attempt a filesystem mount.
 *
 * Must be called with Giant held (asserted below).  Handles both a new
 * mount and, when MNT_UPDATE is set in fsflags, an update of the mount
 * whose root vnode is found at fspath.
 *
 * fsdata is installed as mp->mnt_optnew for the duration of VFS_MOUNT()
 * and becomes mp->mnt_opt only when the mount or update succeeds; on
 * failure it is not freed here.
 *
 * Returns 0 on success or an errno value.
828 */
829 static int
830 vfs_domount(
831 struct thread *td, /* Calling thread. */
832 const char *fstype, /* Filesystem type. */
833 char *fspath, /* Mount path. */
834 int fsflags, /* Flags common to all filesystems. */
835 void *fsdata /* Options local to the filesystem. */
836 )
837 {
838 struct vnode *vp;
839 struct mount *mp;
840 struct vfsconf *vfsp;
841 struct export_args export;
842 int error, flag = 0;
843 struct vattr va;
844 struct nameidata nd;
845
846 mtx_assert(&Giant, MA_OWNED);
847 /*
848 * Be ultra-paranoid about making sure the type and fspath
849 * variables will fit in our mp buffers, including the
850 * terminating NUL.
851 */
852 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
853 return (ENAMETOOLONG);
854
/* Jailed callers, and everyone when vfs.usermount is 0, need the privilege. */
855 if (jailed(td->td_ucred) || usermount == 0) {
856 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
857 return (error);
858 }
859
860 /*
861 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
862 */
863 if (fsflags & MNT_EXPORTED) {
864 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
865 if (error)
866 return (error);
867 }
868 if (fsflags & MNT_SUIDDIR) {
869 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
870 if (error)
871 return (error);
872 }
873 /*
874 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
875 */
876 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
877 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
878 fsflags |= MNT_NOSUID | MNT_USER;
879 }
880
881 /* Load KLDs before we lock the covered vnode to avoid reversals. */
882 vfsp = NULL;
883 if ((fsflags & MNT_UPDATE) == 0) {
884 /* Don't try to load KLDs if we're mounting the root. */
885 if (fsflags & MNT_ROOTFS)
886 vfsp = vfs_byname(fstype);
887 else
888 vfsp = vfs_byname_kld(fstype, td, &error);
889 if (vfsp == NULL)
890 return (ENODEV);
891 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
892 return (EPERM);
893 }
894 /*
895 * Get vnode to be covered
896 */
897 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
898 fspath, td);
899 if ((error = namei(&nd)) != 0)
900 return (error);
901 NDFREE(&nd, NDF_ONLY_PNBUF);
902 vp = nd.ni_vp;
903 if (fsflags & MNT_UPDATE) {
/* An update must name the root vnode of the mounted filesystem. */
904 if ((vp->v_vflag & VV_ROOT) == 0) {
905 vput(vp);
906 return (EINVAL);
907 }
908 mp = vp->v_mount;
909 MNT_ILOCK(mp);
/* Remember the current flags; they are restored below if the update fails. */
910 flag = mp->mnt_flag;
911 /*
912 * We only allow the filesystem to be reloaded if it
913 * is currently mounted read-only.
914 */
915 if ((fsflags & MNT_RELOAD) &&
916 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
917 MNT_IUNLOCK(mp);
918 vput(vp);
919 return (EOPNOTSUPP); /* Needs translation */
920 }
921 MNT_IUNLOCK(mp);
922 /*
923 * Only privileged root, or (if MNT_USER is set) the user that
924 * did the original mount is permitted to update it.
925 */
926 error = vfs_suser(mp, td);
927 if (error) {
928 vput(vp);
929 return (error);
930 }
931 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
932 vput(vp);
933 return (EBUSY);
934 }
/* VI_MOUNT marks a mount operation in progress on this vnode. */
935 VI_LOCK(vp);
936 if ((vp->v_iflag & VI_MOUNT) != 0 ||
937 vp->v_mountedhere != NULL) {
938 VI_UNLOCK(vp);
939 vfs_unbusy(mp, td);
940 vput(vp);
941 return (EBUSY);
942 }
943 vp->v_iflag |= VI_MOUNT;
944 VI_UNLOCK(vp);
945 MNT_ILOCK(mp);
946 mp->mnt_flag |= fsflags &
947 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
948 MNT_IUNLOCK(mp);
949 VOP_UNLOCK(vp, 0, td);
/* Carry over the old options that the new list neither redefines nor cancels. */
950 mp->mnt_optnew = fsdata;
951 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
952 } else {
953 /*
954 * If the user is not root, ensure that they own the directory
955 * onto which we are attempting to mount.
956 */
957 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
958 if (error) {
959 vput(vp);
960 return (error);
961 }
962 if (va.va_uid != td->td_ucred->cr_uid) {
963 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
964 0);
965 if (error) {
966 vput(vp);
967 return (error);
968 }
969 }
970 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
971 if (error != 0) {
972 vput(vp);
973 return (error);
974 }
975 if (vp->v_type != VDIR) {
976 vput(vp);
977 return (ENOTDIR);
978 }
979 VI_LOCK(vp);
980 if ((vp->v_iflag & VI_MOUNT) != 0 ||
981 vp->v_mountedhere != NULL) {
982 VI_UNLOCK(vp);
983 vput(vp);
984 return (EBUSY);
985 }
986 vp->v_iflag |= VI_MOUNT;
987 VI_UNLOCK(vp);
988
989 /*
990 * Allocate and initialize the filesystem.
991 */
992 mp = vfs_mount_alloc(vp, vfsp, fspath, td);
993 VOP_UNLOCK(vp, 0, td);
994
995 /* XXXMAC: pass to vfs_mount_alloc? */
996 mp->mnt_optnew = fsdata;
997 }
998
999 /*
1000 * Set the mount level flags.
1001 */
1002 MNT_ILOCK(mp);
1003 mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
1004 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
1005 MNT_RDONLY));
1006 if ((mp->mnt_flag & MNT_ASYNC) == 0)
1007 mp->mnt_kern_flag &= ~MNTK_ASYNC;
1008 MNT_IUNLOCK(mp);
1009 /*
1010 * Mount the filesystem.
1011 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
1012 * get. No freeing of cn_pnbuf.
1013 */
1014 error = VFS_MOUNT(mp, td);
1015
1016 /*
1017 * Process the export option only if we are
1018 * updating mount options.
1019 */
1020 if (!error && (fsflags & MNT_UPDATE)) {
1021 if (vfs_copyopt(mp->mnt_optnew, "export", &export,
1022 sizeof(export)) == 0)
1023 error = vfs_export(mp, &export);
1024 }
1025
/* On success the new option list replaces the old one. */
1026 if (!error) {
1027 if (mp->mnt_opt != NULL)
1028 vfs_freeopts(mp->mnt_opt);
1029 mp->mnt_opt = mp->mnt_optnew;
1030 (void)VFS_STATFS(mp, &mp->mnt_stat, td);
1031 }
1032 /*
1033 * Prevent external consumers of mount options from reading
1034 * mnt_optnew.
1035 */
1036 mp->mnt_optnew = NULL;
/* Update path: restore or tidy the flags, adjust the syncer vnode, and return. */
1037 if (mp->mnt_flag & MNT_UPDATE) {
1038 MNT_ILOCK(mp);
1039 if (error)
1040 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
1041 (flag & ~MNT_QUOTA);
1042 else
1043 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
1044 MNT_FORCE | MNT_SNAPSHOT);
1045 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
1046 mp->mnt_kern_flag |= MNTK_ASYNC;
1047 else
1048 mp->mnt_kern_flag &= ~MNTK_ASYNC;
1049 MNT_IUNLOCK(mp);
1050 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1051 if (mp->mnt_syncer == NULL)
1052 error = vfs_allocate_syncvnode(mp);
1053 } else {
1054 if (mp->mnt_syncer != NULL)
1055 vrele(mp->mnt_syncer);
1056 mp->mnt_syncer = NULL;
1057 }
1058 vfs_unbusy(mp, td);
1059 VI_LOCK(vp);
1060 vp->v_iflag &= ~VI_MOUNT;
1061 VI_UNLOCK(vp);
1062 vrele(vp);
1063 return (error);
1064 }
/* New mount from here on. */
1065 MNT_ILOCK(mp);
1066 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
1067 mp->mnt_kern_flag |= MNTK_ASYNC;
1068 else
1069 mp->mnt_kern_flag &= ~MNTK_ASYNC;
1070 MNT_IUNLOCK(mp);
1071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1072 /*
1073 * Put the new filesystem on the mount list after root.
1074 */
1075 cache_purge(vp);
1076 if (!error) {
1077 struct vnode *newdp;
1078
1079 VI_LOCK(vp);
1080 vp->v_iflag &= ~VI_MOUNT;
1081 VI_UNLOCK(vp);
1082 vp->v_mountedhere = mp;
1083 mtx_lock(&mountlist_mtx);
1084 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1085 mtx_unlock(&mountlist_mtx);
1086 vfs_event_signal(NULL, VQ_MOUNT, 0);
1087 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
1088 panic("mount: lost mount");
1089 mountcheckdirs(vp, newdp);
1090 vput(newdp);
1091 VOP_UNLOCK(vp, 0, td);
1092 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1093 error = vfs_allocate_syncvnode(mp);
1094 vfs_unbusy(mp, td);
1095 if (error)
1096 vrele(vp);
1097 } else {
/* VFS_MOUNT() failed: clear VI_MOUNT, destroy the new mount point, release vp. */
1098 VI_LOCK(vp);
1099 vp->v_iflag &= ~VI_MOUNT;
1100 VI_UNLOCK(vp);
1101 vfs_unbusy(mp, td);
1102 vfs_mount_destroy(mp);
1103 vput(vp);
1104 }
1105 return (error);
1106 }
1107
1108 /*
1109 * Unmount a filesystem.
1110 *
1111 * Note: unmount takes a path to the vnode mounted on as argument, not
1112 * special file (as before).
1113 */
1114 #ifndef _SYS_SYSPROTO_H_
1115 struct unmount_args {
1116 char *path;
1117 int flags;
1118 };
1119 #endif
1120 /* ARGSUSED */
1121 int
1122 unmount(td, uap)
1123 struct thread *td;
1124 register struct unmount_args /* {
1125 char *path;
1126 int flags;
1127 } */ *uap;
1128 {
1129 struct mount *mp;
1130 char *pathbuf;
1131 int error, id0, id1;
1132
1133 if (jailed(td->td_ucred) || usermount == 0) {
1134 error = priv_check(td, PRIV_VFS_UNMOUNT);
1135 if (error)
1136 return (error);
1137 }
1138
1139 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1140 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
1141 if (error) {
1142 free(pathbuf, M_TEMP);
1143 return (error);
1144 }
1145 AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
1146 mtx_lock(&Giant);
1147 if (uap->flags & MNT_BYFSID) {
1148 /* Decode the filesystem ID. */
1149 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
1150 mtx_unlock(&Giant);
1151 free(pathbuf, M_TEMP);
1152 return (EINVAL);
1153 }
1154
1155 mtx_lock(&mountlist_mtx);
1156 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1157 if (mp->mnt_stat.f_fsid.val[0] == id0 &&
1158 mp->mnt_stat.f_fsid.val[1] == id1)
1159 break;
1160 }
1161 mtx_unlock(&mountlist_mtx);
1162 } else {
1163 mtx_lock(&mountlist_mtx);
1164 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1165 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
1166 break;
1167 }
1168 mtx_unlock(&mountlist_mtx);
1169 }
1170 free(pathbuf, M_TEMP);
1171 if (mp == NULL) {
1172 /*
1173 * Previously we returned ENOENT for a nonexistent path and
1174 * EINVAL for a non-mountpoint. We cannot tell these apart
1175 * now, so in the !MNT_BYFSID case return the more likely
1176 * EINVAL for compatibility.
1177 */
1178 mtx_unlock(&Giant);
1179 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
1180 }
1181
1182 /*
1183 * Don't allow unmounting the root filesystem.
1184 */
1185 if (mp->mnt_flag & MNT_ROOTFS) {
1186 mtx_unlock(&Giant);
1187 return (EINVAL);
1188 }
1189 error = dounmount(mp, uap->flags, td);
1190 mtx_unlock(&Giant);
1191 return (error);
1192 }
1193
/*
 * Do the actual filesystem unmount.
 *
 * The caller must hold Giant (asserted below).
 *
 * Returns 0 once the mount has been taken off the mountlist and destroyed.
 * Otherwise returns EBUSY if the covered vnode no longer refers to this
 * mount or an unmount is already in progress, the vfs_suser() error if the
 * caller may not unmount it, the lockmgr() error if the mount lock could
 * not be drained, or the VFS_SYNC()/VFS_UNMOUNT() error.  On the
 * VFS_SYNC()/VFS_UNMOUNT() failure path the flag and cdir/rdir changes
 * made here are undone before returning.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;
	int mnt_gen_r;

	mtx_assert(&Giant, MA_OWNED);

	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
		/* Remember the generation so a change can be detected below. */
		mnt_gen_r = mp->mnt_gen;
		/* Take a hold on the covered vnode across the lock acquisition. */
		VI_LOCK(coveredvp);
		vholdl(coveredvp);
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
		vdrop(coveredvp);
		/*
		 * Check for mp being unmounted while waiting for the
		 * covered vnode lock.
		 */
		if (coveredvp->v_mountedhere != mp ||
		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
			VOP_UNLOCK(coveredvp, 0, td);
			return (EBUSY);
		}
	}
	/*
	 * Only privileged root, or (if MNT_USER is set) the user that did the
	 * original mount is permitted to unmount this filesystem.
	 */
	error = vfs_suser(mp, td);
	if (error) {
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}

	MNT_ILOCK(mp);
	/* Another unmount of this mount is already under way. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/*
	 * Drain the mount lock; the mount interlock is passed in via
	 * LK_INTERLOCK.  A non-forced unmount adds LK_NOWAIT.
	 */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
	if (error) {
		/* Back out the unmount flags and wake anyone marked as waiting. */
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
		    MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* Save the async setting so it can be restored if the unmount fails. */
	MNT_ILOCK(mp);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/*
	 * For forced unmounts, move process cdir/rdir refs on the fs root
	 * vnode to the covered vnode.  For non-forced unmounts we want
	 * such references to cause an EBUSY error.
	 */
	if ((flags & MNT_FORCE) &&
	    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * VFS_UNMOUNT() is called when the mount is read-only, when
	 * VFS_SYNC() succeeded, or when the unmount is forced; otherwise the
	 * VFS_SYNC() error is kept and handled below.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if ((flags & MNT_FORCE) &&
		    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
		/*
		 * Re-create the syncer vnode for a writable mount that no
		 * longer has one; the interlock is dropped around the call.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
			MNT_IUNLOCK(mp);
			(void) vfs_allocate_syncvnode(mp);
			MNT_ILOCK(mp);
		}
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		/* Restore the async state saved before the attempt. */
		mp->mnt_flag |= async_flag;
		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
			mp->mnt_kern_flag |= MNTK_ASYNC;
		lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}
	/* Success: unlink the mount and detach it from the covered vnode. */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	if (coveredvp != NULL) {
		coveredvp->v_mountedhere = NULL;
		vput(coveredvp);
	}
	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
	vfs_mount_destroy(mp);
	return (0);
}
1339
1340 /*
1341 * ---------------------------------------------------------------------
1342 * Mounting of root filesystem
1343 *
1344 */
1345
/*
 * One outstanding hold on the root mount.  Tokens are created by
 * root_mount_hold() and freed by root_mount_rel().
 */
struct root_hold_token {
	const char			*who;	/* holder's identifier, printed by root_mount_prepare() */
	LIST_ENTRY(root_hold_token)	list;	/* linkage on root_holds */
};

/* All outstanding holds; manipulated under mountlist_mtx in this file. */
static LIST_HEAD(, root_hold_token)	root_holds =
    LIST_HEAD_INITIALIZER(&root_holds);

/* Set to 1 by root_mount_done(); read by root_mounted() and root_mount_wait(). */
static int root_mount_complete;
1355
1356 /*
1357 * Hold root mount.
1358 */
1359 struct root_hold_token *
1360 root_mount_hold(const char *identifier)
1361 {
1362 struct root_hold_token *h;
1363
1364 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
1365 h->who = identifier;
1366 mtx_lock(&mountlist_mtx);
1367 LIST_INSERT_HEAD(&root_holds, h, list);
1368 mtx_unlock(&mountlist_mtx);
1369 return (h);
1370 }
1371
1372 /*
1373 * Release root mount.
1374 */
1375 void
1376 root_mount_rel(struct root_hold_token *h)
1377 {
1378
1379 mtx_lock(&mountlist_mtx);
1380 LIST_REMOVE(h, list);
1381 wakeup(&root_holds);
1382 mtx_unlock(&mountlist_mtx);
1383 free(h, M_DEVBUF);
1384 }
1385
1386 /*
1387 * Wait for all subsystems to release root mount.
1388 */
1389 static void
1390 root_mount_prepare(void)
1391 {
1392 struct root_hold_token *h;
1393
1394 for (;;) {
1395 DROP_GIANT();
1396 g_waitidle();
1397 PICKUP_GIANT();
1398 mtx_lock(&mountlist_mtx);
1399 if (LIST_EMPTY(&root_holds)) {
1400 mtx_unlock(&mountlist_mtx);
1401 break;
1402 }
1403 printf("Root mount waiting for:");
1404 LIST_FOREACH(h, &root_holds, list)
1405 printf(" %s", h->who);
1406 printf("\n");
1407 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
1408 hz);
1409 }
1410 }
1411
1412 /*
1413 * Root was mounted, share the good news.
1414 */
1415 static void
1416 root_mount_done(void)
1417 {
1418
1419 /*
1420 * Use a mutex to prevent the wakeup being missed and waiting for
1421 * an extra 1 second sleep.
1422 */
1423 mtx_lock(&mountlist_mtx);
1424 root_mount_complete = 1;
1425 wakeup(&root_mount_complete);
1426 mtx_unlock(&mountlist_mtx);
1427 }
1428
1429 /*
1430 * Return true if root is already mounted.
1431 */
1432 int
1433 root_mounted(void)
1434 {
1435
1436 /* No mutex is acquired here because int stores are atomic. */
1437 return (root_mount_complete);
1438 }
1439
1440 /*
1441 * Wait until root is mounted.
1442 */
1443 void
1444 root_mount_wait(void)
1445 {
1446
1447 /*
1448 * Panic on an obvious deadlock - the function can't be called from
1449 * a thread which is doing the whole SYSINIT stuff.
1450 */
1451 KASSERT(curthread->td_proc->p_pid != 0,
1452 ("root_mount_wait: cannot be called from the swapper thread"));
1453 mtx_lock(&mountlist_mtx);
1454 while (!root_mount_complete) {
1455 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
1456 hz);
1457 }
1458 mtx_unlock(&mountlist_mtx);
1459 }
1460
1461 static void
1462 set_rootvnode(struct thread *td)
1463 {
1464 struct proc *p;
1465
1466 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
1467 panic("Cannot find root vnode");
1468
1469 p = td->td_proc;
1470 FILEDESC_SLOCK(p->p_fd);
1471
1472 if (p->p_fd->fd_cdir != NULL)
1473 vrele(p->p_fd->fd_cdir);
1474 p->p_fd->fd_cdir = rootvnode;
1475 VREF(rootvnode);
1476
1477 if (p->p_fd->fd_rdir != NULL)
1478 vrele(p->p_fd->fd_rdir);
1479 p->p_fd->fd_rdir = rootvnode;
1480 VREF(rootvnode);
1481
1482 FILEDESC_SUNLOCK(p->p_fd);
1483
1484 VOP_UNLOCK(rootvnode, 0, td);
1485 }
1486
1487 /*
1488 * Mount /devfs as our root filesystem, but do not put it on the mountlist
1489 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup.
1490 */
1491
1492 static void
1493 devfs_first(void)
1494 {
1495 struct thread *td = curthread;
1496 struct vfsoptlist *opts;
1497 struct vfsconf *vfsp;
1498 struct mount *mp = NULL;
1499 int error;
1500
1501 vfsp = vfs_byname("devfs");
1502 KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1503 if (vfsp == NULL)
1504 return;
1505
1506 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
1507
1508 error = VFS_MOUNT(mp, td);
1509 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1510 if (error)
1511 return;
1512
1513 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
1514 TAILQ_INIT(opts);
1515 mp->mnt_opt = opts;
1516
1517 mtx_lock(&mountlist_mtx);
1518 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1519 mtx_unlock(&mountlist_mtx);
1520
1521 set_rootvnode(td);
1522
1523 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1524 if (error)
1525 printf("kern_symlink /dev -> / returns %d\n", error);
1526 }
1527
1528 /*
1529 * Surgically move our devfs to be mounted on /dev.
1530 */
1531
1532 static void
1533 devfs_fixup(struct thread *td)
1534 {
1535 struct nameidata nd;
1536 int error;
1537 struct vnode *vp, *dvp;
1538 struct mount *mp;
1539
1540 /* Remove our devfs mount from the mountlist and purge the cache */
1541 mtx_lock(&mountlist_mtx);
1542 mp = TAILQ_FIRST(&mountlist);
1543 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1544 mtx_unlock(&mountlist_mtx);
1545 cache_purgevfs(mp);
1546
1547 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
1548 VI_LOCK(dvp);
1549 dvp->v_iflag &= ~VI_MOUNT;
1550 VI_UNLOCK(dvp);
1551 dvp->v_mountedhere = NULL;
1552
1553 /* Set up the real rootvnode, and purge the cache */
1554 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1555 set_rootvnode(td);
1556 cache_purgevfs(rootvnode->v_mount);
1557
1558 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1559 error = namei(&nd);
1560 if (error) {
1561 printf("Lookup of /dev for devfs, error: %d\n", error);
1562 return;
1563 }
1564 NDFREE(&nd, NDF_ONLY_PNBUF);
1565 vp = nd.ni_vp;
1566 if (vp->v_type != VDIR) {
1567 vput(vp);
1568 }
1569 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
1570 if (error) {
1571 vput(vp);
1572 }
1573 cache_purge(vp);
1574 mp->mnt_vnodecovered = vp;
1575 vp->v_mountedhere = mp;
1576 mtx_lock(&mountlist_mtx);
1577 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1578 mtx_unlock(&mountlist_mtx);
1579 VOP_UNLOCK(vp, 0, td);
1580 vput(dvp);
1581 vfs_unbusy(mp, td);
1582
1583 /* Unlink the no longer needed /dev/dev -> / symlink */
1584 kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1585 }
1586
1587 /*
1588 * Report errors during filesystem mounting.
1589 */
1590 void
1591 vfs_mount_error(struct mount *mp, const char *fmt, ...)
1592 {
1593 struct vfsoptlist *moptlist = mp->mnt_optnew;
1594 va_list ap;
1595 int error, len;
1596 char *errmsg;
1597
1598 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
1599 if (error || errmsg == NULL || len <= 0)
1600 return;
1601
1602 va_start(ap, fmt);
1603 vsnprintf(errmsg, (size_t)len, fmt, ap);
1604 va_end(ap);
1605 }
1606
1607 /*
1608 * Find and mount the root filesystem
1609 */
1610 void
1611 vfs_mountroot(void)
1612 {
1613 char *cp;
1614 int error, i, asked = 0;
1615
1616 root_mount_prepare();
1617
1618 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
1619 NULL, NULL, mount_init, mount_fini,
1620 UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1621 devfs_first();
1622
1623 /*
1624 * We are booted with instructions to prompt for the root filesystem.
1625 */
1626 if (boothowto & RB_ASKNAME) {
1627 if (!vfs_mountroot_ask())
1628 goto mounted;
1629 asked = 1;
1630 }
1631
1632 /*
1633 * The root filesystem information is compiled in, and we are
1634 * booted with instructions to use it.
1635 */
1636 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1637 if (!vfs_mountroot_try(ctrootdevname))
1638 goto mounted;
1639 ctrootdevname = NULL;
1640 }
1641
1642 /*
1643 * We've been given the generic "use CDROM as root" flag. This is
1644 * necessary because one media may be used in many different
1645 * devices, so we need to search for them.
1646 */
1647 if (boothowto & RB_CDROM) {
1648 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1649 if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1650 goto mounted;
1651 }
1652 }
1653
1654 /*
1655 * Try to use the value read by the loader from /etc/fstab, or
1656 * supplied via some other means. This is the preferred
1657 * mechanism.
1658 */
1659 cp = getenv("vfs.root.mountfrom");
1660 if (cp != NULL) {
1661 error = vfs_mountroot_try(cp);
1662 freeenv(cp);
1663 if (!error)
1664 goto mounted;
1665 }
1666
1667 /*
1668 * Try values that may have been computed by code during boot
1669 */
1670 if (!vfs_mountroot_try(rootdevnames[0]))
1671 goto mounted;
1672 if (!vfs_mountroot_try(rootdevnames[1]))
1673 goto mounted;
1674
1675 /*
1676 * If we (still) have a compiled-in default, try it.
1677 */
1678 if (ctrootdevname != NULL)
1679 if (!vfs_mountroot_try(ctrootdevname))
1680 goto mounted;
1681 /*
1682 * Everything so far has failed, prompt on the console if we haven't
1683 * already tried that.
1684 */
1685 if (!asked)
1686 if (!vfs_mountroot_ask())
1687 goto mounted;
1688
1689 panic("Root mount failed, startup aborted.");
1690
1691 mounted:
1692 root_mount_done();
1693 }
1694
1695 /*
1696 * Mount (mountfrom) as the root filesystem.
1697 */
1698 static int
1699 vfs_mountroot_try(const char *mountfrom)
1700 {
1701 struct mount *mp;
1702 char *vfsname, *path;
1703 time_t timebase;
1704 int error;
1705 char patt[32];
1706
1707 vfsname = NULL;
1708 path = NULL;
1709 mp = NULL;
1710 error = EINVAL;
1711
1712 if (mountfrom == NULL)
1713 return (error); /* don't complain */
1714 printf("Trying to mount root from %s\n", mountfrom);
1715
1716 /* parse vfs name and path */
1717 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1718 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1719 vfsname[0] = path[0] = 0;
1720 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1721 if (sscanf(mountfrom, patt, vfsname, path) < 1)
1722 goto out;
1723
1724 if (path[0] == '\0')
1725 strcpy(path, ROOTNAME);
1726
1727 error = kernel_vmount(
1728 MNT_RDONLY | MNT_ROOTFS,
1729 "fstype", vfsname,
1730 "fspath", "/",
1731 "from", path,
1732 NULL);
1733 if (error == 0) {
1734 /*
1735 * We mount devfs prior to mounting the / FS, so the first
1736 * entry will typically be devfs.
1737 */
1738 mp = TAILQ_FIRST(&mountlist);
1739 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
1740
1741 /*
1742 * Iterate over all currently mounted file systems and use
1743 * the time stamp found to check and/or initialize the RTC.
1744 * Typically devfs has no time stamp and the only other FS
1745 * is the actual / FS.
1746 * Call inittodr() only once and pass it the largest of the
1747 * timestamps we encounter.
1748 */
1749 timebase = 0;
1750 do {
1751 if (mp->mnt_time > timebase)
1752 timebase = mp->mnt_time;
1753 mp = TAILQ_NEXT(mp, mnt_list);
1754 } while (mp != NULL);
1755 inittodr(timebase);
1756
1757 devfs_fixup(curthread);
1758 }
1759 out:
1760 free(path, M_MOUNT);
1761 free(vfsname, M_MOUNT);
1762 return (error);
1763 }
1764
1765 /*
1766 * ---------------------------------------------------------------------
1767 * Interactive root filesystem selection code.
1768 */
1769
/*
 * Prompt on the console for a root filesystem specification until one
 * mounts successfully or the operator gives up.
 *
 * Returns 0 once vfs_mountroot_try() succeeds on the entered name, or 1
 * if an empty line is entered.  Entering "?" lists the GEOM disk devices
 * and prompts again.
 */
static int
vfs_mountroot_ask(void)
{
	char line[128];

	for (;;) {
		printf("\nManual root filesystem specification:\n");
		printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n");
#if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
		printf(" eg. ufs:da0s1a\n");
#else
		printf(" eg. ufs:/dev/da0a\n");
#endif
		printf(" ? List valid disk boot devices\n");
		printf(" <empty line> Abort manual input\n");
		printf("\nmountroot> ");
		gets(line, sizeof(line), 1);

		switch (line[0]) {
		case '\0':
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n ");
			g_dev_print();
			break;
		default:
			if (vfs_mountroot_try(line) == 0)
				return (0);
			break;
		}
	}
}
1798
1799 /*
1800 * ---------------------------------------------------------------------
1801 * Functions for querying mount options/arguments from filesystems.
1802 */
1803
1804 /*
1805 * Check that no unknown options are given
1806 */
1807 int
1808 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1809 {
1810 struct vfsopt *opt;
1811 char errmsg[255];
1812 const char **t, *p, *q;
1813 int ret = 0;
1814
1815 TAILQ_FOREACH(opt, opts, link) {
1816 p = opt->name;
1817 q = NULL;
1818 if (p[0] == 'n' && p[1] == 'o')
1819 q = p + 2;
1820 for(t = global_opts; *t != NULL; t++) {
1821 if (strcmp(*t, p) == 0)
1822 break;
1823 if (q != NULL) {
1824 if (strcmp(*t, q) == 0)
1825 break;
1826 }
1827 }
1828 if (*t != NULL)
1829 continue;
1830 for(t = legal; *t != NULL; t++) {
1831 if (strcmp(*t, p) == 0)
1832 break;
1833 if (q != NULL) {
1834 if (strcmp(*t, q) == 0)
1835 break;
1836 }
1837 }
1838 if (*t != NULL)
1839 continue;
1840 snprintf(errmsg, sizeof(errmsg),
1841 "mount option <%s> is unknown", p);
1842 printf("%s\n", errmsg);
1843 ret = EINVAL;
1844 }
1845 if (ret != 0) {
1846 TAILQ_FOREACH(opt, opts, link) {
1847 if (strcmp(opt->name, "errmsg") == 0) {
1848 strncpy((char *)opt->value, errmsg, opt->len);
1849 }
1850 }
1851 }
1852 return (ret);
1853 }
1854
1855 /*
1856 * Get a mount option by its name.
1857 *
1858 * Return 0 if the option was found, ENOENT otherwise.
1859 * If len is non-NULL it will be filled with the length
1860 * of the option. If buf is non-NULL, it will be filled
1861 * with the address of the option.
1862 */
1863 int
1864 vfs_getopt(opts, name, buf, len)
1865 struct vfsoptlist *opts;
1866 const char *name;
1867 void **buf;
1868 int *len;
1869 {
1870 struct vfsopt *opt;
1871
1872 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1873
1874 TAILQ_FOREACH(opt, opts, link) {
1875 if (strcmp(name, opt->name) == 0) {
1876 if (len != NULL)
1877 *len = opt->len;
1878 if (buf != NULL)
1879 *buf = opt->value;
1880 return (0);
1881 }
1882 }
1883 return (ENOENT);
1884 }
1885
1886 static int
1887 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
1888 {
1889 struct vfsopt *opt;
1890 int i;
1891
1892 if (opts == NULL)
1893 return (-1);
1894
1895 i = 0;
1896 TAILQ_FOREACH(opt, opts, link) {
1897 if (strcmp(name, opt->name) == 0)
1898 return (i);
1899 ++i;
1900 }
1901 return (-1);
1902 }
1903
1904 char *
1905 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1906 {
1907 struct vfsopt *opt;
1908
1909 *error = 0;
1910 TAILQ_FOREACH(opt, opts, link) {
1911 if (strcmp(name, opt->name) != 0)
1912 continue;
1913 if (((char *)opt->value)[opt->len - 1] != '\0') {
1914 *error = EINVAL;
1915 return (NULL);
1916 }
1917 return (opt->value);
1918 }
1919 *error = ENOENT;
1920 return (NULL);
1921 }
1922
1923 int
1924 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1925 {
1926 struct vfsopt *opt;
1927
1928 TAILQ_FOREACH(opt, opts, link) {
1929 if (strcmp(name, opt->name) == 0) {
1930 if (w != NULL)
1931 *w |= val;
1932 return (1);
1933 }
1934 }
1935 if (w != NULL)
1936 *w &= ~val;
1937 return (0);
1938 }
1939
1940 int
1941 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1942 {
1943 va_list ap;
1944 struct vfsopt *opt;
1945 int ret;
1946
1947 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1948
1949 TAILQ_FOREACH(opt, opts, link) {
1950 if (strcmp(name, opt->name) != 0)
1951 continue;
1952 if (opt->len == 0 || opt->value == NULL)
1953 return (0);
1954 if (((char *)opt->value)[opt->len - 1] != '\0')
1955 return (0);
1956 va_start(ap, fmt);
1957 ret = vsscanf(opt->value, fmt, ap);
1958 va_end(ap);
1959 return (ret);
1960 }
1961 return (0);
1962 }
1963
1964 /*
1965 * Find and copy a mount option.
1966 *
1967 * The size of the buffer has to be specified
1968 * in len, if it is not the same length as the
1969 * mount option, EINVAL is returned.
1970 * Returns ENOENT if the option is not found.
1971 */
1972 int
1973 vfs_copyopt(opts, name, dest, len)
1974 struct vfsoptlist *opts;
1975 const char *name;
1976 void *dest;
1977 int len;
1978 {
1979 struct vfsopt *opt;
1980
1981 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1982
1983 TAILQ_FOREACH(opt, opts, link) {
1984 if (strcmp(name, opt->name) == 0) {
1985 if (len != opt->len)
1986 return (EINVAL);
1987 bcopy(opt->value, dest, opt->len);
1988 return (0);
1989 }
1990 }
1991 return (ENOENT);
1992 }
1993
1994 /*
1995 * This is a helper function for filesystems to traverse their
1996 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h
1997 */
1998
1999 struct vnode *
2000 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
2001 {
2002 struct vnode *vp;
2003
2004 mtx_assert(MNT_MTX(mp), MA_OWNED);
2005
2006 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2007 if ((*mvp)->v_yield++ == 500) {
2008 MNT_IUNLOCK(mp);
2009 (*mvp)->v_yield = 0;
2010 uio_yield();
2011 MNT_ILOCK(mp);
2012 }
2013 vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
2014 while (vp != NULL && vp->v_type == VMARKER)
2015 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2016
2017 /* Check if we are done */
2018 if (vp == NULL) {
2019 __mnt_vnode_markerfree(mvp, mp);
2020 return (NULL);
2021 }
2022 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2023 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2024 return (vp);
2025 }
2026
2027 struct vnode *
2028 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
2029 {
2030 struct vnode *vp;
2031
2032 mtx_assert(MNT_MTX(mp), MA_OWNED);
2033
2034 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2035 while (vp != NULL && vp->v_type == VMARKER)
2036 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2037
2038 /* Check if we are done */
2039 if (vp == NULL) {
2040 *mvp = NULL;
2041 return (NULL);
2042 }
2043 mp->mnt_holdcnt++;
2044 MNT_IUNLOCK(mp);
2045 *mvp = (struct vnode *) malloc(sizeof(struct vnode),
2046 M_VNODE_MARKER,
2047 M_WAITOK | M_ZERO);
2048 MNT_ILOCK(mp);
2049 (*mvp)->v_type = VMARKER;
2050
2051 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2052 while (vp != NULL && vp->v_type == VMARKER)
2053 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2054
2055 /* Check if we are done */
2056 if (vp == NULL) {
2057 MNT_IUNLOCK(mp);
2058 free(*mvp, M_VNODE_MARKER);
2059 MNT_ILOCK(mp);
2060 *mvp = NULL;
2061 mp->mnt_holdcnt--;
2062 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2063 wakeup(&mp->mnt_holdcnt);
2064 return (NULL);
2065 }
2066 mp->mnt_markercnt++;
2067 (*mvp)->v_mount = mp;
2068 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2069 return (vp);
2070 }
2071
2072
2073 void
2074 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
2075 {
2076
2077 if (*mvp == NULL)
2078 return;
2079
2080 mtx_assert(MNT_MTX(mp), MA_OWNED);
2081
2082 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2083 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2084 MNT_IUNLOCK(mp);
2085 free(*mvp, M_VNODE_MARKER);
2086 MNT_ILOCK(mp);
2087 *mvp = NULL;
2088
2089 mp->mnt_markercnt--;
2090 mp->mnt_holdcnt--;
2091 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2092 wakeup(&mp->mnt_holdcnt);
2093 }
2094
2095
2096 int
2097 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
2098 {
2099 int error;
2100
2101 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
2102 if (sbp != &mp->mnt_stat)
2103 *sbp = mp->mnt_stat;
2104 return (error);
2105 }
2106
2107 void
2108 vfs_mountedfrom(struct mount *mp, const char *from)
2109 {
2110
2111 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
2112 strlcpy(mp->mnt_stat.f_mntfromname, from,
2113 sizeof mp->mnt_stat.f_mntfromname);
2114 }
2115
2116 /*
2117 * ---------------------------------------------------------------------
2118 * This is the api for building mount args and mounting filesystems from
2119 * inside the kernel.
2120 *
2121 * The API works by accumulation of individual args. First error is
2122 * latched.
2123 *
2124 * XXX: should be documented in new manpage kernel_mount(9)
2125 */
2126
/*
 * A memory allocation which must be freed when we are done.
 *
 * This is only the list header; mount_argf() and mount_argsu() allocate
 * it with the payload bytes placed directly after it (at maa + 1).
 */
struct mntaarg {
	SLIST_ENTRY(mntaarg)	next;	/* linkage on mntarg.list */
};

/* The header for the mount arguments */
struct mntarg {
	struct iovec *v;		/* name/value iovec pairs, grown with realloc() */
	int len;			/* number of iovec entries in use in v */
	int error;			/* latched error; once set, mount_arg*() add nothing more */
	SLIST_HEAD(, mntaarg)	list;	/* allocations released by free_mntarg() */
};
2139
2140 /*
2141 * Add a boolean argument.
2142 *
2143 * flag is the boolean value.
2144 * name must start with "no".
2145 */
2146 struct mntarg *
2147 mount_argb(struct mntarg *ma, int flag, const char *name)
2148 {
2149
2150 KASSERT(name[0] == 'n' && name[1] == 'o',
2151 ("mount_argb(...,%s): name must start with 'no'", name));
2152
2153 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
2154 }
2155
2156 /*
2157 * Add an argument printf style
2158 */
2159 struct mntarg *
2160 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
2161 {
2162 va_list ap;
2163 struct mntaarg *maa;
2164 struct sbuf *sb;
2165 int len;
2166
2167 if (ma == NULL) {
2168 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2169 SLIST_INIT(&ma->list);
2170 }
2171 if (ma->error)
2172 return (ma);
2173
2174 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2175 M_MOUNT, M_WAITOK);
2176 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2177 ma->v[ma->len].iov_len = strlen(name) + 1;
2178 ma->len++;
2179
2180 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
2181 va_start(ap, fmt);
2182 sbuf_vprintf(sb, fmt, ap);
2183 va_end(ap);
2184 sbuf_finish(sb);
2185 len = sbuf_len(sb) + 1;
2186 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2187 SLIST_INSERT_HEAD(&ma->list, maa, next);
2188 bcopy(sbuf_data(sb), maa + 1, len);
2189 sbuf_delete(sb);
2190
2191 ma->v[ma->len].iov_base = maa + 1;
2192 ma->v[ma->len].iov_len = len;
2193 ma->len++;
2194
2195 return (ma);
2196 }
2197
2198 /*
2199 * Add an argument which is a userland string.
2200 */
2201 struct mntarg *
2202 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
2203 {
2204 struct mntaarg *maa;
2205 char *tbuf;
2206
2207 if (val == NULL)
2208 return (ma);
2209 if (ma == NULL) {
2210 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2211 SLIST_INIT(&ma->list);
2212 }
2213 if (ma->error)
2214 return (ma);
2215 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2216 SLIST_INSERT_HEAD(&ma->list, maa, next);
2217 tbuf = (void *)(maa + 1);
2218 ma->error = copyinstr(val, tbuf, len, NULL);
2219 return (mount_arg(ma, name, tbuf, -1));
2220 }
2221
2222 /*
2223 * Plain argument.
2224 *
2225 * If length is -1, use printf.
2226 */
2227 struct mntarg *
2228 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
2229 {
2230
2231 if (ma == NULL) {
2232 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2233 SLIST_INIT(&ma->list);
2234 }
2235 if (ma->error)
2236 return (ma);
2237
2238 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2239 M_MOUNT, M_WAITOK);
2240 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2241 ma->v[ma->len].iov_len = strlen(name) + 1;
2242 ma->len++;
2243
2244 ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
2245 if (len < 0)
2246 ma->v[ma->len].iov_len = strlen(val) + 1;
2247 else
2248 ma->v[ma->len].iov_len = len;
2249 ma->len++;
2250 return (ma);
2251 }
2252
2253 /*
2254 * Free a mntarg structure
2255 */
2256 static void
2257 free_mntarg(struct mntarg *ma)
2258 {
2259 struct mntaarg *maa;
2260
2261 while (!SLIST_EMPTY(&ma->list)) {
2262 maa = SLIST_FIRST(&ma->list);
2263 SLIST_REMOVE_HEAD(&ma->list, next);
2264 free(maa, M_MOUNT);
2265 }
2266 free(ma->v, M_MOUNT);
2267 free(ma, M_MOUNT);
2268 }
2269
2270 /*
2271 * Mount a filesystem
2272 */
2273 int
2274 kernel_mount(struct mntarg *ma, int flags)
2275 {
2276 struct uio auio;
2277 int error;
2278
2279 KASSERT(ma != NULL, ("kernel_mount NULL ma"));
2280 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
2281 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
2282
2283 auio.uio_iov = ma->v;
2284 auio.uio_iovcnt = ma->len;
2285 auio.uio_segflg = UIO_SYSSPACE;
2286
2287 error = ma->error;
2288 if (!error)
2289 error = vfs_donmount(curthread, flags, &auio);
2290 free_mntarg(ma);
2291 return (error);
2292 }
2293
2294 /*
2295 * A printflike function to mount a filesystem.
2296 */
2297 int
2298 kernel_vmount(int flags, ...)
2299 {
2300 struct mntarg *ma = NULL;
2301 va_list ap;
2302 const char *cp;
2303 const void *vp;
2304 int error;
2305
2306 va_start(ap, flags);
2307 for (;;) {
2308 cp = va_arg(ap, const char *);
2309 if (cp == NULL)
2310 break;
2311 vp = va_arg(ap, const void *);
2312 ma = mount_arg(ma, cp, vp, -1);
2313 }
2314 va_end(ap);
2315
2316 error = kernel_mount(ma, flags);
2317 return (error);
2318 }
Cache object: 36e5fe63eec8e86e93c1c681f9c10b11
|