FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c
1 /*-
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/clock.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/libkern.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/filedesc.h>
53 #include <sys/reboot.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysproto.h>
56 #include <sys/sx.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61 #include <vm/uma.h>
62
63 #include <geom/geom.h>
64
65 #include <machine/stdarg.h>
66
67 #include <security/audit/audit.h>
68 #include <security/mac/mac_framework.h>
69
70 #include "opt_rootdevname.h"
71 #include "opt_ddb.h"
72 #include "opt_mac.h"
73
74 #ifdef DDB
75 #include <ddb/ddb.h>
76 #endif
77
/* NOTE(review): not referenced in the visible part of this file -- confirm use. */
#define	ROOTNAME		"root_device"
/*
 * Upper bound, in bytes, that vfs_buildopts() applies to each option name,
 * to each option value and to the running total of one mount request.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)

/* Forward declarations of file-local helpers. */
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static void	free_mntarg(struct mntarg *ma);
static int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);

/*
 * Backing variable of the read/write vfs.usermount sysctl.  While it is
 * zero (the default) vfs_domount() and unmount() require the caller to pass
 * a privilege check.
 */
static int	usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* malloc(9) types for mount-related allocations and vnode markers. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
/* UMA zone that struct mount is allocated from and returned to. */
static uma_zone_t mount_zone;
95
/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* Mutex to be held for any iteration over, or modification of, mountlist. */
struct mtx mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;
108
109 /*
110 * The root filesystem is detailed in the kernel environment variable
111 * vfs.root.mountfrom, which is expected to be in the general format
112 *
113 * <vfsname>:[<path>]
114 * vfsname := the name of a VFS known to the kernel and capable
115 * of being mounted as root
116 * path := disk device name or other data used by the filesystem
117 * to locate its physical store
118 */
119
/*
 * Global opts, taken by all filesystems.
 * The table is terminated by a NULL entry.
 */
static const char *global_opts[] = {
	"errmsg",
	"fstype",
	"fspath",
	"ro",
	"rw",
	"nosuid",
	"noexec",
	NULL
};

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * Entries use the <vfsname>:<path> form described above; the table is
 * terminated by a NULL entry.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};

/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
/* Fall back to "no compiled-in root device" when ROOTDEVNAME is not defined. */
#ifndef ROOTDEVNAME
#  define ROOTDEVNAME NULL
#endif
/* Compile-time root device name; NULL unless ROOTDEVNAME was defined. */
static const char	*ctrootdevname = ROOTDEVNAME;
149
150 /*
151 * ---------------------------------------------------------------------
152 * Functions for building and sanitizing the mount options
153 */
154
155 /* Remove one mount option. */
156 static void
157 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
158 {
159
160 TAILQ_REMOVE(opts, opt, link);
161 free(opt->name, M_MOUNT);
162 if (opt->value != NULL)
163 free(opt->value, M_MOUNT);
164 #ifdef INVARIANTS
165 else if (opt->len != 0)
166 panic("%s: mount option with NULL value but length != 0",
167 __func__);
168 #endif
169 free(opt, M_MOUNT);
170 }
171
172 /* Release all resources related to the mount options. */
173 void
174 vfs_freeopts(struct vfsoptlist *opts)
175 {
176 struct vfsopt *opt;
177
178 while (!TAILQ_EMPTY(opts)) {
179 opt = TAILQ_FIRST(opts);
180 vfs_freeopt(opts, opt);
181 }
182 free(opts, M_MOUNT);
183 }
184
185 void
186 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
187 {
188 struct vfsopt *opt, *temp;
189
190 if (opts == NULL)
191 return;
192 TAILQ_FOREACH_SAFE(opt, opts, link, temp) {
193 if (strcmp(opt->name, name) == 0)
194 vfs_freeopt(opts, opt);
195 }
196 }
197
/*
 * Return 1 if 'opt' is one of the spellings that request a read-only
 * mount ("ro", "rdonly" or "norw"), 0 otherwise.
 */
static int
vfs_isopt_ro(const char *opt)
{
	static const char *const ro_names[] = { "ro", "rdonly", "norw", NULL };
	const char *const *np;

	for (np = ro_names; *np != NULL; np++) {
		if (strcmp(opt, *np) == 0)
			return (1);
	}
	return (0);
}
207
/*
 * Return 1 if 'opt' is one of the spellings that request a read-write
 * mount ("rw" or "noro"), 0 otherwise.
 */
static int
vfs_isopt_rw(const char *opt)
{
	int is_rw, is_noro;

	is_rw = (strcmp(opt, "rw") == 0);
	is_noro = (strcmp(opt, "noro") == 0);
	return ((is_rw || is_noro) ? 1 : 0);
}
216
/*
 * Decide whether two option names refer to the same setting.
 *
 * Names match when they are identical, when one is the other with a "no"
 * prefix, or when both belong to the read-only/read-write family
 * ("ro", "rdonly", "norw", "rw", "noro").  Returns 1 on a match, else 0.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	int neg1, neg2, rorw1, rorw2;

	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	neg1 = (strncmp(opt1, "no", 2) == 0);
	neg2 = (strncmp(opt2, "no", 2) == 0);

	/*
	 * Skipping two characters is only valid once the "no" prefix has
	 * been confirmed, so the prefix test must guard each comparison.
	 */
	/* "noopt" vs. "opt" */
	if (neg1 && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* "opt" vs. "noopt" */
	if (neg2 && strcmp(opt1, opt2 + 2) == 0)
		return (1);

	/* "ro" / "rdonly" / "norw" / "rw" / "noro" */
	rorw1 = vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1);
	rorw2 = vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2);
	return ((rorw1 && rorw2) ? 1 : 0);
}
239
240 /*
241 * If a mount option is specified several times,
242 * (with or without the "no" prefix) only keep
243 * the last occurence of it.
244 */
245 static void
246 vfs_sanitizeopts(struct vfsoptlist *opts)
247 {
248 struct vfsopt *opt, *opt2, *tmp;
249
250 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
251 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
252 while (opt2 != NULL) {
253 if (vfs_equalopts(opt->name, opt2->name)) {
254 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
255 vfs_freeopt(opts, opt2);
256 opt2 = tmp;
257 } else {
258 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
259 }
260 }
261 }
262 }
263
264 /*
265 * Build a linked list of mount options from a struct uio.
266 */
267 static int
268 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
269 {
270 struct vfsoptlist *opts;
271 struct vfsopt *opt;
272 size_t memused;
273 unsigned int i, iovcnt;
274 int error, namelen, optlen;
275
276 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
277 TAILQ_INIT(opts);
278 memused = 0;
279 iovcnt = auio->uio_iovcnt;
280 for (i = 0; i < iovcnt; i += 2) {
281 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
282 namelen = auio->uio_iov[i].iov_len;
283 optlen = auio->uio_iov[i + 1].iov_len;
284 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
285 opt->value = NULL;
286 opt->len = 0;
287
288 /*
289 * Do this early, so jumps to "bad" will free the current
290 * option.
291 */
292 TAILQ_INSERT_TAIL(opts, opt, link);
293 memused += sizeof(struct vfsopt) + optlen + namelen;
294
295 /*
296 * Avoid consuming too much memory, and attempts to overflow
297 * memused.
298 */
299 if (memused > VFS_MOUNTARG_SIZE_MAX ||
300 optlen > VFS_MOUNTARG_SIZE_MAX ||
301 namelen > VFS_MOUNTARG_SIZE_MAX) {
302 error = EINVAL;
303 goto bad;
304 }
305
306 if (auio->uio_segflg == UIO_SYSSPACE) {
307 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
308 } else {
309 error = copyin(auio->uio_iov[i].iov_base, opt->name,
310 namelen);
311 if (error)
312 goto bad;
313 }
314 /* Ensure names are null-terminated strings. */
315 if (opt->name[namelen - 1] != '\0') {
316 error = EINVAL;
317 goto bad;
318 }
319 if (optlen != 0) {
320 opt->len = optlen;
321 opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
322 if (auio->uio_segflg == UIO_SYSSPACE) {
323 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
324 optlen);
325 } else {
326 error = copyin(auio->uio_iov[i + 1].iov_base,
327 opt->value, optlen);
328 if (error)
329 goto bad;
330 }
331 }
332 }
333 vfs_sanitizeopts(opts);
334 *options = opts;
335 return (0);
336 bad:
337 vfs_freeopts(opts);
338 return (error);
339 }
340
341 /*
342 * Merge the old mount options with the new ones passed
343 * in the MNT_UPDATE case.
344 */
345 static void
346 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
347 {
348 struct vfsopt *opt, *opt2, *new;
349
350 TAILQ_FOREACH(opt, opts, link) {
351 /*
352 * Check that this option hasn't been redefined
353 * nor cancelled with a "no" mount option.
354 */
355 opt2 = TAILQ_FIRST(toopts);
356 while (opt2 != NULL) {
357 if (strcmp(opt2->name, opt->name) == 0)
358 goto next;
359 if (strncmp(opt2->name, "no", 2) == 0 &&
360 strcmp(opt2->name + 2, opt->name) == 0) {
361 vfs_freeopt(toopts, opt2);
362 goto next;
363 }
364 opt2 = TAILQ_NEXT(opt2, link);
365 }
366 /* We want this option, duplicate it. */
367 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
368 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
369 strcpy(new->name, opt->name);
370 if (opt->len != 0) {
371 new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
372 bcopy(opt->value, new->value, opt->len);
373 } else {
374 new->value = NULL;
375 }
376 new->len = opt->len;
377 TAILQ_INSERT_TAIL(toopts, new, link);
378 next:
379 continue;
380 }
381 }
382
383 /*
384 * Mount a filesystem.
385 */
386 int
387 nmount(td, uap)
388 struct thread *td;
389 struct nmount_args /* {
390 struct iovec *iovp;
391 unsigned int iovcnt;
392 int flags;
393 } */ *uap;
394 {
395 struct uio *auio;
396 struct iovec *iov;
397 unsigned int i;
398 int error;
399 u_int iovcnt;
400
401 AUDIT_ARG(fflags, uap->flags);
402
403 /*
404 * Filter out MNT_ROOTFS. We do not want clients of nmount() in
405 * userspace to set this flag, but we must filter it out if we want
406 * MNT_UPDATE on the root file system to work.
407 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
408 */
409 uap->flags &= ~MNT_ROOTFS;
410
411 iovcnt = uap->iovcnt;
412 /*
413 * Check that we have an even number of iovec's
414 * and that we have at least two options.
415 */
416 if ((iovcnt & 1) || (iovcnt < 4))
417 return (EINVAL);
418
419 error = copyinuio(uap->iovp, iovcnt, &auio);
420 if (error)
421 return (error);
422 iov = auio->uio_iov;
423 for (i = 0; i < iovcnt; i++) {
424 if (iov->iov_len > MMAXOPTIONLEN) {
425 free(auio, M_IOV);
426 return (EINVAL);
427 }
428 iov++;
429 }
430 error = vfs_donmount(td, uap->flags, auio);
431
432 free(auio, M_IOV);
433 return (error);
434 }
435
436 /*
437 * ---------------------------------------------------------------------
438 * Various utility functions
439 */
440
/*
 * Apply MNT_REF() to the mount point while holding its interlock.
 * NOTE(review): MNT_REF is defined outside this file; presumably it
 * increments mp->mnt_ref -- confirm against sys/mount.h.
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
449
/*
 * Apply MNT_REL() to the mount point while holding its interlock.
 * NOTE(review): MNT_REL is defined outside this file; presumably it drops
 * the reference taken by vfs_ref() -- confirm against sys/mount.h.
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
458
459 static int
460 mount_init(void *mem, int size, int flags)
461 {
462 struct mount *mp;
463
464 mp = (struct mount *)mem;
465 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
466 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
467 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
468 return (0);
469 }
470
471 static void
472 mount_fini(void *mem, int size)
473 {
474 struct mount *mp;
475
476 mp = (struct mount *)mem;
477 lockdestroy(&mp->mnt_explock);
478 lockdestroy(&mp->mnt_lock);
479 mtx_destroy(&mp->mnt_mtx);
480 }
481
482 /*
483 * Allocate and initialize the mount point struct.
484 */
485 struct mount *
486 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
487 const char *fspath, struct thread *td)
488 {
489 struct mount *mp;
490
491 mp = uma_zalloc(mount_zone, M_WAITOK);
492 bzero(&mp->mnt_startzero,
493 __rangeof(struct mount, mnt_startzero, mnt_endzero));
494 TAILQ_INIT(&mp->mnt_nvnodelist);
495 mp->mnt_nvnodelistsize = 0;
496 mp->mnt_ref = 0;
497 (void) vfs_busy(mp, LK_NOWAIT, 0, td);
498 mp->mnt_op = vfsp->vfc_vfsops;
499 mp->mnt_vfc = vfsp;
500 vfsp->vfc_refcount++; /* XXX Unlocked */
501 mp->mnt_stat.f_type = vfsp->vfc_typenum;
502 mp->mnt_gen++;
503 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
504 mp->mnt_vnodecovered = vp;
505 mp->mnt_cred = crdup(td->td_ucred);
506 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
507 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
508 mp->mnt_iosize_max = DFLTPHYS;
509 #ifdef MAC
510 mac_init_mount(mp);
511 mac_create_mount(td->td_ucred, mp);
512 #endif
513 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
514 return (mp);
515 }
516
/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
 *
 * Waits (bounded) for outstanding references and (unbounded) for holds and
 * in-flight write operations to drain, drops the filesystem type's
 * reference count, panics if vnodes or counters are still outstanding, and
 * finally releases the options, the credentials and the structure itself.
 */
void
vfs_mount_destroy(struct mount *mp)
{
	int i;

	MNT_ILOCK(mp);
	/* Flag the mount as expiring and wake anybody waiting on it. */
	mp->mnt_kern_flag |= MNTK_REFEXPIRE;
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}
	/* Give references up to three one-second sleeps to go away. */
	for (i = 0; mp->mnt_ref && i < 3; i++)
		msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
	/*
	 * This will always cause a 3 second delay in rebooting due to
	 * refs on the root mountpoint that never go away.  Most of these
	 * are held by init which never exits.
	 */
	if (i == 3 && (!rebooting || bootverbose))
		printf("Mount point %s had %d dangling refs\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_ref);
	/* Unlike references, holds are waited for without a time limit. */
	if (mp->mnt_holdcnt != 0) {
		printf("Waiting for mount point to be unheld\n");
		while (mp->mnt_holdcnt != 0) {
			mp->mnt_holdcntwaiters++;
			msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
			    PZERO, "mntdestroy", 0);
			mp->mnt_holdcntwaiters--;
		}
		printf("mount point unheld\n");
	}
	/* Wait for primary write operations, keeping the mount suspended. */
	if (mp->mnt_writeopcount > 0) {
		printf("Waiting for mount point write ops\n");
		while (mp->mnt_writeopcount > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_writeopcount,
			    MNT_MTX(mp),
			    PZERO, "mntdestroy2", 0);
		}
		printf("mount point write ops completed\n");
	}
	/* Same again for secondary writes. */
	if (mp->mnt_secondary_writes > 0) {
		printf("Waiting for mount point secondary write ops\n");
		while (mp->mnt_secondary_writes > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_secondary_writes,
			    MNT_MTX(mp),
			    PZERO, "mntdestroy3", 0);
		}
		printf("mount point secondary write ops completed\n");
	}
	MNT_IUNLOCK(mp);
	/* Drops the count taken in vfs_mount_alloc(); unlocked there as well. */
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
		struct vnode *vp;

		/* Print every leftover vnode before giving up. */
		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
			vprint("", vp);
		panic("unmount: dangling vnode");
	}
	MNT_ILOCK(mp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	/* Final consistency checks now that everything has drained. */
	if (mp->mnt_writeopcount != 0)
		panic("vfs_mount_destroy: nonzero writeopcount");
	if (mp->mnt_secondary_writes != 0)
		panic("vfs_mount_destroy: nonzero secondary_writes");
	if (mp->mnt_nvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero nvnodelistsize");
	/*
	 * Store obviously invalid values in the counters.
	 * NOTE(review): presumably so that use after destruction is easy to
	 * spot -- confirm intent.
	 */
	mp->mnt_writeopcount = -1000;
	mp->mnt_nvnodelistsize = -1000;
	mp->mnt_secondary_writes = -1000;
	MNT_IUNLOCK(mp);
#ifdef MAC
	mac_destroy_mount(mp);
#endif
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	crfree(mp->mnt_cred);
	uma_zfree(mount_zone, mp);
}
601
602 int
603 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
604 {
605 struct vfsoptlist *optlist;
606 struct vfsopt *opt, *noro_opt, *tmp_opt;
607 char *fstype, *fspath, *errmsg;
608 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
609 int has_rw, has_noro;
610
611 errmsg = NULL;
612 errmsg_len = 0;
613 errmsg_pos = -1;
614 has_rw = 0;
615 has_noro = 0;
616
617 error = vfs_buildopts(fsoptions, &optlist);
618 if (error)
619 return (error);
620
621 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
622 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
623
624 /*
625 * We need these two options before the others,
626 * and they are mandatory for any filesystem.
627 * Ensure they are NUL terminated as well.
628 */
629 fstypelen = 0;
630 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
631 if (error || fstype[fstypelen - 1] != '\0') {
632 error = EINVAL;
633 if (errmsg != NULL)
634 strncpy(errmsg, "Invalid fstype", errmsg_len);
635 goto bail;
636 }
637 fspathlen = 0;
638 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
639 if (error || fspath[fspathlen - 1] != '\0') {
640 error = EINVAL;
641 if (errmsg != NULL)
642 strncpy(errmsg, "Invalid fspath", errmsg_len);
643 goto bail;
644 }
645
646 /*
647 * We need to see if we have the "update" option
648 * before we call vfs_domount(), since vfs_domount() has special
649 * logic based on MNT_UPDATE. This is very important
650 * when we want to update the root filesystem.
651 */
652 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
653 if (strcmp(opt->name, "update") == 0) {
654 fsflags |= MNT_UPDATE;
655 vfs_freeopt(optlist, opt);
656 }
657 else if (strcmp(opt->name, "async") == 0)
658 fsflags |= MNT_ASYNC;
659 else if (strcmp(opt->name, "force") == 0) {
660 fsflags |= MNT_FORCE;
661 vfs_freeopt(optlist, opt);
662 }
663 else if (strcmp(opt->name, "reload") == 0) {
664 fsflags |= MNT_RELOAD;
665 vfs_freeopt(optlist, opt);
666 }
667 else if (strcmp(opt->name, "multilabel") == 0)
668 fsflags |= MNT_MULTILABEL;
669 else if (strcmp(opt->name, "noasync") == 0)
670 fsflags &= ~MNT_ASYNC;
671 else if (strcmp(opt->name, "noatime") == 0)
672 fsflags |= MNT_NOATIME;
673 else if (strcmp(opt->name, "atime") == 0) {
674 free(opt->name, M_MOUNT);
675 opt->name = strdup("nonoatime", M_MOUNT);
676 }
677 else if (strcmp(opt->name, "noclusterr") == 0)
678 fsflags |= MNT_NOCLUSTERR;
679 else if (strcmp(opt->name, "clusterr") == 0) {
680 free(opt->name, M_MOUNT);
681 opt->name = strdup("nonoclusterr", M_MOUNT);
682 }
683 else if (strcmp(opt->name, "noclusterw") == 0)
684 fsflags |= MNT_NOCLUSTERW;
685 else if (strcmp(opt->name, "clusterw") == 0) {
686 free(opt->name, M_MOUNT);
687 opt->name = strdup("nonoclusterw", M_MOUNT);
688 }
689 else if (strcmp(opt->name, "noexec") == 0)
690 fsflags |= MNT_NOEXEC;
691 else if (strcmp(opt->name, "exec") == 0) {
692 free(opt->name, M_MOUNT);
693 opt->name = strdup("nonoexec", M_MOUNT);
694 }
695 else if (strcmp(opt->name, "nosuid") == 0)
696 fsflags |= MNT_NOSUID;
697 else if (strcmp(opt->name, "suid") == 0) {
698 free(opt->name, M_MOUNT);
699 opt->name = strdup("nonosuid", M_MOUNT);
700 }
701 else if (strcmp(opt->name, "nosymfollow") == 0)
702 fsflags |= MNT_NOSYMFOLLOW;
703 else if (strcmp(opt->name, "symfollow") == 0) {
704 free(opt->name, M_MOUNT);
705 opt->name = strdup("nonosymfollow", M_MOUNT);
706 }
707 else if (strcmp(opt->name, "noro") == 0) {
708 fsflags &= ~MNT_RDONLY;
709 has_noro = 1;
710 }
711 else if (strcmp(opt->name, "rw") == 0) {
712 fsflags &= ~MNT_RDONLY;
713 has_rw = 1;
714 }
715 else if (strcmp(opt->name, "ro") == 0)
716 fsflags |= MNT_RDONLY;
717 else if (strcmp(opt->name, "rdonly") == 0) {
718 free(opt->name, M_MOUNT);
719 opt->name = strdup("ro", M_MOUNT);
720 fsflags |= MNT_RDONLY;
721 }
722 else if (strcmp(opt->name, "suiddir") == 0)
723 fsflags |= MNT_SUIDDIR;
724 else if (strcmp(opt->name, "sync") == 0)
725 fsflags |= MNT_SYNCHRONOUS;
726 else if (strcmp(opt->name, "union") == 0)
727 fsflags |= MNT_UNION;
728 }
729
730 /*
731 * If "rw" was specified as a mount option, and we
732 * are trying to update a mount-point from "ro" to "rw",
733 * we need a mount option "noro", since in vfs_mergeopts(),
734 * "noro" will cancel "ro", but "rw" will not do anything.
735 */
736 if (has_rw && !has_noro) {
737 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
738 noro_opt->name = strdup("noro", M_MOUNT);
739 noro_opt->value = NULL;
740 noro_opt->len = 0;
741 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
742 }
743
744 /*
745 * Be ultra-paranoid about making sure the type and fspath
746 * variables will fit in our mp buffers, including the
747 * terminating NUL.
748 */
749 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
750 error = ENAMETOOLONG;
751 goto bail;
752 }
753
754 mtx_lock(&Giant);
755 error = vfs_domount(td, fstype, fspath, fsflags, optlist);
756 mtx_unlock(&Giant);
757 bail:
758 /* copyout the errmsg */
759 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
760 && errmsg_len > 0 && errmsg != NULL) {
761 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
762 bcopy(errmsg,
763 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
764 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
765 } else {
766 copyout(errmsg,
767 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
768 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
769 }
770 }
771
772 if (error != 0)
773 vfs_freeopts(optlist);
774 return (error);
775 }
776
777 /*
778 * Old mount API.
779 */
/*
 * Arguments of the old mount() system call.  Only declared here when
 * <sys/sysproto.h> has not been included (its guard is _SYS_SYSPROTO_H_).
 */
#ifndef _SYS_SYSPROTO_H_
struct mount_args {
	char	*type;		/* filesystem type name */
	char	*path;		/* directory to mount on */
	int	flags;		/* MNT_* flags */
	caddr_t	data;		/* passed unchanged to the vfs_cmount hook */
};
#endif
788 /* ARGSUSED */
789 int
790 mount(td, uap)
791 struct thread *td;
792 struct mount_args /* {
793 char *type;
794 char *path;
795 int flags;
796 caddr_t data;
797 } */ *uap;
798 {
799 char *fstype;
800 struct vfsconf *vfsp = NULL;
801 struct mntarg *ma = NULL;
802 int error;
803
804 AUDIT_ARG(fflags, uap->flags);
805
806 /*
807 * Filter out MNT_ROOTFS. We do not want clients of mount() in
808 * userspace to set this flag, but we must filter it out if we want
809 * MNT_UPDATE on the root file system to work.
810 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
811 */
812 uap->flags &= ~MNT_ROOTFS;
813
814 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
815 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
816 if (error) {
817 free(fstype, M_TEMP);
818 return (error);
819 }
820
821 AUDIT_ARG(text, fstype);
822 mtx_lock(&Giant);
823 vfsp = vfs_byname_kld(fstype, td, &error);
824 free(fstype, M_TEMP);
825 if (vfsp == NULL) {
826 mtx_unlock(&Giant);
827 return (ENOENT);
828 }
829 if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
830 mtx_unlock(&Giant);
831 return (EOPNOTSUPP);
832 }
833
834 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
835 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
836 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
837 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
838 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
839
840 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
841 mtx_unlock(&Giant);
842 return (error);
843 }
844
845
/*
 * vfs_domount(): actually attempt a filesystem mount.
 *
 * Must be called with Giant held.  Handles both a fresh mount and, when
 * MNT_UPDATE is set in 'fsflags', an update of the filesystem whose root
 * is 'fspath'.  'fsdata' is the option list; it is installed as
 * mp->mnt_optnew for the duration of VFS_MOUNT() and becomes mp->mnt_opt
 * when VFS_MOUNT() (and, for updates, the export processing) succeeds.
 *
 * Returns 0 on success or an errno value.
 *
 * NOTE(review): vfs_allocate_syncvnode() may set 'error' after mnt_opt has
 * already taken over 'fsdata'.  The visible caller, vfs_donmount(), frees
 * the option list whenever an error is returned, which would leave
 * mp->mnt_opt dangling in that case -- confirm and fix separately.
 */
static int
vfs_domount(
	struct thread *td,	/* Calling thread. */
	const char *fstype,	/* Filesystem type. */
	char *fspath,		/* Mount path. */
	int fsflags,		/* Flags common to all filesystems. */
	void *fsdata		/* Options local to the filesystem. */
	)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsconf *vfsp;
	struct export_args export;
	int error, flag = 0;
	struct vattr va;
	struct nameidata nd;

	mtx_assert(&Giant, MA_OWNED);
	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		return (ENAMETOOLONG);

	/* Jailed callers, and everybody while usermount is off, need privilege. */
	if (jailed(td->td_ucred) || usermount == 0) {
		if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
			return (error);
	}

	/*
	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
	 */
	if (fsflags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
		if (error)
			return (error);
	}
	if (fsflags & MNT_SUIDDIR) {
		error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
	 */
	if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
		if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
			fsflags |= MNT_NOSUID | MNT_USER;
	}

	/* Load KLDs before we lock the covered vnode to avoid reversals. */
	vfsp = NULL;
	if ((fsflags & MNT_UPDATE) == 0) {
		/* Don't try to load KLDs if we're mounting the root. */
		if (fsflags & MNT_ROOTFS)
			vfsp = vfs_byname(fstype);
		else
			vfsp = vfs_byname_kld(fstype, td, &error);
		if (vfsp == NULL)
			return (ENODEV);
		/* Inside a jail only types flagged VFCF_JAIL may be mounted. */
		if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
			return (EPERM);
	}
	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
	    fspath, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (fsflags & MNT_UPDATE) {
		/* An update must name the root vnode of a mounted filesystem. */
		if ((vp->v_vflag & VV_ROOT) == 0) {
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		MNT_ILOCK(mp);
		/* Remember the current flags so a failed update can restore them. */
		flag = mp->mnt_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((fsflags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			MNT_IUNLOCK(mp);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		MNT_IUNLOCK(mp);
		/*
		 * Only privileged root, or (if MNT_USER is set) the user that
		 * did the original mount is permitted to update it.
		 */
		error = vfs_suser(mp, td);
		if (error) {
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
			vput(vp);
			return (EBUSY);
		}
		/* VI_MOUNT marks the vnode as having a mount operation in progress. */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			VI_UNLOCK(vp);
			vfs_unbusy(mp, td);
			vput(vp);
			return (EBUSY);
		}
		vp->v_iflag |= VI_MOUNT;
		VI_UNLOCK(vp);
		MNT_ILOCK(mp);
		mp->mnt_flag |= fsflags &
		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
		MNT_IUNLOCK(mp);
		VOP_UNLOCK(vp, 0, td);
		/* Fold the options of the existing mount into the new set. */
		mp->mnt_optnew = fsdata;
		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
	} else {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
		if (error) {
			vput(vp);
			return (error);
		}
		if (va.va_uid != td->td_ucred->cr_uid) {
			error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
			    0);
			if (error) {
				vput(vp);
				return (error);
			}
		}
		error = vinvalbuf(vp, V_SAVE, td, 0, 0);
		if (error != 0) {
			vput(vp);
			return (error);
		}
		if (vp->v_type != VDIR) {
			vput(vp);
			return (ENOTDIR);
		}
		/* Refuse if a mount is in progress or already present here. */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			VI_UNLOCK(vp);
			vput(vp);
			return (EBUSY);
		}
		vp->v_iflag |= VI_MOUNT;
		VI_UNLOCK(vp);

		/*
		 * Allocate and initialize the filesystem.
		 */
		mp = vfs_mount_alloc(vp, vfsp, fspath, td);
		VOP_UNLOCK(vp, 0, td);

		/* XXXMAC: pass to vfs_mount_alloc? */
		mp->mnt_optnew = fsdata;
	}

	/*
	 * Set the mount level flags.
	 */
	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
	    (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
	    MNT_RDONLY));
	if ((mp->mnt_flag & MNT_ASYNC) == 0)
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.  No freeing of cn_pnbuf.
	 */
	error = VFS_MOUNT(mp, td);

	/*
	 * Process the export option only if we are
	 * updating mount options.
	 */
	if (!error && (fsflags & MNT_UPDATE)) {
		if (vfs_copyopt(mp->mnt_optnew, "export", &export,
		    sizeof(export)) == 0)
			error = vfs_export(mp, &export);
	}

	/* On success the new option list replaces the previous one. */
	if (!error) {
		if (mp->mnt_opt != NULL)
			vfs_freeopts(mp->mnt_opt);
		mp->mnt_opt = mp->mnt_optnew;
		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
	}
	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		/* Failed update: restore the saved flags, keeping only MNT_QUOTA. */
		if (error)
			mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
			    (flag & ~MNT_QUOTA);
		else
			mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
			    MNT_FORCE | MNT_SNAPSHOT);
		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
			mp->mnt_kern_flag |= MNTK_ASYNC;
		else
			mp->mnt_kern_flag &= ~MNTK_ASYNC;
		MNT_IUNLOCK(mp);
		/* A writable mount needs a syncer vnode; a read-only one does not. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			if (mp->mnt_syncer == NULL)
				error = vfs_allocate_syncvnode(mp);
		} else {
			if (mp->mnt_syncer != NULL)
				vrele(mp->mnt_syncer);
			mp->mnt_syncer = NULL;
		}
		vfs_unbusy(mp, td);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vrele(vp);
		return (error);
	}
	/* From here on only the fresh-mount case remains. */
	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
		mp->mnt_kern_flag |= MNTK_ASYNC;
	else
		mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	if (!error) {
		struct vnode *newdp;

		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vp->v_mountedhere = mp;
		mtx_lock(&mountlist_mtx);
		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
		mtx_unlock(&mountlist_mtx);
		vfs_event_signal(NULL, VQ_MOUNT, 0);
		if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
			panic("mount: lost mount");
		VOP_UNLOCK(newdp, 0, td);
		VOP_UNLOCK(vp, 0, td);
		mountcheckdirs(vp, newdp);
		vrele(newdp);
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp, td);
		if (error)
			vrele(vp);
	} else {
		/* VFS_MOUNT() failed: undo the marker and discard the new mount. */
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vfs_unbusy(mp, td);
		vfs_mount_destroy(mp);
		vput(vp);
	}
	return (error);
}
1128
1129 /*
1130 * Unmount a filesystem.
1131 *
1132 * Note: unmount takes a path to the vnode mounted on as argument, not
1133 * special file (as before).
1134 */
/*
 * Arguments of the unmount() system call.  Only declared here when
 * <sys/sysproto.h> has not been included (its guard is _SYS_SYSPROTO_H_).
 */
#ifndef _SYS_SYSPROTO_H_
struct unmount_args {
	char	*path;		/* mounted-on path, or "FSID:a:b" with MNT_BYFSID */
	int	flags;		/* MNT_* flags */
};
#endif
1141 /* ARGSUSED */
1142 int
1143 unmount(td, uap)
1144 struct thread *td;
1145 register struct unmount_args /* {
1146 char *path;
1147 int flags;
1148 } */ *uap;
1149 {
1150 struct mount *mp;
1151 char *pathbuf;
1152 int error, id0, id1;
1153
1154 if (jailed(td->td_ucred) || usermount == 0) {
1155 error = priv_check(td, PRIV_VFS_UNMOUNT);
1156 if (error)
1157 return (error);
1158 }
1159
1160 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1161 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
1162 if (error) {
1163 free(pathbuf, M_TEMP);
1164 return (error);
1165 }
1166 AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
1167 mtx_lock(&Giant);
1168 if (uap->flags & MNT_BYFSID) {
1169 /* Decode the filesystem ID. */
1170 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
1171 mtx_unlock(&Giant);
1172 free(pathbuf, M_TEMP);
1173 return (EINVAL);
1174 }
1175
1176 mtx_lock(&mountlist_mtx);
1177 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1178 if (mp->mnt_stat.f_fsid.val[0] == id0 &&
1179 mp->mnt_stat.f_fsid.val[1] == id1)
1180 break;
1181 }
1182 mtx_unlock(&mountlist_mtx);
1183 } else {
1184 mtx_lock(&mountlist_mtx);
1185 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1186 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
1187 break;
1188 }
1189 mtx_unlock(&mountlist_mtx);
1190 }
1191 free(pathbuf, M_TEMP);
1192 if (mp == NULL) {
1193 /*
1194 * Previously we returned ENOENT for a nonexistent path and
1195 * EINVAL for a non-mountpoint. We cannot tell these apart
1196 * now, so in the !MNT_BYFSID case return the more likely
1197 * EINVAL for compatibility.
1198 */
1199 mtx_unlock(&Giant);
1200 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
1201 }
1202
1203 /*
1204 * Don't allow unmounting the root filesystem.
1205 */
1206 if (mp->mnt_flag & MNT_ROOTFS) {
1207 mtx_unlock(&Giant);
1208 return (EINVAL);
1209 }
1210 error = dounmount(mp, uap->flags, td);
1211 mtx_unlock(&Giant);
1212 return (error);
1213 }
1214
/*
 * Do the actual filesystem unmount.
 *
 * mp is the mount to tear down, flags are the unmount flags (MNT_FORCE is
 * the one examined here) and td is the calling thread.  Giant must be
 * held by the caller (asserted below).
 *
 * Returns 0 once the mount has been removed from the mount list and
 * destroyed.  Otherwise returns an errno value: EBUSY if the mount went
 * away or is already being unmounted, the vfs_suser() error if the
 * caller lacks permission, the lockmgr() error if the mount lock could
 * not be drained, or the VFS_SYNC()/VFS_UNMOUNT() error.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;
	int mnt_gen_r;

	mtx_assert(&Giant, MA_OWNED);

	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
		/*
		 * Remember the mount generation, then take a hold on the
		 * covered vnode around the vn_lock() call.
		 */
		mnt_gen_r = mp->mnt_gen;
		VI_LOCK(coveredvp);
		vholdl(coveredvp);
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
		vdrop(coveredvp);
		/*
		 * Check for mp being unmounted while waiting for the
		 * covered vnode lock.
		 */
		if (coveredvp->v_mountedhere != mp ||
		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
			VOP_UNLOCK(coveredvp, 0, td);
			return (EBUSY);
		}
	}
	/*
	 * Only privileged root, or (if MNT_USER is set) the user that did the
	 * original mount is permitted to unmount this filesystem.
	 */
	error = vfs_suser(mp, td);
	if (error) {
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}

	MNT_ILOCK(mp);
	/* Someone else is already unmounting this filesystem. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/*
	 * Drain the mount lock.  The mount interlock is handed to lockmgr()
	 * via LK_INTERLOCK; a non-forced unmount passes LK_NOWAIT.
	 */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
	if (error) {
		/* Back out the unmount flags and wake any MNTK_MWAIT waiter. */
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
		    MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/*
	 * Save the MNT_ASYNC setting so it can be restored if the unmount
	 * fails, then clear the async flags.
	 */
	MNT_ILOCK(mp);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	mp->mnt_kern_flag &= ~MNTK_ASYNC;
	MNT_IUNLOCK(mp);
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/*
	 * For forced unmounts, move process cdir/rdir refs on the fs root
	 * vnode to the covered vnode.  For non-forced unmounts we want
	 * such references to cause an EBUSY error.
	 */
	if ((flags & MNT_FORCE) &&
	    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * VFS_UNMOUNT() is attempted when the filesystem is read-only, when
	 * VFS_SYNC() succeeded, or unconditionally for a forced unmount.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	/*
	 * If we failed to flush the dirty blocks for this mount point,
	 * undo all the cdir/rdir and rootvnode changes we made above.
	 * Unless we failed to do so because the device is reporting that
	 * it doesn't exist anymore.
	 */
	if (error && error != ENXIO) {
		if ((flags & MNT_FORCE) &&
		    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
		/*
		 * Re-create the syncer vnode for a writable filesystem; the
		 * interlock is dropped around the allocation call.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
			MNT_IUNLOCK(mp);
			(void) vfs_allocate_syncvnode(mp);
			MNT_ILOCK(mp);
		}
		/* Restore the flags changed above and release the mount lock. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
			mp->mnt_kern_flag |= MNTK_ASYNC;
		lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		MNT_IUNLOCK(mp);
		if (coveredvp)
			VOP_UNLOCK(coveredvp, 0, td);
		return (error);
	}
	/*
	 * Success (or ENXIO): take the mount off the global list, detach it
	 * from the covered vnode, announce the event and destroy the mount.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	if (coveredvp != NULL) {
		coveredvp->v_mountedhere = NULL;
		vput(coveredvp);
	}
	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
	vfs_mount_destroy(mp);
	return (0);
}
1365
1366 /*
1367 * ---------------------------------------------------------------------
1368 * Mounting of root filesystem
1369 *
1370 */
1371
/*
 * One outstanding hold on the root mount.  Tokens are created by
 * root_mount_hold() and linked on the root_holds list.
 */
struct root_hold_token {
	const char			*who;	/* identifier of the holder */
	LIST_ENTRY(root_hold_token)	list;	/* linkage on root_holds */
};
1376
/* List of outstanding root mount holds; modified under mountlist_mtx. */
static LIST_HEAD(, root_hold_token)	root_holds =
    LIST_HEAD_INITIALIZER(&root_holds);

/* Set to 1 by root_mount_done() once the root filesystem is mounted. */
static int root_mount_complete;
1381
1382 /*
1383 * Hold root mount.
1384 */
1385 struct root_hold_token *
1386 root_mount_hold(const char *identifier)
1387 {
1388 struct root_hold_token *h;
1389
1390 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
1391 h->who = identifier;
1392 mtx_lock(&mountlist_mtx);
1393 LIST_INSERT_HEAD(&root_holds, h, list);
1394 mtx_unlock(&mountlist_mtx);
1395 return (h);
1396 }
1397
1398 /*
1399 * Release root mount.
1400 */
1401 void
1402 root_mount_rel(struct root_hold_token *h)
1403 {
1404
1405 mtx_lock(&mountlist_mtx);
1406 LIST_REMOVE(h, list);
1407 wakeup(&root_holds);
1408 mtx_unlock(&mountlist_mtx);
1409 free(h, M_DEVBUF);
1410 }
1411
1412 /*
1413 * Wait for all subsystems to release root mount.
1414 */
1415 static void
1416 root_mount_prepare(void)
1417 {
1418 struct root_hold_token *h;
1419
1420 for (;;) {
1421 DROP_GIANT();
1422 g_waitidle();
1423 PICKUP_GIANT();
1424 mtx_lock(&mountlist_mtx);
1425 if (LIST_EMPTY(&root_holds)) {
1426 mtx_unlock(&mountlist_mtx);
1427 break;
1428 }
1429 printf("Root mount waiting for:");
1430 LIST_FOREACH(h, &root_holds, list)
1431 printf(" %s", h->who);
1432 printf("\n");
1433 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
1434 hz);
1435 }
1436 }
1437
1438 /*
1439 * Root was mounted, share the good news.
1440 */
1441 static void
1442 root_mount_done(void)
1443 {
1444
1445 /*
1446 * Use a mutex to prevent the wakeup being missed and waiting for
1447 * an extra 1 second sleep.
1448 */
1449 mtx_lock(&mountlist_mtx);
1450 root_mount_complete = 1;
1451 wakeup(&root_mount_complete);
1452 mtx_unlock(&mountlist_mtx);
1453 }
1454
1455 /*
1456 * Return true if root is already mounted.
1457 */
1458 int
1459 root_mounted(void)
1460 {
1461
1462 /* No mutex is acquired here because int stores are atomic. */
1463 return (root_mount_complete);
1464 }
1465
1466 /*
1467 * Wait until root is mounted.
1468 */
1469 void
1470 root_mount_wait(void)
1471 {
1472
1473 /*
1474 * Panic on an obvious deadlock - the function can't be called from
1475 * a thread which is doing the whole SYSINIT stuff.
1476 */
1477 KASSERT(curthread->td_proc->p_pid != 0,
1478 ("root_mount_wait: cannot be called from the swapper thread"));
1479 mtx_lock(&mountlist_mtx);
1480 while (!root_mount_complete) {
1481 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
1482 hz);
1483 }
1484 mtx_unlock(&mountlist_mtx);
1485 }
1486
1487 static void
1488 set_rootvnode(struct thread *td)
1489 {
1490 struct proc *p;
1491
1492 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
1493 panic("Cannot find root vnode");
1494
1495 VOP_UNLOCK(rootvnode, 0, td);
1496
1497 p = td->td_proc;
1498 FILEDESC_XLOCK(p->p_fd);
1499
1500 if (p->p_fd->fd_cdir != NULL)
1501 vrele(p->p_fd->fd_cdir);
1502 p->p_fd->fd_cdir = rootvnode;
1503 VREF(rootvnode);
1504
1505 if (p->p_fd->fd_rdir != NULL)
1506 vrele(p->p_fd->fd_rdir);
1507 p->p_fd->fd_rdir = rootvnode;
1508 VREF(rootvnode);
1509
1510 FILEDESC_XUNLOCK(p->p_fd);
1511
1512 EVENTHANDLER_INVOKE(mountroot);
1513 }
1514
1515 /*
1516 * Mount /devfs as our root filesystem, but do not put it on the mountlist
1517 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup.
1518 */
1519
1520 static void
1521 devfs_first(void)
1522 {
1523 struct thread *td = curthread;
1524 struct vfsoptlist *opts;
1525 struct vfsconf *vfsp;
1526 struct mount *mp = NULL;
1527 int error;
1528
1529 vfsp = vfs_byname("devfs");
1530 KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1531 if (vfsp == NULL)
1532 return;
1533
1534 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
1535
1536 error = VFS_MOUNT(mp, td);
1537 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1538 if (error)
1539 return;
1540
1541 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
1542 TAILQ_INIT(opts);
1543 mp->mnt_opt = opts;
1544
1545 mtx_lock(&mountlist_mtx);
1546 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1547 mtx_unlock(&mountlist_mtx);
1548
1549 set_rootvnode(td);
1550
1551 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1552 if (error)
1553 printf("kern_symlink /dev -> / returns %d\n", error);
1554 }
1555
1556 /*
1557 * Surgically move our devfs to be mounted on /dev.
1558 */
1559
1560 static void
1561 devfs_fixup(struct thread *td)
1562 {
1563 struct nameidata nd;
1564 int error;
1565 struct vnode *vp, *dvp;
1566 struct mount *mp;
1567
1568 /* Remove our devfs mount from the mountlist and purge the cache */
1569 mtx_lock(&mountlist_mtx);
1570 mp = TAILQ_FIRST(&mountlist);
1571 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1572 mtx_unlock(&mountlist_mtx);
1573 cache_purgevfs(mp);
1574
1575 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
1576 VI_LOCK(dvp);
1577 dvp->v_iflag &= ~VI_MOUNT;
1578 VI_UNLOCK(dvp);
1579 dvp->v_mountedhere = NULL;
1580
1581 /* Set up the real rootvnode, and purge the cache */
1582 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1583 set_rootvnode(td);
1584 cache_purgevfs(rootvnode->v_mount);
1585
1586 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1587 error = namei(&nd);
1588 if (error) {
1589 printf("Lookup of /dev for devfs, error: %d\n", error);
1590 return;
1591 }
1592 NDFREE(&nd, NDF_ONLY_PNBUF);
1593 vp = nd.ni_vp;
1594 if (vp->v_type != VDIR) {
1595 vput(vp);
1596 }
1597 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
1598 if (error) {
1599 vput(vp);
1600 }
1601 cache_purge(vp);
1602 mp->mnt_vnodecovered = vp;
1603 vp->v_mountedhere = mp;
1604 mtx_lock(&mountlist_mtx);
1605 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1606 mtx_unlock(&mountlist_mtx);
1607 VOP_UNLOCK(vp, 0, td);
1608 vput(dvp);
1609 vfs_unbusy(mp, td);
1610
1611 /* Unlink the no longer needed /dev/dev -> / symlink */
1612 kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1613 }
1614
1615 /*
1616 * Report errors during filesystem mounting.
1617 */
1618 void
1619 vfs_mount_error(struct mount *mp, const char *fmt, ...)
1620 {
1621 struct vfsoptlist *moptlist = mp->mnt_optnew;
1622 va_list ap;
1623 int error, len;
1624 char *errmsg;
1625
1626 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
1627 if (error || errmsg == NULL || len <= 0)
1628 return;
1629
1630 va_start(ap, fmt);
1631 vsnprintf(errmsg, (size_t)len, fmt, ap);
1632 va_end(ap);
1633 }
1634
1635 /*
1636 * Find and mount the root filesystem
1637 */
1638 void
1639 vfs_mountroot(void)
1640 {
1641 char *cp;
1642 int error, i, asked = 0;
1643
1644 root_mount_prepare();
1645
1646 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
1647 NULL, NULL, mount_init, mount_fini,
1648 UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1649 devfs_first();
1650
1651 /*
1652 * We are booted with instructions to prompt for the root filesystem.
1653 */
1654 if (boothowto & RB_ASKNAME) {
1655 if (!vfs_mountroot_ask())
1656 goto mounted;
1657 asked = 1;
1658 }
1659
1660 /*
1661 * The root filesystem information is compiled in, and we are
1662 * booted with instructions to use it.
1663 */
1664 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1665 if (!vfs_mountroot_try(ctrootdevname))
1666 goto mounted;
1667 ctrootdevname = NULL;
1668 }
1669
1670 /*
1671 * We've been given the generic "use CDROM as root" flag. This is
1672 * necessary because one media may be used in many different
1673 * devices, so we need to search for them.
1674 */
1675 if (boothowto & RB_CDROM) {
1676 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1677 if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1678 goto mounted;
1679 }
1680 }
1681
1682 /*
1683 * Try to use the value read by the loader from /etc/fstab, or
1684 * supplied via some other means. This is the preferred
1685 * mechanism.
1686 */
1687 cp = getenv("vfs.root.mountfrom");
1688 if (cp != NULL) {
1689 error = vfs_mountroot_try(cp);
1690 freeenv(cp);
1691 if (!error)
1692 goto mounted;
1693 }
1694
1695 /*
1696 * Try values that may have been computed by code during boot
1697 */
1698 if (!vfs_mountroot_try(rootdevnames[0]))
1699 goto mounted;
1700 if (!vfs_mountroot_try(rootdevnames[1]))
1701 goto mounted;
1702
1703 /*
1704 * If we (still) have a compiled-in default, try it.
1705 */
1706 if (ctrootdevname != NULL)
1707 if (!vfs_mountroot_try(ctrootdevname))
1708 goto mounted;
1709 /*
1710 * Everything so far has failed, prompt on the console if we haven't
1711 * already tried that.
1712 */
1713 if (!asked)
1714 if (!vfs_mountroot_ask())
1715 goto mounted;
1716
1717 panic("Root mount failed, startup aborted.");
1718
1719 mounted:
1720 root_mount_done();
1721 }
1722
1723 /*
1724 * Mount (mountfrom) as the root filesystem.
1725 */
1726 static int
1727 vfs_mountroot_try(const char *mountfrom)
1728 {
1729 struct mount *mp;
1730 char *vfsname, *path;
1731 time_t timebase;
1732 int error;
1733 char patt[32];
1734
1735 vfsname = NULL;
1736 path = NULL;
1737 mp = NULL;
1738 error = EINVAL;
1739
1740 if (mountfrom == NULL)
1741 return (error); /* don't complain */
1742 printf("Trying to mount root from %s\n", mountfrom);
1743
1744 /* parse vfs name and path */
1745 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1746 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1747 vfsname[0] = path[0] = 0;
1748 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1749 if (sscanf(mountfrom, patt, vfsname, path) < 1)
1750 goto out;
1751
1752 if (path[0] == '\0')
1753 strcpy(path, ROOTNAME);
1754
1755 error = kernel_vmount(
1756 MNT_RDONLY | MNT_ROOTFS,
1757 "fstype", vfsname,
1758 "fspath", "/",
1759 "from", path,
1760 NULL);
1761 if (error == 0) {
1762 /*
1763 * We mount devfs prior to mounting the / FS, so the first
1764 * entry will typically be devfs.
1765 */
1766 mp = TAILQ_FIRST(&mountlist);
1767 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
1768
1769 /*
1770 * Iterate over all currently mounted file systems and use
1771 * the time stamp found to check and/or initialize the RTC.
1772 * Typically devfs has no time stamp and the only other FS
1773 * is the actual / FS.
1774 * Call inittodr() only once and pass it the largest of the
1775 * timestamps we encounter.
1776 */
1777 timebase = 0;
1778 do {
1779 if (mp->mnt_time > timebase)
1780 timebase = mp->mnt_time;
1781 mp = TAILQ_NEXT(mp, mnt_list);
1782 } while (mp != NULL);
1783 inittodr(timebase);
1784
1785 devfs_fixup(curthread);
1786 }
1787 out:
1788 free(path, M_MOUNT);
1789 free(vfsname, M_MOUNT);
1790 return (error);
1791 }
1792
1793 /*
1794 * ---------------------------------------------------------------------
1795 * Interactive root filesystem selection code.
1796 */
1797
/*
 * Prompt on the console for a root filesystem specification until one
 * mounts or the operator gives up.
 *
 * Returns 0 once vfs_mountroot_try() succeeds on an entered name, or 1 if
 * an empty line is entered.  Entering a line starting with '?' lists the
 * GEOM disk devices and prompts again.
 */
static int
vfs_mountroot_ask(void)
{
	char line[128];

	for (;;) {
		printf("\nManual root filesystem specification:\n");
		printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n");
#if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
		printf(" eg. ufs:da0s1a\n");
#else
		printf(" eg. ufs:/dev/da0a\n");
#endif
		printf(" ? List valid disk boot devices\n");
		printf(" <empty line> Abort manual input\n");
		printf("\nmountroot> ");
		gets(line, sizeof(line), 1);

		switch (line[0]) {
		case '\0':
			/* Empty input: abort. */
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n ");
			g_dev_print();
			break;
		default:
			if (vfs_mountroot_try(line) == 0)
				return (0);
			break;
		}
	}
}
1826
1827 /*
1828 * ---------------------------------------------------------------------
1829 * Functions for querying mount options/arguments from filesystems.
1830 */
1831
1832 /*
1833 * Check that no unknown options are given
1834 */
1835 int
1836 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1837 {
1838 struct vfsopt *opt;
1839 char errmsg[255];
1840 const char **t, *p, *q;
1841 int ret = 0;
1842
1843 TAILQ_FOREACH(opt, opts, link) {
1844 p = opt->name;
1845 q = NULL;
1846 if (p[0] == 'n' && p[1] == 'o')
1847 q = p + 2;
1848 for(t = global_opts; *t != NULL; t++) {
1849 if (strcmp(*t, p) == 0)
1850 break;
1851 if (q != NULL) {
1852 if (strcmp(*t, q) == 0)
1853 break;
1854 }
1855 }
1856 if (*t != NULL)
1857 continue;
1858 for(t = legal; *t != NULL; t++) {
1859 if (strcmp(*t, p) == 0)
1860 break;
1861 if (q != NULL) {
1862 if (strcmp(*t, q) == 0)
1863 break;
1864 }
1865 }
1866 if (*t != NULL)
1867 continue;
1868 snprintf(errmsg, sizeof(errmsg),
1869 "mount option <%s> is unknown", p);
1870 printf("%s\n", errmsg);
1871 ret = EINVAL;
1872 }
1873 if (ret != 0) {
1874 TAILQ_FOREACH(opt, opts, link) {
1875 if (strcmp(opt->name, "errmsg") == 0) {
1876 strncpy((char *)opt->value, errmsg, opt->len);
1877 }
1878 }
1879 }
1880 return (ret);
1881 }
1882
1883 /*
1884 * Get a mount option by its name.
1885 *
1886 * Return 0 if the option was found, ENOENT otherwise.
1887 * If len is non-NULL it will be filled with the length
1888 * of the option. If buf is non-NULL, it will be filled
1889 * with the address of the option.
1890 */
1891 int
1892 vfs_getopt(opts, name, buf, len)
1893 struct vfsoptlist *opts;
1894 const char *name;
1895 void **buf;
1896 int *len;
1897 {
1898 struct vfsopt *opt;
1899
1900 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1901
1902 TAILQ_FOREACH(opt, opts, link) {
1903 if (strcmp(name, opt->name) == 0) {
1904 if (len != NULL)
1905 *len = opt->len;
1906 if (buf != NULL)
1907 *buf = opt->value;
1908 return (0);
1909 }
1910 }
1911 return (ENOENT);
1912 }
1913
1914 static int
1915 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
1916 {
1917 struct vfsopt *opt;
1918 int i;
1919
1920 if (opts == NULL)
1921 return (-1);
1922
1923 i = 0;
1924 TAILQ_FOREACH(opt, opts, link) {
1925 if (strcmp(name, opt->name) == 0)
1926 return (i);
1927 ++i;
1928 }
1929 return (-1);
1930 }
1931
1932 char *
1933 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1934 {
1935 struct vfsopt *opt;
1936
1937 *error = 0;
1938 TAILQ_FOREACH(opt, opts, link) {
1939 if (strcmp(name, opt->name) != 0)
1940 continue;
1941 if (((char *)opt->value)[opt->len - 1] != '\0') {
1942 *error = EINVAL;
1943 return (NULL);
1944 }
1945 return (opt->value);
1946 }
1947 *error = ENOENT;
1948 return (NULL);
1949 }
1950
1951 int
1952 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1953 {
1954 struct vfsopt *opt;
1955
1956 TAILQ_FOREACH(opt, opts, link) {
1957 if (strcmp(name, opt->name) == 0) {
1958 if (w != NULL)
1959 *w |= val;
1960 return (1);
1961 }
1962 }
1963 if (w != NULL)
1964 *w &= ~val;
1965 return (0);
1966 }
1967
1968 int
1969 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1970 {
1971 va_list ap;
1972 struct vfsopt *opt;
1973 int ret;
1974
1975 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1976
1977 TAILQ_FOREACH(opt, opts, link) {
1978 if (strcmp(name, opt->name) != 0)
1979 continue;
1980 if (opt->len == 0 || opt->value == NULL)
1981 return (0);
1982 if (((char *)opt->value)[opt->len - 1] != '\0')
1983 return (0);
1984 va_start(ap, fmt);
1985 ret = vsscanf(opt->value, fmt, ap);
1986 va_end(ap);
1987 return (ret);
1988 }
1989 return (0);
1990 }
1991
1992 /*
1993 * Find and copy a mount option.
1994 *
1995 * The size of the buffer has to be specified
1996 * in len, if it is not the same length as the
1997 * mount option, EINVAL is returned.
1998 * Returns ENOENT if the option is not found.
1999 */
2000 int
2001 vfs_copyopt(opts, name, dest, len)
2002 struct vfsoptlist *opts;
2003 const char *name;
2004 void *dest;
2005 int len;
2006 {
2007 struct vfsopt *opt;
2008
2009 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
2010
2011 TAILQ_FOREACH(opt, opts, link) {
2012 if (strcmp(name, opt->name) == 0) {
2013 if (len != opt->len)
2014 return (EINVAL);
2015 bcopy(opt->value, dest, opt->len);
2016 return (0);
2017 }
2018 }
2019 return (ENOENT);
2020 }
2021
2022 /*
2023 * This is a helper function for filesystems to traverse their
2024 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h
2025 */
2026
2027 struct vnode *
2028 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
2029 {
2030 struct vnode *vp;
2031
2032 mtx_assert(MNT_MTX(mp), MA_OWNED);
2033
2034 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2035 if ((*mvp)->v_yield++ == 500) {
2036 MNT_IUNLOCK(mp);
2037 (*mvp)->v_yield = 0;
2038 uio_yield();
2039 MNT_ILOCK(mp);
2040 }
2041 vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
2042 while (vp != NULL && vp->v_type == VMARKER)
2043 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2044
2045 /* Check if we are done */
2046 if (vp == NULL) {
2047 __mnt_vnode_markerfree(mvp, mp);
2048 return (NULL);
2049 }
2050 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2051 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2052 return (vp);
2053 }
2054
2055 struct vnode *
2056 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
2057 {
2058 struct vnode *vp;
2059
2060 mtx_assert(MNT_MTX(mp), MA_OWNED);
2061
2062 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2063 while (vp != NULL && vp->v_type == VMARKER)
2064 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2065
2066 /* Check if we are done */
2067 if (vp == NULL) {
2068 *mvp = NULL;
2069 return (NULL);
2070 }
2071 mp->mnt_holdcnt++;
2072 MNT_IUNLOCK(mp);
2073 *mvp = (struct vnode *) malloc(sizeof(struct vnode),
2074 M_VNODE_MARKER,
2075 M_WAITOK | M_ZERO);
2076 MNT_ILOCK(mp);
2077 (*mvp)->v_type = VMARKER;
2078
2079 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
2080 while (vp != NULL && vp->v_type == VMARKER)
2081 vp = TAILQ_NEXT(vp, v_nmntvnodes);
2082
2083 /* Check if we are done */
2084 if (vp == NULL) {
2085 MNT_IUNLOCK(mp);
2086 free(*mvp, M_VNODE_MARKER);
2087 MNT_ILOCK(mp);
2088 *mvp = NULL;
2089 mp->mnt_holdcnt--;
2090 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2091 wakeup(&mp->mnt_holdcnt);
2092 return (NULL);
2093 }
2094 mp->mnt_markercnt++;
2095 (*mvp)->v_mount = mp;
2096 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
2097 return (vp);
2098 }
2099
2100
2101 void
2102 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
2103 {
2104
2105 if (*mvp == NULL)
2106 return;
2107
2108 mtx_assert(MNT_MTX(mp), MA_OWNED);
2109
2110 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
2111 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
2112 MNT_IUNLOCK(mp);
2113 free(*mvp, M_VNODE_MARKER);
2114 MNT_ILOCK(mp);
2115 *mvp = NULL;
2116
2117 mp->mnt_markercnt--;
2118 mp->mnt_holdcnt--;
2119 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
2120 wakeup(&mp->mnt_holdcnt);
2121 }
2122
2123
2124 int
2125 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
2126 {
2127 int error;
2128
2129 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
2130 if (sbp != &mp->mnt_stat)
2131 *sbp = mp->mnt_stat;
2132 return (error);
2133 }
2134
2135 void
2136 vfs_mountedfrom(struct mount *mp, const char *from)
2137 {
2138
2139 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
2140 strlcpy(mp->mnt_stat.f_mntfromname, from,
2141 sizeof mp->mnt_stat.f_mntfromname);
2142 }
2143
2144 /*
2145 * ---------------------------------------------------------------------
2146 * This is the api for building mount args and mounting filesystems from
2147 * inside the kernel.
2148 *
2149 * The API works by accumulation of individual args. First error is
2150 * latched.
2151 *
2152 * XXX: should be documented in new manpage kernel_mount(9)
2153 */
2154
/*
 * A memory allocation which must be freed when we are done.
 *
 * This is only a list header: the users below allocate
 * sizeof(struct mntaarg) plus the payload size and keep the payload
 * directly after the header (at maa + 1).
 */
struct mntaarg {
	SLIST_ENTRY(mntaarg)	next;	/* linkage on mntarg.list */
};
2159
/* The header for the mount arguments */
struct mntarg {
	struct iovec *v;		/* name/value iovec entries, grown with realloc() */
	int len;			/* number of iovec entries in use */
	int error;			/* latched error; once set, later args are ignored */
	SLIST_HEAD(, mntaarg)	list;	/* allocations released by free_mntarg() */
};
2167
2168 /*
2169 * Add a boolean argument.
2170 *
2171 * flag is the boolean value.
2172 * name must start with "no".
2173 */
2174 struct mntarg *
2175 mount_argb(struct mntarg *ma, int flag, const char *name)
2176 {
2177
2178 KASSERT(name[0] == 'n' && name[1] == 'o',
2179 ("mount_argb(...,%s): name must start with 'no'", name));
2180
2181 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
2182 }
2183
2184 /*
2185 * Add an argument printf style
2186 */
2187 struct mntarg *
2188 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
2189 {
2190 va_list ap;
2191 struct mntaarg *maa;
2192 struct sbuf *sb;
2193 int len;
2194
2195 if (ma == NULL) {
2196 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2197 SLIST_INIT(&ma->list);
2198 }
2199 if (ma->error)
2200 return (ma);
2201
2202 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2203 M_MOUNT, M_WAITOK);
2204 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2205 ma->v[ma->len].iov_len = strlen(name) + 1;
2206 ma->len++;
2207
2208 sb = sbuf_new_auto();
2209 va_start(ap, fmt);
2210 sbuf_vprintf(sb, fmt, ap);
2211 va_end(ap);
2212 sbuf_finish(sb);
2213 len = sbuf_len(sb) + 1;
2214 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2215 SLIST_INSERT_HEAD(&ma->list, maa, next);
2216 bcopy(sbuf_data(sb), maa + 1, len);
2217 sbuf_delete(sb);
2218
2219 ma->v[ma->len].iov_base = maa + 1;
2220 ma->v[ma->len].iov_len = len;
2221 ma->len++;
2222
2223 return (ma);
2224 }
2225
2226 /*
2227 * Add an argument which is a userland string.
2228 */
2229 struct mntarg *
2230 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
2231 {
2232 struct mntaarg *maa;
2233 char *tbuf;
2234
2235 if (val == NULL)
2236 return (ma);
2237 if (ma == NULL) {
2238 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2239 SLIST_INIT(&ma->list);
2240 }
2241 if (ma->error)
2242 return (ma);
2243 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2244 SLIST_INSERT_HEAD(&ma->list, maa, next);
2245 tbuf = (void *)(maa + 1);
2246 ma->error = copyinstr(val, tbuf, len, NULL);
2247 return (mount_arg(ma, name, tbuf, -1));
2248 }
2249
2250 /*
2251 * Plain argument.
2252 *
2253 * If length is -1, use printf.
2254 */
2255 struct mntarg *
2256 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
2257 {
2258
2259 if (ma == NULL) {
2260 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2261 SLIST_INIT(&ma->list);
2262 }
2263 if (ma->error)
2264 return (ma);
2265
2266 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2267 M_MOUNT, M_WAITOK);
2268 ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2269 ma->v[ma->len].iov_len = strlen(name) + 1;
2270 ma->len++;
2271
2272 ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
2273 if (len < 0)
2274 ma->v[ma->len].iov_len = strlen(val) + 1;
2275 else
2276 ma->v[ma->len].iov_len = len;
2277 ma->len++;
2278 return (ma);
2279 }
2280
2281 /*
2282 * Free a mntarg structure
2283 */
2284 static void
2285 free_mntarg(struct mntarg *ma)
2286 {
2287 struct mntaarg *maa;
2288
2289 while (!SLIST_EMPTY(&ma->list)) {
2290 maa = SLIST_FIRST(&ma->list);
2291 SLIST_REMOVE_HEAD(&ma->list, next);
2292 free(maa, M_MOUNT);
2293 }
2294 free(ma->v, M_MOUNT);
2295 free(ma, M_MOUNT);
2296 }
2297
2298 /*
2299 * Mount a filesystem
2300 */
2301 int
2302 kernel_mount(struct mntarg *ma, int flags)
2303 {
2304 struct uio auio;
2305 int error;
2306
2307 KASSERT(ma != NULL, ("kernel_mount NULL ma"));
2308 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
2309 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
2310
2311 auio.uio_iov = ma->v;
2312 auio.uio_iovcnt = ma->len;
2313 auio.uio_segflg = UIO_SYSSPACE;
2314
2315 error = ma->error;
2316 if (!error)
2317 error = vfs_donmount(curthread, flags, &auio);
2318 free_mntarg(ma);
2319 return (error);
2320 }
2321
2322 /*
2323 * A printflike function to mount a filesystem.
2324 */
2325 int
2326 kernel_vmount(int flags, ...)
2327 {
2328 struct mntarg *ma = NULL;
2329 va_list ap;
2330 const char *cp;
2331 const void *vp;
2332 int error;
2333
2334 va_start(ap, flags);
2335 for (;;) {
2336 cp = va_arg(ap, const char *);
2337 if (cp == NULL)
2338 break;
2339 vp = va_arg(ap, const void *);
2340 ma = mount_arg(ma, cp, vp, -1);
2341 }
2342 va_end(ap);
2343
2344 error = kernel_mount(ma, flags);
2345 return (error);
2346 }
Cache object: 9ae70ff5dd77eb97c6977c9bdb645707
|