FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c
1 /*-
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: src/sys/kern/vfs_mount.c,v 1.296 2008/11/03 20:00:35 attilio Exp $");
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/fcntl.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/libkern.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/filedesc.h>
53 #include <sys/reboot.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysproto.h>
56 #include <sys/sx.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61 #include <vm/uma.h>
62
63 #include <geom/geom.h>
64
65 #include <machine/stdarg.h>
66
67 #include <security/audit/audit.h>
68 #include <security/mac/mac_framework.h>
69
70 #include "opt_rootdevname.h"
71 #include "opt_mac.h"
72
73 #define ROOTNAME "root_device"
74 #define VFS_MOUNTARG_SIZE_MAX (1024 * 64)
75
/* Forward declarations of file-local functions used before their definition. */
static int vfs_domount(struct thread *td, const char *fstype,
    char *fspath, int fsflags, void *fsdata);
static int vfs_mountroot_ask(void);
static int vfs_mountroot_try(const char *mountfrom);
static void free_mntarg(struct mntarg *ma);
static int vfs_getopt_pos(struct vfsoptlist *opts, const char *name);

/*
 * Exported read/write as the "usermount" integer under the _vfs sysctl
 * node.  vfs_domount() only demands PRIV_VFS_MOUNT when the caller is
 * jailed or this is zero.
 */
static int usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* malloc(9) types; M_MOUNT is used for the option lists in this file. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
/* UMA zone that struct mount is allocated from and returned to. */
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
97
/* Head type for a tail queue of mount options. */
TAILQ_HEAD(vfsoptlist, vfsopt);
/*
 * A single mount option: a name plus an optional opaque value.
 * The name and value are separately allocated from M_MOUNT and are
 * released by vfs_freeopt().
 */
struct vfsopt {
	TAILQ_ENTRY(vfsopt) link;	/* linkage on the owning vfsoptlist */
	char *name;			/* NUL-terminated option name */
	void *value;			/* value bytes; NULL when len is 0 */
	int len;			/* size of value in bytes */
};
105
106 /*
107 * The vnode of the system's root (/ in the filesystem, without chroot
108 * active.)
109 */
110 struct vnode *rootvnode;
111
112 /*
113 * The root filesystem is detailed in the kernel environment variable
114 * vfs.root.mountfrom, which is expected to be in the general format
115 *
116 * <vfsname>:[<path>]
117 * vfsname := the name of a VFS known to the kernel and capable
118 * of being mounted as root
119 * path := disk device name or other data used by the filesystem
120 * to locate its physical store
121 */
122
123 /*
124 * Global opts, taken by all filesystems
125 */
/* NULL-terminated table of option names. */
static const char *global_opts[] = {
	"errmsg",
	"fstype",
	"fspath",
	"ro",
	"rw",
	"nosuid",
	"noexec",
	NULL
};
136
137 /*
138 * The root specifiers we will try if RB_CDROM is specified.
139 */
/* NULL-terminated; each entry is in "<vfsname>:<path>" form. */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};
145
146 /* legacy find-root code */
char *rootdevnames[2] = {NULL, NULL};
/*
 * ROOTDEVNAME is presumably supplied by the kernel configuration through
 * opt_rootdevname.h (TODO confirm); fall back to NULL when it is not set.
 */
#ifndef ROOTDEVNAME
# define ROOTDEVNAME NULL
#endif
/* Compile-time root device name, or NULL if none was configured. */
static const char *ctrootdevname = ROOTDEVNAME;
152
153 /*
154 * ---------------------------------------------------------------------
155 * Functions for building and sanitizing the mount options
156 */
157
158 /* Remove one mount option. */
159 static void
160 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
161 {
162
163 TAILQ_REMOVE(opts, opt, link);
164 free(opt->name, M_MOUNT);
165 if (opt->value != NULL)
166 free(opt->value, M_MOUNT);
167 #ifdef INVARIANTS
168 else if (opt->len != 0)
169 panic("%s: mount option with NULL value but length != 0",
170 __func__);
171 #endif
172 free(opt, M_MOUNT);
173 }
174
175 /* Release all resources related to the mount options. */
176 void
177 vfs_freeopts(struct vfsoptlist *opts)
178 {
179 struct vfsopt *opt;
180
181 while (!TAILQ_EMPTY(opts)) {
182 opt = TAILQ_FIRST(opts);
183 vfs_freeopt(opts, opt);
184 }
185 free(opts, M_MOUNT);
186 }
187
188 void
189 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
190 {
191 struct vfsopt *opt, *temp;
192
193 if (opts == NULL)
194 return;
195 TAILQ_FOREACH_SAFE(opt, opts, link, temp) {
196 if (strcmp(opt->name, name) == 0)
197 vfs_freeopt(opts, opt);
198 }
199 }
200
/*
 * Decide whether two option names refer to the same option.
 *
 * Returns 1 when the names are identical, or when one of them is the
 * other with a leading "no" prepended; returns 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{

	/*
	 * The "no" prefix test guards each "+ 2" so that we never step
	 * past the end of a name shorter than two characters.
	 */
	return (strcmp(opt1, opt2) == 0 ||
	    (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) ||
	    (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0));
}
219
220 /*
221 * If a mount option is specified several times,
222 * (with or without the "no" prefix) only keep
223 * the last occurence of it.
224 */
225 static void
226 vfs_sanitizeopts(struct vfsoptlist *opts)
227 {
228 struct vfsopt *opt, *opt2, *tmp;
229
230 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
231 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
232 while (opt2 != NULL) {
233 if (vfs_equalopts(opt->name, opt2->name)) {
234 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
235 vfs_freeopt(opts, opt2);
236 opt2 = tmp;
237 } else {
238 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
239 }
240 }
241 }
242 }
243
244 /*
245 * Build a linked list of mount options from a struct uio.
246 */
247 static int
248 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
249 {
250 struct vfsoptlist *opts;
251 struct vfsopt *opt;
252 size_t memused;
253 unsigned int i, iovcnt;
254 int error, namelen, optlen;
255
256 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
257 TAILQ_INIT(opts);
258 memused = 0;
259 iovcnt = auio->uio_iovcnt;
260 for (i = 0; i < iovcnt; i += 2) {
261 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
262 namelen = auio->uio_iov[i].iov_len;
263 optlen = auio->uio_iov[i + 1].iov_len;
264 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
265 opt->value = NULL;
266 opt->len = 0;
267
268 /*
269 * Do this early, so jumps to "bad" will free the current
270 * option.
271 */
272 TAILQ_INSERT_TAIL(opts, opt, link);
273 memused += sizeof(struct vfsopt) + optlen + namelen;
274
275 /*
276 * Avoid consuming too much memory, and attempts to overflow
277 * memused.
278 */
279 if (memused > VFS_MOUNTARG_SIZE_MAX ||
280 optlen > VFS_MOUNTARG_SIZE_MAX ||
281 namelen > VFS_MOUNTARG_SIZE_MAX) {
282 error = EINVAL;
283 goto bad;
284 }
285
286 if (auio->uio_segflg == UIO_SYSSPACE) {
287 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
288 } else {
289 error = copyin(auio->uio_iov[i].iov_base, opt->name,
290 namelen);
291 if (error)
292 goto bad;
293 }
294 /* Ensure names are null-terminated strings. */
295 if (opt->name[namelen - 1] != '\0') {
296 error = EINVAL;
297 goto bad;
298 }
299 if (optlen != 0) {
300 opt->len = optlen;
301 opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
302 if (auio->uio_segflg == UIO_SYSSPACE) {
303 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
304 optlen);
305 } else {
306 error = copyin(auio->uio_iov[i + 1].iov_base,
307 opt->value, optlen);
308 if (error)
309 goto bad;
310 }
311 }
312 }
313 vfs_sanitizeopts(opts);
314 *options = opts;
315 return (0);
316 bad:
317 vfs_freeopts(opts);
318 return (error);
319 }
320
321 /*
322 * Merge the old mount options with the new ones passed
323 * in the MNT_UPDATE case.
324 *
325 * XXX This function will keep a "nofoo" option in the
326 * new options if there is no matching "foo" option
327 * to be cancelled in the old options. This is a bug
328 * if the option's canonical name is "foo". E.g., "noro"
329 * shouldn't end up in the mount point's active options,
330 * but it can.
331 */
332 static void
333 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
334 {
335 struct vfsopt *opt, *opt2, *new;
336
337 TAILQ_FOREACH(opt, opts, link) {
338 /*
339 * Check that this option hasn't been redefined
340 * nor cancelled with a "no" mount option.
341 */
342 opt2 = TAILQ_FIRST(toopts);
343 while (opt2 != NULL) {
344 if (strcmp(opt2->name, opt->name) == 0)
345 goto next;
346 if (strncmp(opt2->name, "no", 2) == 0 &&
347 strcmp(opt2->name + 2, opt->name) == 0) {
348 vfs_freeopt(toopts, opt2);
349 goto next;
350 }
351 opt2 = TAILQ_NEXT(opt2, link);
352 }
353 /* We want this option, duplicate it. */
354 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
355 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
356 strcpy(new->name, opt->name);
357 if (opt->len != 0) {
358 new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
359 bcopy(opt->value, new->value, opt->len);
360 } else {
361 new->value = NULL;
362 }
363 new->len = opt->len;
364 TAILQ_INSERT_TAIL(toopts, new, link);
365 next:
366 continue;
367 }
368 }
369
370 /*
371 * Mount a filesystem.
372 */
373 int
374 nmount(td, uap)
375 struct thread *td;
376 struct nmount_args /* {
377 struct iovec *iovp;
378 unsigned int iovcnt;
379 int flags;
380 } */ *uap;
381 {
382 struct uio *auio;
383 struct iovec *iov;
384 unsigned int i;
385 int error;
386 u_int iovcnt;
387
388 AUDIT_ARG(fflags, uap->flags);
389
390 /*
391 * Filter out MNT_ROOTFS. We do not want clients of nmount() in
392 * userspace to set this flag, but we must filter it out if we want
393 * MNT_UPDATE on the root file system to work.
394 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
395 */
396 uap->flags &= ~MNT_ROOTFS;
397
398 iovcnt = uap->iovcnt;
399 /*
400 * Check that we have an even number of iovec's
401 * and that we have at least two options.
402 */
403 if ((iovcnt & 1) || (iovcnt < 4))
404 return (EINVAL);
405
406 error = copyinuio(uap->iovp, iovcnt, &auio);
407 if (error)
408 return (error);
409 iov = auio->uio_iov;
410 for (i = 0; i < iovcnt; i++) {
411 if (iov->iov_len > MMAXOPTIONLEN) {
412 free(auio, M_IOV);
413 return (EINVAL);
414 }
415 iov++;
416 }
417 error = vfs_donmount(td, uap->flags, auio);
418
419 free(auio, M_IOV);
420 return (error);
421 }
422
423 /*
424 * ---------------------------------------------------------------------
425 * Various utility functions
426 */
427
/*
 * Apply MNT_REF() to the mount point while holding its interlock
 * (MNT_ILOCK/MNT_IUNLOCK).  Presumably this bumps mp->mnt_ref, the count
 * vfs_mount_destroy() waits on -- confirm against the macro definition.
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
436
/*
 * Apply MNT_REL() to the mount point while holding its interlock
 * (MNT_ILOCK/MNT_IUNLOCK).  Presumably the counterpart of vfs_ref()
 * that drops the reference again -- confirm against the macro definition.
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
445
446 static int
447 mount_init(void *mem, int size, int flags)
448 {
449 struct mount *mp;
450
451 mp = (struct mount *)mem;
452 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
453 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
454 return (0);
455 }
456
457 static void
458 mount_fini(void *mem, int size)
459 {
460 struct mount *mp;
461
462 mp = (struct mount *)mem;
463 lockdestroy(&mp->mnt_explock);
464 mtx_destroy(&mp->mnt_mtx);
465 }
466
467 /*
468 * Allocate and initialize the mount point struct.
469 */
470 struct mount *
471 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
472 struct ucred *cred)
473 {
474 struct mount *mp;
475
476 mp = uma_zalloc(mount_zone, M_WAITOK);
477 bzero(&mp->mnt_startzero,
478 __rangeof(struct mount, mnt_startzero, mnt_endzero));
479 TAILQ_INIT(&mp->mnt_nvnodelist);
480 mp->mnt_nvnodelistsize = 0;
481 mp->mnt_ref = 0;
482 (void) vfs_busy(mp, MBF_NOWAIT);
483 mp->mnt_op = vfsp->vfc_vfsops;
484 mp->mnt_vfc = vfsp;
485 vfsp->vfc_refcount++; /* XXX Unlocked */
486 mp->mnt_stat.f_type = vfsp->vfc_typenum;
487 mp->mnt_gen++;
488 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
489 mp->mnt_vnodecovered = vp;
490 mp->mnt_cred = crdup(cred);
491 mp->mnt_stat.f_owner = cred->cr_uid;
492 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
493 mp->mnt_iosize_max = DFLTPHYS;
494 #ifdef MAC
495 mac_mount_init(mp);
496 mac_mount_create(cred, mp);
497 #endif
498 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
499 return (mp);
500 }
501
/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
 *
 * Blocks until mnt_ref, mnt_writeopcount and mnt_secondary_writes have
 * all drained, panics if vnodes are still attached or a counter is
 * inconsistent, and then releases the options, the credential and the
 * structure itself.
 */
void
vfs_mount_destroy(struct mount *mp)
{

	MNT_ILOCK(mp);
	/* Sleep on mp (dropping the interlock) until no references remain. */
	while (mp->mnt_ref)
		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
	if (mp->mnt_writeopcount > 0) {
		printf("Waiting for mount point write ops\n");
		while (mp->mnt_writeopcount > 0) {
			/* Re-assert MNTK_SUSPEND before every sleep. */
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_writeopcount,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy2", 0);
		}
		printf("mount point write ops completed\n");
	}
	if (mp->mnt_secondary_writes > 0) {
		printf("Waiting for mount point secondary write ops\n");
		while (mp->mnt_secondary_writes > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_secondary_writes,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy3", 0);
		}
		printf("mount point secondary write ops completed\n");
	}
	MNT_IUNLOCK(mp);
	/* Undo the increment done in vfs_mount_alloc() (also unlocked). */
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
		struct vnode *vp;

		/* Print every leftover vnode before giving up. */
		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
			vprint("", vp);
		panic("unmount: dangling vnode");
	}
	MNT_ILOCK(mp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	if (mp->mnt_writeopcount != 0)
		panic("vfs_mount_destroy: nonzero writeopcount");
	if (mp->mnt_secondary_writes != 0)
		panic("vfs_mount_destroy: nonzero secondary_writes");
	if (mp->mnt_nvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero nvnodelistsize");
	/*
	 * Store recognisable bogus values in the counters; presumably to
	 * make any use after destruction stand out.
	 */
	mp->mnt_writeopcount = -1000;
	mp->mnt_nvnodelistsize = -1000;
	mp->mnt_secondary_writes = -1000;
	MNT_IUNLOCK(mp);
#ifdef MAC
	mac_mount_destroy(mp);
#endif
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	crfree(mp->mnt_cred);
	uma_zfree(mount_zone, mp);
}
562
563 int
564 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
565 {
566 struct vfsoptlist *optlist;
567 struct vfsopt *opt, *noro_opt, *tmp_opt;
568 char *fstype, *fspath, *errmsg;
569 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
570 int has_rw, has_noro;
571
572 errmsg = fspath = NULL;
573 errmsg_len = has_noro = has_rw = fspathlen = 0;
574 errmsg_pos = -1;
575
576 error = vfs_buildopts(fsoptions, &optlist);
577 if (error)
578 return (error);
579
580 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
581 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
582
583 /*
584 * We need these two options before the others,
585 * and they are mandatory for any filesystem.
586 * Ensure they are NUL terminated as well.
587 */
588 fstypelen = 0;
589 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
590 if (error || fstype[fstypelen - 1] != '\0') {
591 error = EINVAL;
592 if (errmsg != NULL)
593 strncpy(errmsg, "Invalid fstype", errmsg_len);
594 goto bail;
595 }
596 fspathlen = 0;
597 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
598 if (error || fspath[fspathlen - 1] != '\0') {
599 error = EINVAL;
600 if (errmsg != NULL)
601 strncpy(errmsg, "Invalid fspath", errmsg_len);
602 goto bail;
603 }
604
605 /*
606 * We need to see if we have the "update" option
607 * before we call vfs_domount(), since vfs_domount() has special
608 * logic based on MNT_UPDATE. This is very important
609 * when we want to update the root filesystem.
610 */
611 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
612 if (strcmp(opt->name, "update") == 0) {
613 fsflags |= MNT_UPDATE;
614 vfs_freeopt(optlist, opt);
615 }
616 else if (strcmp(opt->name, "async") == 0)
617 fsflags |= MNT_ASYNC;
618 else if (strcmp(opt->name, "force") == 0) {
619 fsflags |= MNT_FORCE;
620 vfs_freeopt(optlist, opt);
621 }
622 else if (strcmp(opt->name, "reload") == 0) {
623 fsflags |= MNT_RELOAD;
624 vfs_freeopt(optlist, opt);
625 }
626 else if (strcmp(opt->name, "multilabel") == 0)
627 fsflags |= MNT_MULTILABEL;
628 else if (strcmp(opt->name, "noasync") == 0)
629 fsflags &= ~MNT_ASYNC;
630 else if (strcmp(opt->name, "noatime") == 0)
631 fsflags |= MNT_NOATIME;
632 else if (strcmp(opt->name, "atime") == 0) {
633 free(opt->name, M_MOUNT);
634 opt->name = strdup("nonoatime", M_MOUNT);
635 }
636 else if (strcmp(opt->name, "noclusterr") == 0)
637 fsflags |= MNT_NOCLUSTERR;
638 else if (strcmp(opt->name, "clusterr") == 0) {
639 free(opt->name, M_MOUNT);
640 opt->name = strdup("nonoclusterr", M_MOUNT);
641 }
642 else if (strcmp(opt->name, "noclusterw") == 0)
643 fsflags |= MNT_NOCLUSTERW;
644 else if (strcmp(opt->name, "clusterw") == 0) {
645 free(opt->name, M_MOUNT);
646 opt->name = strdup("nonoclusterw", M_MOUNT);
647 }
648 else if (strcmp(opt->name, "noexec") == 0)
649 fsflags |= MNT_NOEXEC;
650 else if (strcmp(opt->name, "exec") == 0) {
651 free(opt->name, M_MOUNT);
652 opt->name = strdup("nonoexec", M_MOUNT);
653 }
654 else if (strcmp(opt->name, "nosuid") == 0)
655 fsflags |= MNT_NOSUID;
656 else if (strcmp(opt->name, "suid") == 0) {
657 free(opt->name, M_MOUNT);
658 opt->name = strdup("nonosuid", M_MOUNT);
659 }
660 else if (strcmp(opt->name, "nosymfollow") == 0)
661 fsflags |= MNT_NOSYMFOLLOW;
662 else if (strcmp(opt->name, "symfollow") == 0) {
663 free(opt->name, M_MOUNT);
664 opt->name = strdup("nonosymfollow", M_MOUNT);
665 }
666 else if (strcmp(opt->name, "noro") == 0) {
667 fsflags &= ~MNT_RDONLY;
668 has_noro = 1;
669 }
670 else if (strcmp(opt->name, "rw") == 0) {
671 fsflags &= ~MNT_RDONLY;
672 has_rw = 1;
673 }
674 else if (strcmp(opt->name, "ro") == 0)
675 fsflags |= MNT_RDONLY;
676 else if (strcmp(opt->name, "rdonly") == 0) {
677 free(opt->name, M_MOUNT);
678 opt->name = strdup("ro", M_MOUNT);
679 fsflags |= MNT_RDONLY;
680 }
681 else if (strcmp(opt->name, "suiddir") == 0)
682 fsflags |= MNT_SUIDDIR;
683 else if (strcmp(opt->name, "sync") == 0)
684 fsflags |= MNT_SYNCHRONOUS;
685 else if (strcmp(opt->name, "union") == 0)
686 fsflags |= MNT_UNION;
687 }
688
689 /*
690 * If "rw" was specified as a mount option, and we
691 * are trying to update a mount-point from "ro" to "rw",
692 * we need a mount option "noro", since in vfs_mergeopts(),
693 * "noro" will cancel "ro", but "rw" will not do anything.
694 */
695 if (has_rw && !has_noro) {
696 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
697 noro_opt->name = strdup("noro", M_MOUNT);
698 noro_opt->value = NULL;
699 noro_opt->len = 0;
700 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
701 }
702
703 /*
704 * Be ultra-paranoid about making sure the type and fspath
705 * variables will fit in our mp buffers, including the
706 * terminating NUL.
707 */
708 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
709 error = ENAMETOOLONG;
710 goto bail;
711 }
712
713 mtx_lock(&Giant);
714 error = vfs_domount(td, fstype, fspath, fsflags, optlist);
715 mtx_unlock(&Giant);
716 bail:
717 /* copyout the errmsg */
718 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
719 && errmsg_len > 0 && errmsg != NULL) {
720 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
721 bcopy(errmsg,
722 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
723 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
724 } else {
725 copyout(errmsg,
726 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
727 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
728 }
729 }
730
731 if (error != 0)
732 vfs_freeopts(optlist);
733 return (error);
734 }
735
736 /*
737 * Old mount API.
738 */
739 #ifndef _SYS_SYSPROTO_H_
740 struct mount_args {
741 char *type;
742 char *path;
743 int flags;
744 caddr_t data;
745 };
746 #endif
747 /* ARGSUSED */
748 int
749 mount(td, uap)
750 struct thread *td;
751 struct mount_args /* {
752 char *type;
753 char *path;
754 int flags;
755 caddr_t data;
756 } */ *uap;
757 {
758 char *fstype;
759 struct vfsconf *vfsp = NULL;
760 struct mntarg *ma = NULL;
761 int error;
762
763 AUDIT_ARG(fflags, uap->flags);
764
765 /*
766 * Filter out MNT_ROOTFS. We do not want clients of mount() in
767 * userspace to set this flag, but we must filter it out if we want
768 * MNT_UPDATE on the root file system to work.
769 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
770 */
771 uap->flags &= ~MNT_ROOTFS;
772
773 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
774 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
775 if (error) {
776 free(fstype, M_TEMP);
777 return (error);
778 }
779
780 AUDIT_ARG(text, fstype);
781 mtx_lock(&Giant);
782 vfsp = vfs_byname_kld(fstype, td, &error);
783 free(fstype, M_TEMP);
784 if (vfsp == NULL) {
785 mtx_unlock(&Giant);
786 return (ENOENT);
787 }
788 if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
789 mtx_unlock(&Giant);
790 return (EOPNOTSUPP);
791 }
792
793 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
794 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
795 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
796 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
797 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
798
799 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
800 mtx_unlock(&Giant);
801 return (error);
802 }
803
804
805 /*
806 * vfs_domount(): actually attempt a filesystem mount.
807 */
808 static int
809 vfs_domount(
810 struct thread *td, /* Calling thread. */
811 const char *fstype, /* Filesystem type. */
812 char *fspath, /* Mount path. */
813 int fsflags, /* Flags common to all filesystems. */
814 void *fsdata /* Options local to the filesystem. */
815 )
816 {
817 struct vnode *vp;
818 struct mount *mp;
819 struct vfsconf *vfsp;
820 struct oexport_args oexport;
821 struct export_args export;
822 int error, flag = 0;
823 struct vattr va;
824 struct nameidata nd;
825
826 mtx_assert(&Giant, MA_OWNED);
827 /*
828 * Be ultra-paranoid about making sure the type and fspath
829 * variables will fit in our mp buffers, including the
830 * terminating NUL.
831 */
832 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
833 return (ENAMETOOLONG);
834
835 if (jailed(td->td_ucred) || usermount == 0) {
836 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
837 return (error);
838 }
839
840 /*
841 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
842 */
843 if (fsflags & MNT_EXPORTED) {
844 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
845 if (error)
846 return (error);
847 }
848 if (fsflags & MNT_SUIDDIR) {
849 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
850 if (error)
851 return (error);
852 }
853 /*
854 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
855 */
856 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
857 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
858 fsflags |= MNT_NOSUID | MNT_USER;
859 }
860
861 /* Load KLDs before we lock the covered vnode to avoid reversals. */
862 vfsp = NULL;
863 if ((fsflags & MNT_UPDATE) == 0) {
864 /* Don't try to load KLDs if we're mounting the root. */
865 if (fsflags & MNT_ROOTFS)
866 vfsp = vfs_byname(fstype);
867 else
868 vfsp = vfs_byname_kld(fstype, td, &error);
869 if (vfsp == NULL)
870 return (ENODEV);
871 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
872 return (EPERM);
873 }
874 /*
875 * Get vnode to be covered
876 */
877 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
878 fspath, td);
879 if ((error = namei(&nd)) != 0)
880 return (error);
881 NDFREE(&nd, NDF_O |