[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 1999-2004 Poul-Henning Kamp
  3  * Copyright (c) 1999 Michael Smith
  4  * Copyright (c) 1989, 1993
  5  *      The Regents of the University of California.  All rights reserved.
  6  * (c) UNIX System Laboratories, Inc.
  7  * All or some portions of this file are derived from material licensed
  8  * to the University of California by American Telephone and Telegraph
  9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 10  * the permission of UNIX System Laboratories, Inc.
 11  *
 12  * Redistribution and use in source and binary forms, with or without
 13  * modification, are permitted provided that the following conditions
 14  * are met:
 15  * 1. Redistributions of source code must retain the above copyright
 16  *    notice, this list of conditions and the following disclaimer.
 17  * 2. Redistributions in binary form must reproduce the above copyright
 18  *    notice, this list of conditions and the following disclaimer in the
 19  *    documentation and/or other materials provided with the distribution.
 20  * 4. Neither the name of the University nor the names of its contributors
 21  *    may be used to endorse or promote products derived from this software
 22  *    without specific prior written permission.
 23  *
 24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 34  * SUCH DAMAGE.
 35  */
 36 
 37 #include <sys/cdefs.h>
 38 __FBSDID("$FreeBSD: src/sys/kern/vfs_mount.c,v 1.296 2008/11/03 20:00:35 attilio Exp $");
 39 
 40 #include <sys/param.h>
 41 #include <sys/conf.h>
 42 #include <sys/fcntl.h>
 43 #include <sys/jail.h>
 44 #include <sys/kernel.h>
 45 #include <sys/libkern.h>
 46 #include <sys/malloc.h>
 47 #include <sys/mount.h>
 48 #include <sys/mutex.h>
 49 #include <sys/namei.h>
 50 #include <sys/priv.h>
 51 #include <sys/proc.h>
 52 #include <sys/filedesc.h>
 53 #include <sys/reboot.h>
 54 #include <sys/syscallsubr.h>
 55 #include <sys/sysproto.h>
 56 #include <sys/sx.h>
 57 #include <sys/sysctl.h>
 58 #include <sys/sysent.h>
 59 #include <sys/systm.h>
 60 #include <sys/vnode.h>
 61 #include <vm/uma.h>
 62 
 63 #include <geom/geom.h>
 64 
 65 #include <machine/stdarg.h>
 66 
 67 #include <security/audit/audit.h>
 68 #include <security/mac/mac_framework.h>
 69 
 70 #include "opt_rootdevname.h"
 71 #include "opt_mac.h"
 72 
 73 #define ROOTNAME                "root_device"
 74 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
 75 
 76 static int      vfs_domount(struct thread *td, const char *fstype,
 77                     char *fspath, int fsflags, void *fsdata);
 78 static int      vfs_mountroot_ask(void);
 79 static int      vfs_mountroot_try(const char *mountfrom);
 80 static void     free_mntarg(struct mntarg *ma);
 81 static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
 82 
 83 static int      usermount = 0;
 84 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
 85     "Unprivileged users may mount and unmount file systems");
 86 
 87 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
 88 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
 89 static uma_zone_t mount_zone;
 90 
 91 /* List of mounted filesystems. */
 92 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
 93 
 94 /* For any iteration/modification of mountlist */
 95 struct mtx mountlist_mtx;
 96 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
 97 
/* Head of a tail queue of mount options. */
TAILQ_HEAD(vfsoptlist, vfsopt);
/*
 * A single mount option: a name and an optional opaque value.
 * All three allocations (the struct, name and value) come from M_MOUNT
 * and are released together by vfs_freeopt().
 */
struct vfsopt {
	TAILQ_ENTRY(vfsopt) link;	/* linkage within a vfsoptlist */
	char	*name;			/* option name; vfs_buildopts() rejects names that are not NUL-terminated */
	void	*value;			/* option payload, or NULL when len == 0 */
	int	len;			/* size of value in bytes */
};
105 
106 /*
107  * The vnode of the system's root (/ in the filesystem, without chroot
108  * active.)
109  */
110 struct vnode    *rootvnode;
111 
112 /*
113  * The root filesystem is detailed in the kernel environment variable
114  * vfs.root.mountfrom, which is expected to be in the general format
115  *
116  * <vfsname>:[<path>]
117  * vfsname   := the name of a VFS known to the kernel and capable
118  *              of being mounted as root
119  * path      := disk device name or other data used by the filesystem
120  *              to locate its physical store
121  */
122 
123 /*
124  * Global opts, taken by all filesystems
125  */
126 static const char *global_opts[] = {
127         "errmsg",
128         "fstype",
129         "fspath",
130         "ro",
131         "rw",
132         "nosuid",
133         "noexec",
134         NULL
135 };
136 
137 /*
138  * The root specifiers we will try if RB_CDROM is specified.
139  */
140 static char *cdrom_rootdevnames[] = {
141         "cd9660:cd0",
142         "cd9660:acd0",
143         NULL
144 };
145 
146 /* legacy find-root code */
147 char            *rootdevnames[2] = {NULL, NULL};
148 #ifndef ROOTDEVNAME
149 #  define ROOTDEVNAME NULL
150 #endif
151 static const char       *ctrootdevname = ROOTDEVNAME;
152 
153 /*
154  * ---------------------------------------------------------------------
155  * Functions for building and sanitizing the mount options
156  */
157 
158 /* Remove one mount option. */
159 static void
160 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
161 {
162 
163         TAILQ_REMOVE(opts, opt, link);
164         free(opt->name, M_MOUNT);
165         if (opt->value != NULL)
166                 free(opt->value, M_MOUNT);
167 #ifdef INVARIANTS
168         else if (opt->len != 0)
169                 panic("%s: mount option with NULL value but length != 0",
170                     __func__);
171 #endif
172         free(opt, M_MOUNT);
173 }
174 
175 /* Release all resources related to the mount options. */
176 void
177 vfs_freeopts(struct vfsoptlist *opts)
178 {
179         struct vfsopt *opt;
180 
181         while (!TAILQ_EMPTY(opts)) {
182                 opt = TAILQ_FIRST(opts);
183                 vfs_freeopt(opts, opt);
184         }
185         free(opts, M_MOUNT);
186 }
187 
188 void
189 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
190 {
191         struct vfsopt *opt, *temp;
192 
193         if (opts == NULL)
194                 return;
195         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
196                 if (strcmp(opt->name, name) == 0)
197                         vfs_freeopt(opts, opt);
198         }
199 }
200 
/*
 * Decide whether two option names refer to the same option.  Names match
 * when they are identical, or when one of them is the other with a
 * leading "no" prefix.  Returns 1 on a match and 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	int first_negated, second_negated;

	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	first_negated = (strncmp(opt1, "no", 2) == 0);
	second_negated = (strncmp(opt2, "no", 2) == 0);

	/* "noopt" vs. "opt" */
	if (first_negated && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* "opt" vs. "noopt" */
	if (second_negated && strcmp(opt1, opt2 + 2) == 0)
		return (1);
	return (0);
}
219 
220 /*
221  * If a mount option is specified several times,
222  * (with or without the "no" prefix) only keep
223  * the last occurence of it.
224  */
225 static void
226 vfs_sanitizeopts(struct vfsoptlist *opts)
227 {
228         struct vfsopt *opt, *opt2, *tmp;
229 
230         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
231                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
232                 while (opt2 != NULL) {
233                         if (vfs_equalopts(opt->name, opt2->name)) {
234                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
235                                 vfs_freeopt(opts, opt2);
236                                 opt2 = tmp;
237                         } else {
238                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
239                         }
240                 }
241         }
242 }
243 
244 /*
245  * Build a linked list of mount options from a struct uio.
246  */
247 static int
248 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
249 {
250         struct vfsoptlist *opts;
251         struct vfsopt *opt;
252         size_t memused;
253         unsigned int i, iovcnt;
254         int error, namelen, optlen;
255 
256         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
257         TAILQ_INIT(opts);
258         memused = 0;
259         iovcnt = auio->uio_iovcnt;
260         for (i = 0; i < iovcnt; i += 2) {
261                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
262                 namelen = auio->uio_iov[i].iov_len;
263                 optlen = auio->uio_iov[i + 1].iov_len;
264                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
265                 opt->value = NULL;
266                 opt->len = 0;
267 
268                 /*
269                  * Do this early, so jumps to "bad" will free the current
270                  * option.
271                  */
272                 TAILQ_INSERT_TAIL(opts, opt, link);
273                 memused += sizeof(struct vfsopt) + optlen + namelen;
274 
275                 /*
276                  * Avoid consuming too much memory, and attempts to overflow
277                  * memused.
278                  */
279                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
280                     optlen > VFS_MOUNTARG_SIZE_MAX ||
281                     namelen > VFS_MOUNTARG_SIZE_MAX) {
282                         error = EINVAL;
283                         goto bad;
284                 }
285 
286                 if (auio->uio_segflg == UIO_SYSSPACE) {
287                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
288                 } else {
289                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
290                             namelen);
291                         if (error)
292                                 goto bad;
293                 }
294                 /* Ensure names are null-terminated strings. */
295                 if (opt->name[namelen - 1] != '\0') {
296                         error = EINVAL;
297                         goto bad;
298                 }
299                 if (optlen != 0) {
300                         opt->len = optlen;
301                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
302                         if (auio->uio_segflg == UIO_SYSSPACE) {
303                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
304                                     optlen);
305                         } else {
306                                 error = copyin(auio->uio_iov[i + 1].iov_base,
307                                     opt->value, optlen);
308                                 if (error)
309                                         goto bad;
310                         }
311                 }
312         }
313         vfs_sanitizeopts(opts);
314         *options = opts;
315         return (0);
316 bad:
317         vfs_freeopts(opts);
318         return (error);
319 }
320 
321 /*
322  * Merge the old mount options with the new ones passed
323  * in the MNT_UPDATE case.
324  *
325  * XXX This function will keep a "nofoo" option in the
326  *     new options if there is no matching "foo" option
327  *     to be cancelled in the old options.  This is a bug
328  *     if the option's canonical name is "foo".  E.g., "noro"
329  *     shouldn't end up in the mount point's active options,
330  *     but it can.
331  */
332 static void
333 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
334 {
335         struct vfsopt *opt, *opt2, *new;
336 
337         TAILQ_FOREACH(opt, opts, link) {
338                 /*
339                  * Check that this option hasn't been redefined
340                  * nor cancelled with a "no" mount option.
341                  */
342                 opt2 = TAILQ_FIRST(toopts);
343                 while (opt2 != NULL) {
344                         if (strcmp(opt2->name, opt->name) == 0)
345                                 goto next;
346                         if (strncmp(opt2->name, "no", 2) == 0 &&
347                             strcmp(opt2->name + 2, opt->name) == 0) {
348                                 vfs_freeopt(toopts, opt2);
349                                 goto next;
350                         }
351                         opt2 = TAILQ_NEXT(opt2, link);
352                 }
353                 /* We want this option, duplicate it. */
354                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
355                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
356                 strcpy(new->name, opt->name);
357                 if (opt->len != 0) {
358                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
359                         bcopy(opt->value, new->value, opt->len);
360                 } else {
361                         new->value = NULL;
362                 }
363                 new->len = opt->len;
364                 TAILQ_INSERT_TAIL(toopts, new, link);
365 next:
366                 continue;
367         }
368 }
369 
370 /*
371  * Mount a filesystem.
372  */
373 int
374 nmount(td, uap)
375         struct thread *td;
376         struct nmount_args /* {
377                 struct iovec *iovp;
378                 unsigned int iovcnt;
379                 int flags;
380         } */ *uap;
381 {
382         struct uio *auio;
383         struct iovec *iov;
384         unsigned int i;
385         int error;
386         u_int iovcnt;
387 
388         AUDIT_ARG(fflags, uap->flags);
389 
390         /*
391          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
392          * userspace to set this flag, but we must filter it out if we want
393          * MNT_UPDATE on the root file system to work.
394          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
395          */
396         uap->flags &= ~MNT_ROOTFS;
397 
398         iovcnt = uap->iovcnt;
399         /*
400          * Check that we have an even number of iovec's
401          * and that we have at least two options.
402          */
403         if ((iovcnt & 1) || (iovcnt < 4))
404                 return (EINVAL);
405 
406         error = copyinuio(uap->iovp, iovcnt, &auio);
407         if (error)
408                 return (error);
409         iov = auio->uio_iov;
410         for (i = 0; i < iovcnt; i++) {
411                 if (iov->iov_len > MMAXOPTIONLEN) {
412                         free(auio, M_IOV);
413                         return (EINVAL);
414                 }
415                 iov++;
416         }
417         error = vfs_donmount(td, uap->flags, auio);
418 
419         free(auio, M_IOV);
420         return (error);
421 }
422 
423 /*
424  * ---------------------------------------------------------------------
425  * Various utility functions
426  */
427 
/*
 * Apply MNT_REF() to the mount point while holding the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK).
 * NOTE(review): MNT_REF is defined outside this file; presumably it bumps
 * mnt_ref, the counter vfs_mount_destroy() waits on -- confirm.
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
436 
/*
 * Apply MNT_REL() to the mount point while holding the mount interlock
 * (MNT_ILOCK/MNT_IUNLOCK).
 * NOTE(review): MNT_REL is defined outside this file; presumably it drops
 * a reference taken with MNT_REF and wakes a waiter in
 * vfs_mount_destroy() -- confirm.
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
445 
446 static int
447 mount_init(void *mem, int size, int flags)
448 {
449         struct mount *mp;
450 
451         mp = (struct mount *)mem;
452         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
453         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
454         return (0);
455 }
456 
457 static void
458 mount_fini(void *mem, int size)
459 {
460         struct mount *mp;
461 
462         mp = (struct mount *)mem;
463         lockdestroy(&mp->mnt_explock);
464         mtx_destroy(&mp->mnt_mtx);
465 }
466 
/*
 * Allocate and initialize the mount point struct.
 *
 * vp     - vnode the new mount will cover (stored in mnt_vnodecovered)
 * vfsp   - filesystem type; supplies the vfsops, type number and name,
 *          and has its vfc_refcount incremented
 * fspath - mount-on path, copied into mnt_stat.f_mntonname (truncated
 *          to MNAMELEN by strlcpy)
 * cred   - credentials of the mounter; duplicated into mnt_cred and
 *          used for f_owner
 *
 * Returns the new mount, which has been passed through vfs_busy().
 */
struct mount *
vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
    struct ucred *cred)
{
	struct mount *mp;

	mp = uma_zalloc(mount_zone, M_WAITOK);
	/* Clear only the span between the mnt_startzero/mnt_endzero markers. */
	bzero(&mp->mnt_startzero,
	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
	TAILQ_INIT(&mp->mnt_nvnodelist);
	mp->mnt_nvnodelistsize = 0;
	mp->mnt_ref = 0;
	/* The result of vfs_busy() is deliberately discarded. */
	(void) vfs_busy(mp, MBF_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;	/* XXX Unlocked */
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	/* mnt_gen is not reset above, so it advances on each call for this object. */
	mp->mnt_gen++;
	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	/* Keep a private copy of the credentials; freed in vfs_mount_destroy(). */
	mp->mnt_cred = crdup(cred);
	mp->mnt_stat.f_owner = cred->cr_uid;
	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
	mp->mnt_iosize_max = DFLTPHYS;
#ifdef MAC
	mac_mount_init(mp);
	mac_mount_create(cred, mp);
#endif
	/* Fill the hash seed from arc4rand(). */
	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
	return (mp);
}
501 
/*
 * Destroy the mount struct previously allocated by vfs_mount_alloc().
 *
 * Sleeps until mnt_ref, mnt_writeopcount and mnt_secondary_writes have
 * all dropped to zero, panics if any vnode is still on the mount's vnode
 * list, then releases the option list, the credentials and the struct
 * itself.
 */
void
vfs_mount_destroy(struct mount *mp)
{

	MNT_ILOCK(mp);
	/* Wait for all outstanding references to go away. */
	while (mp->mnt_ref)
		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
	if (mp->mnt_writeopcount > 0) {
		printf("Waiting for mount point write ops\n");
		while (mp->mnt_writeopcount > 0) {
			/* MNTK_SUSPEND is set again on every pass before sleeping. */
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_writeopcount,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy2", 0);
		}
		printf("mount point write ops completed\n");
	}
	if (mp->mnt_secondary_writes > 0) {
		printf("Waiting for mount point secondary write ops\n");
		while (mp->mnt_secondary_writes > 0) {
			mp->mnt_kern_flag |= MNTK_SUSPEND;
			msleep(&mp->mnt_secondary_writes,
			       MNT_MTX(mp),
			       PZERO, "mntdestroy3", 0);
		}
		printf("mount point secondary write ops completed\n");
	}
	MNT_IUNLOCK(mp);
	/* Undo the vfc_refcount increment made in vfs_mount_alloc(). */
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
		struct vnode *vp;

		/* Print every leftover vnode before panicking. */
		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
			vprint("", vp);
		panic("unmount: dangling vnode");
	}
	MNT_ILOCK(mp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	/* Re-check the counters now that the interlock is held again. */
	if (mp->mnt_writeopcount != 0)
		panic("vfs_mount_destroy: nonzero writeopcount");
	if (mp->mnt_secondary_writes != 0)
		panic("vfs_mount_destroy: nonzero secondary_writes");
	if (mp->mnt_nvnodelistsize != 0)
		panic("vfs_mount_destroy: nonzero nvnodelistsize");
	/* Overwrite the counters with a recognisable bogus value before freeing. */
	mp->mnt_writeopcount = -1000;
	mp->mnt_nvnodelistsize = -1000;
	mp->mnt_secondary_writes = -1000;
	MNT_IUNLOCK(mp);
#ifdef MAC
	mac_mount_destroy(mp);
#endif
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	crfree(mp->mnt_cred);
	uma_zfree(mount_zone, mp);
}
562 
563 int
564 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
565 {
566         struct vfsoptlist *optlist;
567         struct vfsopt *opt, *noro_opt, *tmp_opt;
568         char *fstype, *fspath, *errmsg;
569         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
570         int has_rw, has_noro;
571 
572         errmsg = fspath = NULL;
573         errmsg_len = has_noro = has_rw = fspathlen = 0;
574         errmsg_pos = -1;
575 
576         error = vfs_buildopts(fsoptions, &optlist);
577         if (error)
578                 return (error);
579 
580         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
581                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
582 
583         /*
584          * We need these two options before the others,
585          * and they are mandatory for any filesystem.
586          * Ensure they are NUL terminated as well.
587          */
588         fstypelen = 0;
589         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
590         if (error || fstype[fstypelen - 1] != '\0') {
591                 error = EINVAL;
592                 if (errmsg != NULL)
593                         strncpy(errmsg, "Invalid fstype", errmsg_len);
594                 goto bail;
595         }
596         fspathlen = 0;
597         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
598         if (error || fspath[fspathlen - 1] != '\0') {
599                 error = EINVAL;
600                 if (errmsg != NULL)
601                         strncpy(errmsg, "Invalid fspath", errmsg_len);
602                 goto bail;
603         }
604 
605         /*
606          * We need to see if we have the "update" option
607          * before we call vfs_domount(), since vfs_domount() has special
608          * logic based on MNT_UPDATE.  This is very important
609          * when we want to update the root filesystem.
610          */
611         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
612                 if (strcmp(opt->name, "update") == 0) {
613                         fsflags |= MNT_UPDATE;
614                         vfs_freeopt(optlist, opt);
615                 }
616                 else if (strcmp(opt->name, "async") == 0)
617                         fsflags |= MNT_ASYNC;
618                 else if (strcmp(opt->name, "force") == 0) {
619                         fsflags |= MNT_FORCE;
620                         vfs_freeopt(optlist, opt);
621                 }
622                 else if (strcmp(opt->name, "reload") == 0) {
623                         fsflags |= MNT_RELOAD;
624                         vfs_freeopt(optlist, opt);
625                 }
626                 else if (strcmp(opt->name, "multilabel") == 0)
627                         fsflags |= MNT_MULTILABEL;
628                 else if (strcmp(opt->name, "noasync") == 0)
629                         fsflags &= ~MNT_ASYNC;
630                 else if (strcmp(opt->name, "noatime") == 0)
631                         fsflags |= MNT_NOATIME;
632                 else if (strcmp(opt->name, "atime") == 0) {
633                         free(opt->name, M_MOUNT);
634                         opt->name = strdup("nonoatime", M_MOUNT);
635                 }
636                 else if (strcmp(opt->name, "noclusterr") == 0)
637                         fsflags |= MNT_NOCLUSTERR;
638                 else if (strcmp(opt->name, "clusterr") == 0) {
639                         free(opt->name, M_MOUNT);
640                         opt->name = strdup("nonoclusterr", M_MOUNT);
641                 }
642                 else if (strcmp(opt->name, "noclusterw") == 0)
643                         fsflags |= MNT_NOCLUSTERW;
644                 else if (strcmp(opt->name, "clusterw") == 0) {
645                         free(opt->name, M_MOUNT);
646                         opt->name = strdup("nonoclusterw", M_MOUNT);
647                 }
648                 else if (strcmp(opt->name, "noexec") == 0)
649                         fsflags |= MNT_NOEXEC;
650                 else if (strcmp(opt->name, "exec") == 0) {
651                         free(opt->name, M_MOUNT);
652                         opt->name = strdup("nonoexec", M_MOUNT);
653                 }
654                 else if (strcmp(opt->name, "nosuid") == 0)
655                         fsflags |= MNT_NOSUID;
656                 else if (strcmp(opt->name, "suid") == 0) {
657                         free(opt->name, M_MOUNT);
658                         opt->name = strdup("nonosuid", M_MOUNT);
659                 }
660                 else if (strcmp(opt->name, "nosymfollow") == 0)
661                         fsflags |= MNT_NOSYMFOLLOW;
662                 else if (strcmp(opt->name, "symfollow") == 0) {
663                         free(opt->name, M_MOUNT);
664                         opt->name = strdup("nonosymfollow", M_MOUNT);
665                 }
666                 else if (strcmp(opt->name, "noro") == 0) {
667                         fsflags &= ~MNT_RDONLY;
668                         has_noro = 1;
669                 }
670                 else if (strcmp(opt->name, "rw") == 0) {
671                         fsflags &= ~MNT_RDONLY;
672                         has_rw = 1;
673                 }
674                 else if (strcmp(opt->name, "ro") == 0)
675                         fsflags |= MNT_RDONLY;
676                 else if (strcmp(opt->name, "rdonly") == 0) {
677                         free(opt->name, M_MOUNT);
678                         opt->name = strdup("ro", M_MOUNT);
679                         fsflags |= MNT_RDONLY;
680                 }
681                 else if (strcmp(opt->name, "suiddir") == 0)
682                         fsflags |= MNT_SUIDDIR;
683                 else if (strcmp(opt->name, "sync") == 0)
684                         fsflags |= MNT_SYNCHRONOUS;
685                 else if (strcmp(opt->name, "union") == 0)
686                         fsflags |= MNT_UNION;
687         }
688 
689         /*
690          * If "rw" was specified as a mount option, and we
691          * are trying to update a mount-point from "ro" to "rw",
692          * we need a mount option "noro", since in vfs_mergeopts(),
693          * "noro" will cancel "ro", but "rw" will not do anything.
694          */
695         if (has_rw && !has_noro) {
696                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
697                 noro_opt->name = strdup("noro", M_MOUNT);
698                 noro_opt->value = NULL;
699                 noro_opt->len = 0;
700                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
701         }
702 
703         /*
704          * Be ultra-paranoid about making sure the type and fspath
705          * variables will fit in our mp buffers, including the
706          * terminating NUL.
707          */
708         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
709                 error = ENAMETOOLONG;
710                 goto bail;
711         }
712 
713         mtx_lock(&Giant);
714         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
715         mtx_unlock(&Giant);
716 bail:
717         /* copyout the errmsg */
718         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
719             && errmsg_len > 0 && errmsg != NULL) {
720                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
721                         bcopy(errmsg,
722                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
723                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
724                 } else {
725                         copyout(errmsg,
726                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
727                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
728                 }
729         }
730 
731         if (error != 0)
732                 vfs_freeopts(optlist);
733         return (error);
734 }
735 
736 /*
737  * Old mount API.
738  */
739 #ifndef _SYS_SYSPROTO_H_
740 struct mount_args {
741         char    *type;
742         char    *path;
743         int     flags;
744         caddr_t data;
745 };
746 #endif
747 /* ARGSUSED */
748 int
749 mount(td, uap)
750         struct thread *td;
751         struct mount_args /* {
752                 char *type;
753                 char *path;
754                 int flags;
755                 caddr_t data;
756         } */ *uap;
757 {
758         char *fstype;
759         struct vfsconf *vfsp = NULL;
760         struct mntarg *ma = NULL;
761         int error;
762 
763         AUDIT_ARG(fflags, uap->flags);
764 
765         /*
766          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
767          * userspace to set this flag, but we must filter it out if we want
768          * MNT_UPDATE on the root file system to work.
769          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
770          */
771         uap->flags &= ~MNT_ROOTFS;
772 
773         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
774         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
775         if (error) {
776                 free(fstype, M_TEMP);
777                 return (error);
778         }
779 
780         AUDIT_ARG(text, fstype);
781         mtx_lock(&Giant);
782         vfsp = vfs_byname_kld(fstype, td, &error);
783         free(fstype, M_TEMP);
784         if (vfsp == NULL) {
785                 mtx_unlock(&Giant);
786                 return (ENOENT);
787         }
788         if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
789                 mtx_unlock(&Giant);
790                 return (EOPNOTSUPP);
791         }
792 
793         ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
794         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
795         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
796         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
797         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
798 
799         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
800         mtx_unlock(&Giant);
801         return (error);
802 }
803 
804 
805 /*
806  * vfs_domount(): actually attempt a filesystem mount.
807  */
808 static int
809 vfs_domount(
810         struct thread *td,      /* Calling thread. */
811         const char *fstype,     /* Filesystem type. */
812         char *fspath,           /* Mount path. */
813         int fsflags,            /* Flags common to all filesystems. */
814         void *fsdata            /* Options local to the filesystem. */
815         )
816 {
817         struct vnode *vp;
818         struct mount *mp;
819         struct vfsconf *vfsp;
820         struct oexport_args oexport;
821         struct export_args export;
822         int error, flag = 0;
823         struct vattr va;
824         struct nameidata nd;
825 
826         mtx_assert(&Giant, MA_OWNED);
827         /*
828          * Be ultra-paranoid about making sure the type and fspath
829          * variables will fit in our mp buffers, including the
830          * terminating NUL.
831          */
832         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
833                 return (ENAMETOOLONG);
834 
835         if (jailed(td->td_ucred) || usermount == 0) {
836                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
837                         return (error);
838         }
839 
840         /*
841          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
842          */
843         if (fsflags & MNT_EXPORTED) {
844                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
845                 if (error)
846                         return (error);
847         }
848         if (fsflags & MNT_SUIDDIR) {
849                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
850                 if (error)
851                         return (error);
852         }
853         /*
854          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
855          */
856         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
857                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
858                         fsflags |= MNT_NOSUID | MNT_USER;
859         }
860 
861         /* Load KLDs before we lock the covered vnode to avoid reversals. */
862         vfsp = NULL;
863         if ((fsflags & MNT_UPDATE) == 0) {
864                 /* Don't try to load KLDs if we're mounting the root. */
865                 if (fsflags & MNT_ROOTFS)
866                         vfsp = vfs_byname(fstype);
867                 else
868                         vfsp = vfs_byname_kld(fstype, td, &error);
869                 if (vfsp == NULL)
870                         return (ENODEV);
871                 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
872                         return (EPERM);
873         }
874         /*
875          * Get vnode to be covered
876          */
877         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
878             fspath, td);
879         if ((error = namei(&nd)) != 0)
880                 return (error);
881         NDFREE(&nd, NDF_O