The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/clock.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/libkern.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/reboot.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/sx.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/vnode.h>
   61 #include <vm/uma.h>
   62 
   63 #include <geom/geom.h>
   64 
   65 #include <machine/stdarg.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/mac/mac_framework.h>
   69 
   70 #include "opt_rootdevname.h"
   71 #include "opt_ddb.h"
   72 #include "opt_mac.h"
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif
   77 
   78 #define ROOTNAME                "root_device"
   79 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)	/* bytes; vfs_buildopts() caps each option name, each option value and the running total at this */
   80 
   81 static int      vfs_domount(struct thread *td, const char *fstype,
   82                     char *fspath, int fsflags, void *fsdata);
   83 static int      vfs_mountroot_ask(void);
   84 static int      vfs_mountroot_try(const char *mountfrom);
   85 static int      vfs_donmount(struct thread *td, int fsflags,
   86                     struct uio *fsoptions);
   87 static void     free_mntarg(struct mntarg *ma);
   88 static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
   89 
   90 static int      usermount = 0;
   91 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   92     "Unprivileged users may mount and unmount file systems");
   93 
   94 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");	/* also the malloc type of the option lists, names and values built in this file */
   95 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
   96 static uma_zone_t mount_zone;	/* zone that vfs_mount_alloc()/vfs_mount_destroy() take struct mount from and return it to */
   97 
   98 /* List of mounted filesystems. */
   99 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
  100 
  101 /* For any iteration/modification of mountlist */
  102 struct mtx mountlist_mtx;
  103 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  104 
  105 TAILQ_HEAD(vfsoptlist, vfsopt);	/* list head type for a set of mount options */
  106 struct vfsopt {
  107         TAILQ_ENTRY(vfsopt) link;	/* linkage within a struct vfsoptlist */
  108         char    *name;	/* M_MOUNT-allocated, NUL-terminated option name */
  109         void    *value;	/* M_MOUNT-allocated option data, or NULL when len == 0 */
  110         int     len;	/* size of value in bytes */
  111 };
  112 
  113 /*
  114  * The vnode of the system's root (/ in the filesystem, without chroot
  115  * active.)
  116  */
  117 struct vnode    *rootvnode;
  118 
  119 /*
  120  * The root filesystem is detailed in the kernel environment variable
  121  * vfs.root.mountfrom, which is expected to be in the general format
  122  *
  123  * <vfsname>:[<path>]
  124  * vfsname   := the name of a VFS known to the kernel and capable
  125  *              of being mounted as root
  126  * path      := disk device name or other data used by the filesystem
  127  *              to locate its physical store
  128  */
  129 
  130 /*
  131  * Global opts, taken by all filesystems
  132  */
  133 static const char *global_opts[] = {
  134         "errmsg",
  135         "fstype",
  136         "fspath",
  137         "ro",
  138         "rw",
  139         "nosuid",
  140         "noexec",
  141         "update",
  142         NULL	/* sentinel: the table is NULL-terminated */
  143 };
  144 
  145 /*
  146  * The root specifiers we will try if RB_CDROM is specified.
  147  */
  148 static char *cdrom_rootdevnames[] = {
  149         "cd9660:cd0",	/* entries use the <vfsname>:<path> form described for vfs.root.mountfrom */
  150         "cd9660:acd0",
  151         NULL	/* sentinel: the table is NULL-terminated */
  152 };
  153 
  154 /* legacy find-root code */
  155 char            *rootdevnames[2] = {NULL, NULL};
  156 #ifndef ROOTDEVNAME
  157 #  define ROOTDEVNAME NULL	/* no compile-time root device configured */
  158 #endif
  159 static const char       *ctrootdevname = ROOTDEVNAME;	/* compile-time root device name; NULL unless ROOTDEVNAME was defined (presumably via opt_rootdevname.h) */
  160 
  161 /*
  162  * ---------------------------------------------------------------------
  163  * Functions for building and sanitizing the mount options
  164  */
  165 
  166 /* Remove one mount option. */
  167 static void
  168 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  169 {
  170 
  171         TAILQ_REMOVE(opts, opt, link);
  172         free(opt->name, M_MOUNT);
  173         if (opt->value != NULL)
  174                 free(opt->value, M_MOUNT);
  175 #ifdef INVARIANTS
  176         else if (opt->len != 0)
  177                 panic("%s: mount option with NULL value but length != 0",
  178                     __func__);
  179 #endif
  180         free(opt, M_MOUNT);
  181 }
  182 
  183 /* Release all resources related to the mount options. */
  184 void
  185 vfs_freeopts(struct vfsoptlist *opts)
  186 {
  187         struct vfsopt *opt;
  188 
  189         while (!TAILQ_EMPTY(opts)) {
  190                 opt = TAILQ_FIRST(opts);
  191                 vfs_freeopt(opts, opt);
  192         }
  193         free(opts, M_MOUNT);
  194 }
  195 
  196 void
  197 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  198 {
  199         struct vfsopt *opt, *temp;
  200 
  201         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  202                 if (strcmp(opt->name, name) == 0)
  203                         vfs_freeopt(opts, opt);
  204         }
  205 }
  206 
  207 /*
  208  * Check if options are equal (with or without the "no" prefix).
  209  */
  210 static int
  211 vfs_equalopts(const char *opt1, const char *opt2)
  212 {
  213 
  214         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  215         if (strcmp(opt1, opt2) == 0)
  216                 return (1);
  217         /* "noopt" vs. "opt" */
  218         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  219                 return (1);
  220         /* "opt" vs. "noopt" */
  221         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  222                 return (1);
  223         return (0);
  224 }
  225 
  226 /*
  227  * If a mount option is specified several times,
  228  * (with or without the "no" prefix) only keep
  229  * the last occurence of it.
  230  */
  231 static void
  232 vfs_sanitizeopts(struct vfsoptlist *opts)
  233 {
  234         struct vfsopt *opt, *opt2, *tmp;
  235 
  236         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  237                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  238                 while (opt2 != NULL) {
  239                         if (vfs_equalopts(opt->name, opt2->name)) {
  240                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  241                                 vfs_freeopt(opts, opt2);
  242                                 opt2 = tmp;
  243                         } else {
  244                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  245                         }
  246                 }
  247         }
  248 }
  249 
  250 /*
  251  * Build a linked list of mount options from a struct uio.
  252  */
  253 static int
  254 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  255 {
  256         struct vfsoptlist *opts;
  257         struct vfsopt *opt;
  258         size_t memused;
  259         unsigned int i, iovcnt;
  260         int error, namelen, optlen;
  261 
  262         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  263         TAILQ_INIT(opts);
  264         memused = 0;
  265         iovcnt = auio->uio_iovcnt;
  266         for (i = 0; i < iovcnt; i += 2) {
  267                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  268                 namelen = auio->uio_iov[i].iov_len;
  269                 optlen = auio->uio_iov[i + 1].iov_len;
  270                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  271                 opt->value = NULL;
  272                 opt->len = 0;
  273 
  274                 /*
  275                  * Do this early, so jumps to "bad" will free the current
  276                  * option.
  277                  */
  278                 TAILQ_INSERT_TAIL(opts, opt, link);
  279                 memused += sizeof(struct vfsopt) + optlen + namelen;
  280 
  281                 /*
  282                  * Avoid consuming too much memory, and attempts to overflow
  283                  * memused.
  284                  */
  285                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  286                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  287                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  288                         error = EINVAL;
  289                         goto bad;
  290                 }
  291 
  292                 if (auio->uio_segflg == UIO_SYSSPACE) {
  293                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  294                 } else {
  295                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  296                             namelen);
  297                         if (error)
  298                                 goto bad;
  299                 }
  300                 /* Ensure names are null-terminated strings. */
  301                 if (opt->name[namelen - 1] != '\0') {
  302                         error = EINVAL;
  303                         goto bad;
  304                 }
  305                 if (optlen != 0) {
  306                         opt->len = optlen;
  307                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  308                         if (auio->uio_segflg == UIO_SYSSPACE) {
  309                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  310                                     optlen);
  311                         } else {
  312                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  313                                     opt->value, optlen);
  314                                 if (error)
  315                                         goto bad;
  316                         }
  317                 }
  318         }
  319         vfs_sanitizeopts(opts);
  320         *options = opts;
  321         return (0);
  322 bad:
  323         vfs_freeopts(opts);
  324         return (error);
  325 }
  326 
  327 /*
  328  * Merge the old mount options with the new ones passed
  329  * in the MNT_UPDATE case.
  330  */
  331 static void
  332 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  333 {
  334         struct vfsopt *opt, *opt2, *new;
  335 
  336         TAILQ_FOREACH(opt, opts, link) {
  337                 /*
  338                  * Check that this option hasn't been redefined
  339                  * nor cancelled with a "no" mount option.
  340                  */
  341                 opt2 = TAILQ_FIRST(toopts);
  342                 while (opt2 != NULL) {
  343                         if (strcmp(opt2->name, opt->name) == 0)
  344                                 goto next;
  345                         if (strncmp(opt2->name, "no", 2) == 0 &&
  346                             strcmp(opt2->name + 2, opt->name) == 0) {
  347                                 vfs_freeopt(toopts, opt2);
  348                                 goto next;
  349                         }
  350                         opt2 = TAILQ_NEXT(opt2, link);
  351                 }
  352                 /* We want this option, duplicate it. */
  353                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  354                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  355                 strcpy(new->name, opt->name);
  356                 if (opt->len != 0) {
  357                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  358                         bcopy(opt->value, new->value, opt->len);
  359                 } else {
  360                         new->value = NULL;
  361                 }
  362                 new->len = opt->len;
  363                 TAILQ_INSERT_TAIL(toopts, new, link);
  364 next:
  365                 continue;
  366         }
  367 }
  368 
  369 /*
  370  * Mount a filesystem.
  371  */
  372 int
  373 nmount(td, uap)
  374         struct thread *td;
  375         struct nmount_args /* {
  376                 struct iovec *iovp;
  377                 unsigned int iovcnt;
  378                 int flags;
  379         } */ *uap;
  380 {
  381         struct uio *auio;
  382         struct iovec *iov;
  383         unsigned int i;
  384         int error;
  385         u_int iovcnt;
  386 
  387         AUDIT_ARG(fflags, uap->flags);
  388 
  389         /*
  390          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  391          * userspace to set this flag, but we must filter it out if we want
  392          * MNT_UPDATE on the root file system to work.
  393          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  394          */
  395         uap->flags &= ~MNT_ROOTFS;
  396 
  397         iovcnt = uap->iovcnt;
  398         /*
  399          * Check that we have an even number of iovec's
  400          * and that we have at least two options.
  401          */
  402         if ((iovcnt & 1) || (iovcnt < 4))
  403                 return (EINVAL);
  404 
  405         error = copyinuio(uap->iovp, iovcnt, &auio);
  406         if (error)
  407                 return (error);
  408         iov = auio->uio_iov;
  409         for (i = 0; i < iovcnt; i++) {
  410                 if (iov->iov_len > MMAXOPTIONLEN) {
  411                         free(auio, M_IOV);
  412                         return (EINVAL);
  413                 }
  414                 iov++;
  415         }
  416         error = vfs_donmount(td, uap->flags, auio);
  417 
  418         free(auio, M_IOV);
  419         return (error);
  420 }
  421 
  422 /*
  423  * ---------------------------------------------------------------------
  424  * Various utility functions
  425  */
  426 
  427 void
  428 vfs_ref(struct mount *mp)
  429 {
  430 
  431         MNT_ILOCK(mp);
  432         MNT_REF(mp);
  433         MNT_IUNLOCK(mp);
  434 }
  435 
  436 void
  437 vfs_rel(struct mount *mp)
  438 {
  439 
  440         MNT_ILOCK(mp);
  441         MNT_REL(mp);
  442         MNT_IUNLOCK(mp);
  443 }
  444 
  445 static int
  446 mount_init(void *mem, int size, int flags)
  447 {
  448         struct mount *mp;
  449 
  450         mp = (struct mount *)mem;
  451         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  452         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  453         return (0);
  454 }
  455 
  456 static void
  457 mount_fini(void *mem, int size)
  458 {
  459         struct mount *mp;
  460 
  461         mp = (struct mount *)mem;
  462         lockdestroy(&mp->mnt_lock);
  463         mtx_destroy(&mp->mnt_mtx);
  464 }
  465 
  466 /*
  467  * Allocate and initialize the mount point struct.
  468  */
  469 struct mount *
  470 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  471     const char *fspath, struct thread *td)
  472 {
  473         struct mount *mp;
  474 
  475         mp = uma_zalloc(mount_zone, M_WAITOK);
  476         bzero(&mp->mnt_startzero,
  477             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  478         TAILQ_INIT(&mp->mnt_nvnodelist);
  479         mp->mnt_nvnodelistsize = 0;
  480         mp->mnt_ref = 0;
  481         (void) vfs_busy(mp, LK_NOWAIT, 0, td);
  482         mp->mnt_op = vfsp->vfc_vfsops;
  483         mp->mnt_vfc = vfsp;
  484         vfsp->vfc_refcount++;   /* XXX Unlocked */
  485         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  486         mp->mnt_gen++;
  487         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  488         mp->mnt_vnodecovered = vp;
  489         mp->mnt_cred = crdup(td->td_ucred);
  490         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  491         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  492         mp->mnt_iosize_max = DFLTPHYS;
  493 #ifdef MAC
  494         mac_init_mount(mp);
  495         mac_create_mount(td->td_ucred, mp);
  496 #endif
  497         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  498         return (mp);
  499 }
  500 
  501 /*
  502  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  503  */
  504 void
  505 vfs_mount_destroy(struct mount *mp)
  506 {
  507         int i;
  508 
  509         MNT_ILOCK(mp);
  510         for (i = 0; mp->mnt_ref && i < 3; i++)
  511                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
  512         /*
  513          * This will always cause a 3 second delay in rebooting due to
  514          * refs on the root mountpoint that never go away.  Most of these
  515          * are held by init which never exits.
  516          */
  517         if (i == 3 && (!rebooting || bootverbose))
  518                 printf("Mount point %s had %d dangling refs\n",
  519                     mp->mnt_stat.f_mntonname, mp->mnt_ref);
  520         if (mp->mnt_holdcnt != 0) {
  521                 printf("Waiting for mount point to be unheld\n");
  522                 while (mp->mnt_holdcnt != 0) {
  523                         mp->mnt_holdcntwaiters++;
  524                         msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
  525                                PZERO, "mntdestroy", 0);
  526                         mp->mnt_holdcntwaiters--;
  527                 }
  528                 printf("mount point unheld\n");
  529         }
  530         if (mp->mnt_writeopcount > 0) {
  531                 printf("Waiting for mount point write ops\n");
  532                 while (mp->mnt_writeopcount > 0) {
  533                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  534                         msleep(&mp->mnt_writeopcount,
  535                                MNT_MTX(mp),
  536                                PZERO, "mntdestroy2", 0);
  537                 }
  538                 printf("mount point write ops completed\n");
  539         }
  540         if (mp->mnt_secondary_writes > 0) {
  541                 printf("Waiting for mount point secondary write ops\n");
  542                 while (mp->mnt_secondary_writes > 0) {
  543                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  544                         msleep(&mp->mnt_secondary_writes,
  545                                MNT_MTX(mp),
  546                                PZERO, "mntdestroy3", 0);
  547                 }
  548                 printf("mount point secondary write ops completed\n");
  549         }
  550         MNT_IUNLOCK(mp);
  551         mp->mnt_vfc->vfc_refcount--;
  552         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  553                 struct vnode *vp;
  554 
  555                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  556                         vprint("", vp);
  557                 panic("unmount: dangling vnode");
  558         }
  559         MNT_ILOCK(mp);
  560         if (mp->mnt_kern_flag & MNTK_MWAIT)
  561                 wakeup(mp);
  562         if (mp->mnt_writeopcount != 0)
  563                 panic("vfs_mount_destroy: nonzero writeopcount");
  564         if (mp->mnt_secondary_writes != 0)
  565                 panic("vfs_mount_destroy: nonzero secondary_writes");
  566         if (mp->mnt_nvnodelistsize != 0)
  567                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  568         mp->mnt_writeopcount = -1000;
  569         mp->mnt_nvnodelistsize = -1000;
  570         mp->mnt_secondary_writes = -1000;
  571         MNT_IUNLOCK(mp);
  572 #ifdef MAC
  573         mac_destroy_mount(mp);
  574 #endif
  575         if (mp->mnt_opt != NULL)
  576                 vfs_freeopts(mp->mnt_opt);
  577         crfree(mp->mnt_cred);
  578         uma_zfree(mount_zone, mp);
  579 }
  580 
  581 static int
  582 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  583 {
  584         struct vfsoptlist *optlist;
  585         struct vfsopt *opt, *noro_opt;
  586         char *fstype, *fspath, *errmsg;
  587         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  588         int has_rw, has_noro;
  589 
  590         errmsg = NULL;
  591         errmsg_len = 0;
  592         errmsg_pos = -1;
  593         has_rw = 0;
  594         has_noro = 0;
  595 
  596         error = vfs_buildopts(fsoptions, &optlist);
  597         if (error)
  598                 return (error);
  599 
  600         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  601                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  602 
  603         /*
  604          * We need these two options before the others,
  605          * and they are mandatory for any filesystem.
  606          * Ensure they are NUL terminated as well.
  607          */
  608         fstypelen = 0;
  609         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  610         if (error || fstype[fstypelen - 1] != '\0') {
  611                 error = EINVAL;
  612                 if (errmsg != NULL)
  613                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  614                 goto bail;
  615         }
  616         fspathlen = 0;
  617         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  618         if (error || fspath[fspathlen - 1] != '\0') {
  619                 error = EINVAL;
  620                 if (errmsg != NULL)
  621                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  622                 goto bail;
  623         }
  624 
  625         /*
  626          * We need to see if we have the "update" option
  627          * before we call vfs_domount(), since vfs_domount() has special
  628          * logic based on MNT_UPDATE.  This is very important
  629          * when we want to update the root filesystem.
  630          */
  631         TAILQ_FOREACH(opt, optlist, link) {
  632                 if (strcmp(opt->name, "update") == 0)
  633                         fsflags |= MNT_UPDATE;
  634                 else if (strcmp(opt->name, "async") == 0)
  635                         fsflags |= MNT_ASYNC;
  636                 else if (strcmp(opt->name, "force") == 0)
  637                         fsflags |= MNT_FORCE;
  638                 else if (strcmp(opt->name, "multilabel") == 0)
  639                         fsflags |= MNT_MULTILABEL;
  640                 else if (strcmp(opt->name, "noasync") == 0)
  641                         fsflags &= ~MNT_ASYNC;
  642                 else if (strcmp(opt->name, "noatime") == 0)
  643                         fsflags |= MNT_NOATIME;
  644                 else if (strcmp(opt->name, "atime") == 0) {
  645                         free(opt->name, M_MOUNT);
  646                         opt->name = strdup("nonoatime", M_MOUNT);
  647                 }
  648                 else if (strcmp(opt->name, "noclusterr") == 0)
  649                         fsflags |= MNT_NOCLUSTERR;
  650                 else if (strcmp(opt->name, "clusterr") == 0) {
  651                         free(opt->name, M_MOUNT);
  652                         opt->name = strdup("nonoclusterr", M_MOUNT);
  653                 }
  654                 else if (strcmp(opt->name, "noclusterw") == 0)
  655                         fsflags |= MNT_NOCLUSTERW;
  656                 else if (strcmp(opt->name, "clusterw") == 0) {
  657                         free(opt->name, M_MOUNT);
  658                         opt->name = strdup("nonoclusterw", M_MOUNT);
  659                 }
  660                 else if (strcmp(opt->name, "noexec") == 0)
  661                         fsflags |= MNT_NOEXEC;
  662                 else if (strcmp(opt->name, "exec") == 0) {
  663                         free(opt->name, M_MOUNT);
  664                         opt->name = strdup("nonoexec", M_MOUNT);
  665                 }
  666                 else if (strcmp(opt->name, "nosuid") == 0)
  667                         fsflags |= MNT_NOSUID;
  668                 else if (strcmp(opt->name, "suid") == 0) {
  669                         free(opt->name, M_MOUNT);
  670                         opt->name = strdup("nonosuid", M_MOUNT);
  671                 }
  672                 else if (strcmp(opt->name, "nosymfollow") == 0)
  673                         fsflags |= MNT_NOSYMFOLLOW;
  674                 else if (strcmp(opt->name, "symfollow") == 0) {
  675                         free(opt->name, M_MOUNT);
  676                         opt->name = strdup("nonosymfollow", M_MOUNT);
  677                 }
  678                 else if (strcmp(opt->name, "noro") == 0) {
  679                         fsflags &= ~MNT_RDONLY;
  680                         has_noro = 1;
  681                 }
  682                 else if (strcmp(opt->name, "rw") == 0) {
  683                         fsflags &= ~MNT_RDONLY;
  684                         has_rw = 1;
  685                 }
  686                 else if (strcmp(opt->name, "ro") == 0)
  687                         fsflags |= MNT_RDONLY;
  688                 else if (strcmp(opt->name, "rdonly") == 0) {
  689                         free(opt->name, M_MOUNT);
  690                         opt->name = strdup("ro", M_MOUNT);
  691                         fsflags |= MNT_RDONLY;
  692                 }
  693                 else if (strcmp(opt->name, "snapshot") == 0)
  694                         fsflags |= MNT_SNAPSHOT;
  695                 else if (strcmp(opt->name, "suiddir") == 0)
  696                         fsflags |= MNT_SUIDDIR;
  697                 else if (strcmp(opt->name, "sync") == 0)
  698                         fsflags |= MNT_SYNCHRONOUS;
  699                 else if (strcmp(opt->name, "union") == 0)
  700                         fsflags |= MNT_UNION;
  701         }
  702 
  703         /*
  704          * If "rw" was specified as a mount option, and we
  705          * are trying to update a mount-point from "ro" to "rw",
  706          * we need a mount option "noro", since in vfs_mergeopts(),
  707          * "noro" will cancel "ro", but "rw" will not do anything.
  708          */
  709         if (has_rw && !has_noro) {
  710                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  711                 noro_opt->name = strdup("noro", M_MOUNT);
  712                 noro_opt->value = NULL;
  713                 noro_opt->len = 0;
  714                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
  715         }
  716 
  717         /*
  718          * Be ultra-paranoid about making sure the type and fspath
  719          * variables will fit in our mp buffers, including the
  720          * terminating NUL.
  721          */
  722         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  723                 error = ENAMETOOLONG;
  724                 goto bail;
  725         }
  726 
  727         mtx_lock(&Giant);
  728         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  729         mtx_unlock(&Giant);
  730 bail:
  731         /* copyout the errmsg */
  732         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  733             && errmsg_len > 0 && errmsg != NULL) {
  734                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  735                         bcopy(errmsg,
  736                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  737                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  738                 } else {
  739                         copyout(errmsg,
  740                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  741                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  742                 }
  743         }
  744 
  745         if (error != 0)
  746                 vfs_freeopts(optlist);
  747         return (error);
  748 }
  749 
  750 /*
  751  * Old mount API.
  752  */
  753 #ifndef _SYS_SYSPROTO_H_
  754 struct mount_args {
  755         char    *type;
  756         char    *path;
  757         int     flags;
  758         caddr_t data;
  759 };
  760 #endif
  761 /*
       * mount() system call (old interface).
       *
       * Looks up the filesystem type named by uap->type (vfs_byname_kld()),
       * builds a mount argument list from the path and a few of the flags,
       * and hands the request to that type's vfs_cmount method.
       *
       * Returns 0 or an errno value: the copyinstr() error, ENOENT when the
       * type cannot be found, EOPNOTSUPP when the type has no vfs_cmount
       * method, otherwise whatever vfs_cmount returns.
       */
      /* ARGSUSED */
  762 int
  763 mount(td, uap)
  764         struct thread *td;
  765         struct mount_args /* {
  766                 char *type;
  767                 char *path;
  768                 int flags;
  769                 caddr_t data;
  770         } */ *uap;
  771 {
  772         char *name;
  773         struct vfsconf *conf;
  774         struct mntarg *args;
  775         int error;
  776 
  777         AUDIT_ARG(fflags, uap->flags);
  778 
  779         /*
  780          * MNT_ROOTFS is reserved for the kernel (vfs_mountroot_try()) and
  781          * must never be accepted from userspace.  It is stripped rather
  782          * than rejected so that MNT_UPDATE on the root file system keeps
  783          * working.
  784          */
  785         uap->flags &= ~MNT_ROOTFS;
  786 
  787         name = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  788         error = copyinstr(uap->type, name, MFSNAMELEN, NULL);
  789         if (error != 0) {
  790                 free(name, M_TEMP);
  791                 return (error);
  792         }
  793 
  794         AUDIT_ARG(text, name);
  795         mtx_lock(&Giant);
  796         conf = vfs_byname_kld(name, td, &error);
  797         free(name, M_TEMP);
  798         if (conf == NULL) {
  799                 error = ENOENT;
  800                 goto out;
  801         }
  802         if (conf->vfc_vfsops->vfs_cmount == NULL) {
  803                 error = EOPNOTSUPP;
  804                 goto out;
  805         }
  806 
  807         args = mount_argsu(NULL, "fstype", uap->type, MNAMELEN);
  808         args = mount_argsu(args, "fspath", uap->path, MNAMELEN);
  809         args = mount_argb(args, uap->flags & MNT_RDONLY, "noro");
  810         args = mount_argb(args, !(uap->flags & MNT_NOSUID), "nosuid");
  811         args = mount_argb(args, !(uap->flags & MNT_NOEXEC), "noexec");
  812 
  813         error = conf->vfc_vfsops->vfs_cmount(args, uap->data, uap->flags,
                  td);
      out:
  814         mtx_unlock(&Giant);
  815         return (error);
  816 }
  817 
  818 
  819 /*
  820  * vfs_domount(): actually attempt a filesystem mount.
       *
       * Performs either a new mount of a filesystem of type fstype on fspath
       * or, when fsflags contains MNT_UPDATE, an update of the filesystem
       * whose root vnode fspath resolves to.
       *
       * The caller must hold Giant (asserted below).  fsdata is stored in
       * mp->mnt_optnew while VFS_MOUNT() runs and becomes mp->mnt_opt when
       * the mount succeeds.  Returns 0 or an errno value.
  821  */
  822 static int
  823 vfs_domount(
  824         struct thread *td,      /* Calling thread. */
  825         const char *fstype,     /* Filesystem type. */
  826         char *fspath,           /* Mount path. */
  827         int fsflags,            /* Flags common to all filesystems. */
  828         void *fsdata            /* Options local to the filesystem. */
  829         )
  830 {
  831         struct vnode *vp;
  832         struct mount *mp;
  833         struct vfsconf *vfsp;
  834         struct export_args export;
  835         int error, flag = 0;
  836         struct vattr va;
  837         struct nameidata nd;
  838 
  839         mtx_assert(&Giant, MA_OWNED);
  840         /*
  841          * Be ultra-paranoid about making sure the type and fspath
  842          * variables will fit in our mp buffers, including the
  843          * terminating NUL.
  844          */
  845         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  846                 return (ENAMETOOLONG);
  847 
              /*
               * Inside a jail, or when the usermount knob is off, mounting
               * requires PRIV_VFS_MOUNT.
               */
  848         if (jailed(td->td_ucred) || usermount == 0) {
  849                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
  850                         return (error);
  851         }
  852 
  853         /*
  854          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  855          */
  856         if (fsflags & MNT_EXPORTED) {
  857                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
  858                 if (error)
  859                         return (error);
  860         }
  861         if (fsflags & MNT_SUIDDIR) {
  862                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
  863                 if (error)
  864                         return (error);
  865         }
  866         /*
  867          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
  868          */
  869         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
  870                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
  871                         fsflags |= MNT_NOSUID | MNT_USER;
  872         }
  873 
  874         /* Load KLDs before we lock the covered vnode to avoid reversals. */
  875         vfsp = NULL;
  876         if ((fsflags & MNT_UPDATE) == 0) {
  877                 /* Don't try to load KLDs if we're mounting the root. */
  878                 if (fsflags & MNT_ROOTFS)
  879                         vfsp = vfs_byname(fstype);
  880                 else
  881                         vfsp = vfs_byname_kld(fstype, td, &error);
  882                 if (vfsp == NULL)
  883                         return (ENODEV);
                      /* Jailed callers may only mount types flagged VFCF_JAIL. */
  884                 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
  885                         return (EPERM);
  886         }
  887         /*
  888          * Get vnode to be covered
  889          */
  890         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
  891             fspath, td);
  892         if ((error = namei(&nd)) != 0)
  893                 return (error);
  894         NDFREE(&nd, NDF_ONLY_PNBUF);
  895         vp = nd.ni_vp;
  896         if (fsflags & MNT_UPDATE) {
                      /* An update must name the root vnode of a mounted fs. */
  897                 if ((vp->v_vflag & VV_ROOT) == 0) {
  898                         vput(vp);
  899                         return (EINVAL);
  900                 }
  901                 mp = vp->v_mount;
  902                 MNT_ILOCK(mp);
                      /* Remember the current flags so a failed update can undo. */
  903                 flag = mp->mnt_flag;
  904                 /*
  905                  * We only allow the filesystem to be reloaded if it
  906                  * is currently mounted read-only.
  907                  */
  908                 if ((fsflags & MNT_RELOAD) &&
  909                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  910                         MNT_IUNLOCK(mp);
  911                         vput(vp);
  912                         return (EOPNOTSUPP);    /* Needs translation */
  913                 }
  914                 MNT_IUNLOCK(mp);
  915                 /*
  916                  * Only privileged root, or (if MNT_USER is set) the user that
  917                  * did the original mount is permitted to update it.
  918                  */
  919                 error = vfs_suser(mp, td);
  920                 if (error) {
  921                         vput(vp);
  922                         return (error);
  923                 }
  924                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  925                         vput(vp);
  926                         return (EBUSY);
  927                 }
  928                 VI_LOCK(vp);
  929                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  930                     vp->v_mountedhere != NULL) {
  931                         VI_UNLOCK(vp);
  932                         vfs_unbusy(mp, td);
  933                         vput(vp);
  934                         return (EBUSY);
  935                 }
                      /*
                       * Claim the vnode: while VI_MOUNT is set the check just
                       * above makes any other mount attempt on it return EBUSY.
                       */
  936                 vp->v_iflag |= VI_MOUNT;
  937                 VI_UNLOCK(vp);
  938                 MNT_ILOCK(mp);
  939                 mp->mnt_flag |= fsflags &
  940                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
  941                 MNT_IUNLOCK(mp);
  942                 VOP_UNLOCK(vp, 0, td);
  943                 mp->mnt_optnew = fsdata;
  944                 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  945         } else {
  946                 /*
  947                  * If the user is not root, ensure that they own the directory
  948                  * onto which we are attempting to mount.
  949                  */
  950                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  951                 if (error) {
  952                         vput(vp);
  953                         return (error);
  954                 }
  955                 if (va.va_uid != td->td_ucred->cr_uid) {
  956                         error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
  957                             0);
  958                         if (error) {
  959                                 vput(vp);
  960                                 return (error);
  961                         }
  962                 }
  963                 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
  964                 if (error != 0) {
  965                         vput(vp);
  966                         return (error);
  967                 }
  968                 if (vp->v_type != VDIR) {
  969                         vput(vp);
  970                         return (ENOTDIR);
  971                 }
  972                 VI_LOCK(vp);
  973                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  974                     vp->v_mountedhere != NULL) {
  975                         VI_UNLOCK(vp);
  976                         vput(vp);
  977                         return (EBUSY);
  978                 }
  979                 vp->v_iflag |= VI_MOUNT;
  980                 VI_UNLOCK(vp);
  981 
  982                 /*
  983                  * Allocate and initialize the filesystem.
  984                  */
  985                 mp = vfs_mount_alloc(vp, vfsp, fspath, td);
  986                 VOP_UNLOCK(vp, 0, td);
  987 
  988                 /* XXXMAC: pass to vfs_mount_alloc? */
  989                 mp->mnt_optnew = fsdata;
  990         }
  991 
  992         /*
  993          * Set the mount level flags.
  994          */
  995         MNT_ILOCK(mp);
  996         mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
  997                 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
  998                             MNT_RDONLY));
  999         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1000                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1001         MNT_IUNLOCK(mp);
 1002         /*
 1003          * Mount the filesystem.
 1004          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1005          * get.  No freeing of cn_pnbuf.
 1006          */
 1007         error = VFS_MOUNT(mp, td);
 1008 
 1009         /*
 1010          * Process the export option only if we are
 1011          * updating mount options.
 1012          */
 1013         if (!error && (fsflags & MNT_UPDATE)) {
 1014                 if (vfs_copyopt(mp->mnt_optnew, "export", &export,
 1015                     sizeof(export)) == 0)
 1016                         error = vfs_export(mp, &export);
 1017         }
 1018 
              /* On success the new option list replaces the old one. */
 1019         if (!error) {
 1020                 if (mp->mnt_opt != NULL)
 1021                         vfs_freeopts(mp->mnt_opt);
 1022                 mp->mnt_opt = mp->mnt_optnew;
 1023                 (void)VFS_STATFS(mp, &mp->mnt_stat, td);
 1024         }
 1025         /*
 1026          * Prevent external consumers of mount options from reading
 1027          * mnt_optnew.
 1028          */
 1029         mp->mnt_optnew = NULL;
              /* Update path: finish (or roll back) and return from here. */
 1030         if (mp->mnt_flag & MNT_UPDATE) {
 1031                 MNT_ILOCK(mp);
                      /*
                       * A failed update restores the flags saved in 'flag',
                       * except MNT_QUOTA, which keeps its current value.  A
                       * successful one only clears the transient request bits.
                       */
 1032                 if (error)
 1033                         mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
 1034                                 (flag & ~MNT_QUOTA);
 1035                 else
 1036                         mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
 1037                                           MNT_FORCE | MNT_SNAPSHOT);
 1038                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1039                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1040                 else
 1041                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1042                 MNT_IUNLOCK(mp);
                      /*
                       * A writable filesystem gets a syncer vnode, a read-only
                       * one gives it up.
                       * NOTE(review): the assignment below replaces any error
                       * left by VFS_MOUNT()/vfs_export() -- confirm intended.
                       */
 1043                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1044                         if (mp->mnt_syncer == NULL)
 1045                                 error = vfs_allocate_syncvnode(mp);
 1046                 } else {
 1047                         if (mp->mnt_syncer != NULL)
 1048                                 vrele(mp->mnt_syncer);
 1049                         mp->mnt_syncer = NULL;
 1050                 }
 1051                 vfs_unbusy(mp, td);
 1052                 VI_LOCK(vp);
 1053                 vp->v_iflag &= ~VI_MOUNT;
 1054                 VI_UNLOCK(vp);
 1055                 vrele(vp);
 1056                 return (error);
 1057         }
 1058         MNT_ILOCK(mp);
 1059         if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1060                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1061         else
 1062                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1063         MNT_IUNLOCK(mp);
              /* vp was unlocked before VFS_MOUNT(); take the lock back. */
 1064         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1065         /*
 1066          * Put the new filesystem on the mount list after root.
 1067          */
 1068         cache_purge(vp);
 1069         if (!error) {
 1070                 struct vnode *newdp;
 1071 
 1072                 VI_LOCK(vp);
 1073                 vp->v_iflag &= ~VI_MOUNT;
 1074                 VI_UNLOCK(vp);
 1075                 vp->v_mountedhere = mp;
 1076                 mtx_lock(&mountlist_mtx);
 1077                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1078                 mtx_unlock(&mountlist_mtx);
 1079                 vfs_event_signal(NULL, VQ_MOUNT, 0);
 1080                 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
 1081                         panic("mount: lost mount");
 1082                 mountcheckdirs(vp, newdp);
 1083                 vput(newdp);
 1084                 VOP_UNLOCK(vp, 0, td);
 1085                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1086                         error = vfs_allocate_syncvnode(mp);
 1087                 vfs_unbusy(mp, td);
                      /*
                       * NOTE(review): on this error the reference on vp is
                       * dropped although vp->v_mountedhere still points at mp
                       * and mp stays on the mountlist -- confirm intended.
                       */
 1088                 if (error)
 1089                         vrele(vp);
 1090         } else {
                      /* VFS_MOUNT() failed: release the vnode, discard mp. */
 1091                 VI_LOCK(vp);
 1092                 vp->v_iflag &= ~VI_MOUNT;
 1093                 VI_UNLOCK(vp);
 1094                 vfs_unbusy(mp, td);
 1095                 vfs_mount_destroy(mp);
 1096                 vput(vp);
 1097         }
 1098         return (error);
 1099 }
 1100 
 1101 /*
 1102  * Unmount a filesystem.
 1103  *
 1104  * Note: unmount takes a path to the vnode mounted on as argument, not
 1105  * special file (as before).
       *
       * The structure below is the argument block of unmount(); it is only
       * compiled here when _SYS_SYSPROTO_H_ is not defined.
 1106  */
 1107 #ifndef _SYS_SYSPROTO_H_
 1108 struct unmount_args {
 1109         char    *path;          /* mount point, or "FSID:%d:%d" with MNT_BYFSID */
 1110         int     flags;          /* MNT_* flags; also passed on to dounmount() */
 1111 };
 1112 #endif
 1113 /*
       * unmount() system call.
       *
       * Copies in uap->path, finds the matching entry on the mountlist and
       * passes it to dounmount().  With MNT_BYFSID the string is parsed as
       * "FSID:<val0>:<val1>" and matched against the filesystem ID; otherwise
       * it is compared with each mount's f_mntonname.  The list is walked
       * from the tail.
       *
       * Returns 0 or an errno value.
       */
      /* ARGSUSED */
 1114 int
 1115 unmount(td, uap)
 1116         struct thread *td;
 1117         register struct unmount_args /* {
 1118                 char *path;
 1119                 int flags;
 1120         } */ *uap;
 1121 {
 1122         struct mount *mp;
 1123         char *name;
 1124         int byfsid, error, id0, id1;
 1125 
 1126         if (jailed(td->td_ucred) || usermount == 0) {
 1127                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1128                 if (error != 0)
 1129                         return (error);
 1130         }
 1131 
 1132         name = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1133         error = copyinstr(uap->path, name, MNAMELEN, NULL);
 1134         if (error != 0) {
 1135                 free(name, M_TEMP);
 1136                 return (error);
 1137         }
 1138         AUDIT_ARG(upath, td, name, ARG_UPATH1);
              byfsid = (uap->flags & MNT_BYFSID) != 0;
 1139         mtx_lock(&Giant);
 1141         /* Decode the filesystem ID. */
 1142         if (byfsid && sscanf(name, "FSID:%d:%d", &id0, &id1) != 2) {
 1143                 mtx_unlock(&Giant);
 1144                 free(name, M_TEMP);
 1145                 return (EINVAL);
 1146         }
 1147 
              /* One pass over the list; the match rule depends on byfsid. */
 1148         mtx_lock(&mountlist_mtx);
 1149         TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
                      if (byfsid) {
 1150                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1151                             mp->mnt_stat.f_fsid.val[1] == id1)
 1152                                 break;
 1158                 } else if (strcmp(mp->mnt_stat.f_mntonname, name) == 0)
 1159                         break;
 1160         }
 1161         mtx_unlock(&mountlist_mtx);
 1163         free(name, M_TEMP);
      
 1164         if (mp == NULL) {
 1165                 /*
 1166                  * Previously we returned ENOENT for a nonexistent path and
 1167                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1168                  * now, so in the !MNT_BYFSID case return the more likely
 1169                  * EINVAL for compatibility.
 1170                  */
 1172                 error = byfsid ? ENOENT : EINVAL;
 1178         } else if (mp->mnt_flag & MNT_ROOTFS) {
 1176                 /* Don't allow unmounting the root filesystem. */
 1180                 error = EINVAL;
 1181         } else
 1182                 error = dounmount(mp, uap->flags, td);
 1183         mtx_unlock(&Giant);
 1184         return (error);
 1185 }
 1186 
 1187 /*
 1188  * Do the actual filesystem unmount.
       *
       * The caller must hold Giant (asserted below).  flags are the MNT_*
       * unmount flags; MNT_FORCE changes several steps, as noted inline.
       *
       * Returns 0 after mp has been taken off the mountlist and destroyed.
       * On failure an errno value is returned and the unmount-in-progress
       * state set up here is undone.
 1189  */
 1190 int
 1191 dounmount(mp, flags, td)
 1192         struct mount *mp;
 1193         int flags;
 1194         struct thread *td;
 1195 {
 1196         struct vnode *coveredvp, *fsrootvp;
 1197         int error;
 1198         int async_flag;
 1199         int mnt_gen_r;
 1200 
 1201         mtx_assert(&Giant, MA_OWNED);
 1202 
 1203         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
                      /* Sample the generation before possibly sleeping. */
 1204                 mnt_gen_r = mp->mnt_gen;
 1205                 VI_LOCK(coveredvp);
 1206                 vholdl(coveredvp);
 1207                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
 1208                 vdrop(coveredvp);
 1209                 /*
 1210                  * Check for mp being unmounted while waiting for the
 1211                  * covered vnode lock.
 1212                  */
 1213                 if (coveredvp->v_mountedhere != mp ||
 1214                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1215                         VOP_UNLOCK(coveredvp, 0, td);
 1216                         return (EBUSY);
 1217                 }
 1218         }
 1219         /*
 1220          * Only privileged root, or (if MNT_USER is set) the user that did the
 1221          * original mount is permitted to unmount this filesystem.
 1222          */
 1223         error = vfs_suser(mp, td);
 1224         if (error) {
 1225                 if (coveredvp)
 1226                         VOP_UNLOCK(coveredvp, 0, td);
 1227                 return (error);
 1228         }
 1229 
 1230         MNT_ILOCK(mp);
              /* Someone else is already unmounting this filesystem. */
 1231         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 1232                 MNT_IUNLOCK(mp);
 1233                 if (coveredvp)
 1234                         VOP_UNLOCK(coveredvp, 0, td);
 1235                 return (EBUSY);
 1236         }
 1237         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
 1238         /* Allow filesystems to detect that a forced unmount is in progress. */
 1239         if (flags & MNT_FORCE)
 1240                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
              /*
               * Drain mnt_lock.  Only a forced unmount is allowed to wait for
               * it; otherwise LK_NOWAIT makes the request fail immediately.
               */
 1241         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
 1242             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
 1243         if (error) {
                      /* Back out the flags set above and wake any waiters. */
 1244                 MNT_ILOCK(mp);
 1245                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
 1246                     MNTK_UNMOUNTF);
 1247                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1248                         wakeup(mp);
 1249                 MNT_IUNLOCK(mp);
 1250                 if (coveredvp)
 1251                         VOP_UNLOCK(coveredvp, 0, td);
 1252                 return (error);
 1253         }
 1254         vn_start_write(NULL, &mp, V_WAIT);
 1255 
 1256         if (mp->mnt_flag & MNT_EXPUBLIC)
 1257                 vfs_setpublicfs(NULL, NULL, NULL);
 1258 
 1259         vfs_msync(mp, MNT_WAIT);
 1260         MNT_ILOCK(mp);
              /* Saved so the failure path below can turn MNT_ASYNC back on. */
 1261         async_flag = mp->mnt_flag & MNT_ASYNC;
 1262         mp->mnt_flag &= ~MNT_ASYNC;
 1263         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1264         MNT_IUNLOCK(mp);
 1265         cache_purgevfs(mp);     /* remove cache entries for this file sys */
 1266         if (mp->mnt_syncer != NULL)
 1267                 vrele(mp->mnt_syncer);
 1268         /*
 1269          * For forced unmounts, move process cdir/rdir refs on the fs root
 1270          * vnode to the covered vnode.  For non-forced unmounts we want
 1271          * such references to cause an EBUSY error.
 1272          */
 1273         if ((flags & MNT_FORCE) &&
 1274             VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1275                 if (mp->mnt_vnodecovered != NULL)
 1276                         mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
 1277                 if (fsrootvp == rootvnode) {
 1278                         vrele(rootvnode);
 1279                         rootvnode = NULL;
 1280                 }
 1281                 vput(fsrootvp);
 1282         }
              /*
               * VFS_UNMOUNT() is attempted when the filesystem is read-only,
               * when VFS_SYNC() succeeded, or unconditionally for a forced
               * unmount.  Otherwise error keeps the VFS_SYNC() failure.
               */
 1283         if (((mp->mnt_flag & MNT_RDONLY) ||
 1284              (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
 1285             (flags & MNT_FORCE)) {
 1286                 error = VFS_UNMOUNT(mp, flags, td);
 1287         }
 1288         vn_finished_write(mp);
 1289         if (error) {
 1290                 /* Undo cdir/rdir and rootvnode changes made above. */
 1291                 if ((flags & MNT_FORCE) &&
 1292                     VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1293                         if (mp->mnt_vnodecovered != NULL)
 1294                                 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
 1295                         if (rootvnode == NULL) {
 1296                                 rootvnode = fsrootvp;
 1297                                 vref(rootvnode);
 1298                         }
 1299                         vput(fsrootvp);
 1300                 }
 1301                 MNT_ILOCK(mp);
 1302                 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
                      /* The interlock is dropped around the syncer allocation. */
 1303                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
 1304                         MNT_IUNLOCK(mp);
 1305                         (void) vfs_allocate_syncvnode(mp);
 1306                         MNT_ILOCK(mp);
 1307                 }
 1308                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1309                 mp->mnt_flag |= async_flag;
 1310                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1311                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1312                 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1313                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1314                         wakeup(mp);
 1315                 MNT_IUNLOCK(mp);
 1316                 if (coveredvp)
 1317                         VOP_UNLOCK(coveredvp, 0, td);
 1318                 return (error);
 1319         }
              /* Success: unlink mp, uncover the vnode, then destroy mp. */
 1320         mtx_lock(&mountlist_mtx);
 1321         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1322         mtx_unlock(&mountlist_mtx);
 1323         if (coveredvp != NULL) {
 1324                 coveredvp->v_mountedhere = NULL;
 1325                 vput(coveredvp);
 1326         }
 1327         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1328         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1329         vfs_mount_destroy(mp);
 1330         return (0);
 1331 }
 1332 
 1333 /*
 1334  * ---------------------------------------------------------------------
 1335  * Mounting of root filesystem
 1336  *
 1337  */
 1338 
      /*
       * One entry per outstanding root_mount_hold() call.  Entries live on
       * root_holds until root_mount_rel() removes and frees them.
       */
 1339 struct root_hold_token {
 1340         const char                      *who;   /* name printed while waiting */
 1341         LIST_ENTRY(root_hold_token)     list;   /* linkage on root_holds */
 1342 };
 1343 
      /* Outstanding holds; every access in this file is under mountlist_mtx. */
 1344 static LIST_HEAD(, root_hold_token)     root_holds =
 1345     LIST_HEAD_INITIALIZER(&root_holds);
 1346 
      /* Set to 1 by root_mount_done(); also the root_mount_wait() sleep channel. */
 1347 static int root_mount_complete;
 1348 
 1349 /*
 1350  * Hold root mount.
       *
       * Allocates a token labelled with identifier and puts it on root_holds;
       * root_mount_prepare() does not finish while the list is non-empty.
       * The identifier pointer is stored, not copied.  The returned token is
       * what the caller later passes to root_mount_rel().
 1351  */
 1352 struct root_hold_token *
 1353 root_mount_hold(const char *identifier)
 1354 {
 1355         struct root_hold_token *token;
 1356 
 1357         token = malloc(sizeof(*token), M_DEVBUF, M_WAITOK | M_ZERO);
 1358         token->who = identifier;
      
 1359         mtx_lock(&mountlist_mtx);
 1360         LIST_INSERT_HEAD(&root_holds, token, list);
 1361         mtx_unlock(&mountlist_mtx);
      
 1362         return (token);
 1363 }
 1364 
 1365 /*
 1366  * Release root mount.
       *
       * Takes the token h off root_holds, wakes anything sleeping on
       * &root_holds (root_mount_prepare() does), and frees the token.
 1367  */
 1368 void
 1369 root_mount_rel(struct root_hold_token *h)
 1370 {
 1371 
              /* Unlink and wake under the same mutex the waiter sleeps on. */
 1372         mtx_lock(&mountlist_mtx);
 1373         LIST_REMOVE(h, list);
 1374         wakeup(&root_holds);
 1375         mtx_unlock(&mountlist_mtx);
      
 1376         free(h, M_DEVBUF);
 1377 }
 1378 
 1379 /*
 1380  * Wait for all subsystems to release root mount.
       *
       * Each pass calls g_waitidle() with Giant dropped, then checks
       * root_holds.  While holds remain, their names are printed and the
       * thread sleeps on &root_holds with a timeout of hz ticks before
       * trying again.
 1381  */
 1382 static void
 1383 root_mount_prepare(void)
 1384 {
 1385         struct root_hold_token *holder;
 1386 
 1387         for (;;) {
 1388                 DROP_GIANT();
 1389                 g_waitidle();
 1390                 PICKUP_GIANT();
      
 1391                 mtx_lock(&mountlist_mtx);
 1392                 if (!LIST_EMPTY(&root_holds)) {
 1396                         printf("Root mount waiting for:");
 1397                         LIST_FOREACH(holder, &root_holds, list)
 1398                                 printf(" %s", holder->who);
 1399                         printf("\n");
                              /*
                               * PDROP: msleep() comes back without
                               * mountlist_mtx, which is why the next pass
                               * locks it again.
                               */
 1400                         msleep(&root_holds, &mountlist_mtx, PZERO | PDROP,
 1401                             "roothold", hz);
                              continue;
 1395                 }
 1393                 mtx_unlock(&mountlist_mtx);
 1394                 return;
 1402         }
 1403 }
 1404 
 1405 /*
 1406  * Root was mounted, share the good news.
       *
       * Sets root_mount_complete and wakes the threads sleeping on it in
       * root_mount_wait().
 1407  */
 1408 static void
 1409 root_mount_done(void)
 1410 {
 1411 
 1412         /*
 1413          * The flag is set and the wakeup issued with mountlist_mtx held.
 1414          * Without it a waiter could miss the wakeup and sleep for an
 1415          * extra second before noticing.
               */
 1416         mtx_lock(&mountlist_mtx);
 1417         root_mount_complete = 1;
 1418         wakeup(&root_mount_complete);
 1419         mtx_unlock(&mountlist_mtx);
 1420 }
 1421 
 1422 /*
 1423  * Return true if root is already mounted.
       *
       * This is the current value of root_mount_complete, which is zero until
       * root_mount_done() sets it to 1.
 1424  */
 1425 int
 1426 root_mounted(void)
 1427 {
 1428 
 1429         /* Read without mountlist_mtx: int stores are atomic. */
 1430         return (root_mount_complete);
 1431 }
 1432 
 1433 /*
 1434  * Wait until root is mounted.
       *
       * Sleeps on &root_mount_complete, re-checking the flag after every
       * wakeup or hz-tick timeout, until root_mount_done() has set it.
 1435  */
 1436 void
 1437 root_mount_wait(void)
 1438 {
 1439 
 1440         /*
 1441          * Calling this from the thread that runs SYSINIT (pid 0) would
 1442          * deadlock, so treat that as a programming error and panic on
 1443          * it.
               */
 1444         KASSERT(curthread->td_proc->p_pid != 0,
 1445             ("root_mount_wait: cannot be called from the swapper thread"));
      
 1446         mtx_lock(&mountlist_mtx);
 1447         while (root_mount_complete == 0)
 1448                 msleep(&root_mount_complete, &mountlist_mtx, PZERO,
 1449                     "rootwait", hz);
 1451         mtx_unlock(&mountlist_mtx);
 1452 }
 1453 
      /*
       * Look up the root vnode of the first filesystem on the mountlist, store
       * it in rootvnode, and make it both the current and the root directory
       * of td's process.  Each of the two directory slots takes its own
       * reference; whatever vnode a slot held before is released.  Panics if
       * VFS_ROOT() fails.
       */
 1454 static void
 1455 set_rootvnode(struct thread *td)
 1456 {
 1457         struct filedesc *fdp;
 1458 
              /* VFS_ROOT() is asked for an exclusive lock; dropped at the end. */
 1459         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1460                 panic("Cannot find root vnode");
 1461 
 1462         fdp = td->td_proc->p_fd;
              /*
               * NOTE(review): fd_cdir and fd_rdir are written below under the
               * "S" variant of the filedesc lock -- confirm that this is
               * sufficient for writers.
               */
 1463         FILEDESC_SLOCK(fdp);
 1464 
 1465         if (fdp->fd_cdir != NULL)
 1466                 vrele(fdp->fd_cdir);
 1467         fdp->fd_cdir = rootvnode;
 1468         VREF(rootvnode);
 1469 
 1470         if (fdp->fd_rdir != NULL)
 1471                 vrele(fdp->fd_rdir);
 1472         fdp->fd_rdir = rootvnode;
 1473         VREF(rootvnode);
 1474 
 1475         FILEDESC_SUNLOCK(fdp);
 1476 
 1477         VOP_UNLOCK(rootvnode, 0, td);
 1478 }
 1479 
 1480 /*
 1481  * Mount devfs as a temporary root: allocate a mount for it, run
 1482  * VFS_MOUNT(), insert it at the head of the mountlist and point rootvnode
       * at it.  A "dev" symlink to "/" is then created so that absolute /dev
       * pathnames resolve while devfs is the root.  devfs_fixup() later moves
       * this mount onto the real /dev.
 1483  */
 1484 
 1485 static void
 1486 devfs_first(void)
 1487 {
 1488         struct thread *td;
 1489         struct vfsoptlist *optlist;
 1490         struct vfsconf *conf;
 1491         struct mount *mp;
 1492         int error;
 1493 
              td = curthread;
 1494         conf = vfs_byname("devfs");
 1495         KASSERT(conf != NULL, ("Could not find devfs by name"));
 1496         if (conf == NULL)
 1497                 return;
 1498 
 1499         mp = vfs_mount_alloc(NULLVP, conf, "/dev", td);
 1500 
 1501         error = VFS_MOUNT(mp, td);
 1502         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 1503         if (error != 0)
 1504                 return;
 1505 
              /* Give the mount an empty option list. */
 1506         optlist = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 1507         TAILQ_INIT(optlist);
 1508         mp->mnt_opt = optlist;
 1509 
 1510         mtx_lock(&mountlist_mtx);
 1511         TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1512         mtx_unlock(&mountlist_mtx);
 1513 
              /* set_rootvnode() uses the first mountlist entry: this one. */
 1514         set_rootvnode(td);
 1515 
 1516         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1517         if (error != 0)
 1518                 printf("kern_symlink /dev -> / returns %d\n", error);
 1519 }
 1520 
 1521 /*
 1522  * Surgically move our devfs to be mounted on /dev.
       *
       * The devfs mount is expected to be the first entry on the mountlist,
       * where devfs_first() put it.  It is taken off the list, the next entry
       * becomes the root filesystem, and devfs is re-attached on top of the
       * /dev directory of that new root.
       *
       * Failure to look up or prepare /dev is reported on the console.  In
       * that case devfs stays off the mountlist, but the devfs root vnode and
       * the busy reference on the mount are still released, exactly as on the
       * success path.
 1523  */
 1524 
 1525 static void
 1526 devfs_fixup(struct thread *td)
 1527 {
 1528         struct nameidata nd;
 1529         int error;
 1530         struct vnode *vp, *dvp;
 1531         struct mount *mp;
 1532 
 1533         /* Remove our devfs mount from the mountlist and purge the cache */
 1534         mtx_lock(&mountlist_mtx);
 1535         mp = TAILQ_FIRST(&mountlist);
 1536         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1537         mtx_unlock(&mountlist_mtx);
 1538         cache_purgevfs(mp);
 1539 
              /* dvp is returned locked; every exit path below must vput() it. */
 1540         VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
 1541         VI_LOCK(dvp);
 1542         dvp->v_iflag &= ~VI_MOUNT;
 1543         VI_UNLOCK(dvp);
 1544         dvp->v_mountedhere = NULL;
 1545 
 1546         /* Set up the real rootvnode, and purge the cache */
 1547         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1548         set_rootvnode(td);
 1549         cache_purgevfs(rootvnode->v_mount);
 1550 
 1551         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1552         error = namei(&nd);
 1553         if (error) {
 1554                 printf("Lookup of /dev for devfs, error: %d\n", error);
 1555                 goto fail;
 1556         }
 1557         NDFREE(&nd, NDF_ONLY_PNBUF);
 1558         vp = nd.ni_vp;
 1559         if (vp->v_type != VDIR) {
                      /* vp must not be touched after vput(); bail out. */
                      printf("/dev is not a directory\n");
 1560                 vput(vp);
                      goto fail;
 1561         }
 1562         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1563         if (error) {
                      printf("vinvalbuf() of /dev failed, error: %d\n", error);
 1564                 vput(vp);
                      goto fail;
 1565         }
 1566         cache_purge(vp);
 1567         mp->mnt_vnodecovered = vp;
 1568         vp->v_mountedhere = mp;
 1569         mtx_lock(&mountlist_mtx);
 1570         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1571         mtx_unlock(&mountlist_mtx);
 1572         VOP_UNLOCK(vp, 0, td);
 1573         vput(dvp);
 1574         vfs_unbusy(mp, td);
 1575 
 1576         /* Unlink the no longer needed /dev/dev -> / symlink */
 1577         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
              return;
      
      fail:
              /* Same releases as the success path performs. */
              vput(dvp);
              vfs_unbusy(mp, td);
 1578 }
 1579 
 1580 /*
 1581  * Report errors during filesystem mounting.
       *
       * Formats the printf-style message fmt into the buffer carried by the
       * "errmsg" entry of mp->mnt_optnew, truncating it to that buffer's
       * length.  Does nothing when the entry cannot be fetched, has no buffer,
       * or has a non-positive length.
 1582  */
 1583 void
 1584 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1585 {
 1586         struct vfsoptlist *opts;
 1587         va_list args;
 1588         int buflen, error;
 1589         char *buf;
 1590 
              opts = mp->mnt_optnew;
 1591         error = vfs_getopt(opts, "errmsg", (void **)&buf, &buflen);
 1592         if (error != 0 || buf == NULL || buflen <= 0)
 1593                 return;
 1594 
 1595         va_start(args, fmt);
 1596         vsnprintf(buf, (size_t)buflen, fmt, args);
 1597         va_end(args);
 1598 }
 1599 
 1600 /*
 1601  * Find and mount the root filesystem
 1602  */
 1603 void
 1604 vfs_mountroot(void)
 1605 {
 1606         char *cp;
 1607         int error, i, asked = 0;
 1608 
 1609         root_mount_prepare();
 1610 
 1611         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
 1612             NULL, NULL, mount_init, mount_fini,
 1613             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1614         devfs_first();
 1615 
 1616         /*
 1617          * We are booted with instructions to prompt for the root filesystem.
 1618          */
 1619         if (boothowto & RB_ASKNAME) {
 1620                 if (!vfs_mountroot_ask())
 1621                         goto mounted;
 1622                 asked = 1;
 1623         }
 1624 
 1625         /*
 1626          * The root filesystem information is compiled in, and we are
 1627          * booted with instructions to use it.
 1628          */
 1629         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1630                 if (!vfs_mountroot_try(ctrootdevname))
 1631                         goto mounted;
 1632                 ctrootdevname = NULL;
 1633         }
 1634 
 1635         /*
 1636          * We've been given the generic "use CDROM as root" flag.  This is
 1637          * necessary because one media may be used in many different
 1638          * devices, so we need to search for them.
 1639          */
 1640         if (boothowto & RB_CDROM) {
 1641                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1642                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1643                                 goto mounted;
 1644                 }
 1645         }
 1646 
 1647         /*
 1648          * Try to use the value read by the loader from /etc/fstab, or
 1649          * supplied via some other means.  This is the preferred
 1650          * mechanism.
 1651          */
 1652         cp = getenv("vfs.root.mountfrom");
 1653         if (cp != NULL) {
 1654                 error = vfs_mountroot_try(cp);
 1655                 freeenv(cp);
 1656                 if (!error)
 1657                         goto mounted;
 1658         }
 1659 
 1660         /*
 1661          * Try values that may have been computed by code during boot
 1662          */
 1663         if (!vfs_mountroot_try(rootdevnames[0]))
 1664                 goto mounted;
 1665         if (!vfs_mountroot_try(rootdevnames[1]))
 1666                 goto mounted;
 1667 
 1668         /*
 1669          * If we (still) have a compiled-in default, try it.
 1670          */
 1671         if (ctrootdevname != NULL)
 1672                 if (!vfs_mountroot_try(ctrootdevname))
 1673                         goto mounted;
 1674         /*
 1675          * Everything so far has failed, prompt on the console if we haven't
 1676          * already tried that.
 1677          */
 1678         if (!asked)
 1679                 if (!vfs_mountroot_ask())
 1680                         goto mounted;
 1681 
 1682         panic("Root mount failed, startup aborted.");
 1683 
 1684 mounted:
 1685         root_mount_done();
 1686 }
 1687 
 1688 /*
 1689  * Mount (mountfrom) as the root filesystem.
 1690  */
 1691 static int
 1692 vfs_mountroot_try(const char *mountfrom)
 1693 {
 1694         struct mount    *mp;
 1695         char            *vfsname, *path;
 1696         time_t          timebase;
 1697         int             error;
 1698         char            patt[32];
 1699 
 1700         vfsname = NULL;
 1701         path    = NULL;
 1702         mp      = NULL;
 1703         error   = EINVAL;
 1704 
 1705         if (mountfrom == NULL)
 1706                 return (error);         /* don't complain */
 1707         printf("Trying to mount root from %s\n", mountfrom);
 1708 
 1709         /* parse vfs name and path */
 1710         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1711         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1712         vfsname[0] = path[0] = 0;
 1713         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1714         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1715                 goto out;
 1716 
 1717         if (path[0] == '\0')
 1718                 strcpy(path, ROOTNAME);
 1719 
 1720         error = kernel_vmount(
 1721             MNT_RDONLY | MNT_ROOTFS,
 1722             "fstype", vfsname,
 1723             "fspath", "/",
 1724             "from", path,
 1725             NULL);
 1726         if (error == 0) {
 1727                 /*
 1728                  * We mount devfs prior to mounting the / FS, so the first
 1729                  * entry will typically be devfs.
 1730                  */
 1731                 mp = TAILQ_FIRST(&mountlist);
 1732                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1733 
 1734                 /*
 1735                  * Iterate over all currently mounted file systems and use
 1736                  * the time stamp found to check and/or initialize the RTC.
 1737                  * Typically devfs has no time stamp and the only other FS
 1738                  * is the actual / FS.
 1739                  * Call inittodr() only once and pass it the largest of the
 1740                  * timestamps we encounter.
 1741                  */
 1742                 timebase = 0;
 1743                 do {
 1744                         if (mp->mnt_time > timebase)
 1745                                 timebase = mp->mnt_time;
 1746                         mp = TAILQ_NEXT(mp, mnt_list);
 1747                 } while (mp != NULL);
 1748                 inittodr(timebase);
 1749 
 1750                 devfs_fixup(curthread);
 1751         }
 1752 out:
 1753         free(path, M_MOUNT);
 1754         free(vfsname, M_MOUNT);
 1755         return (error);
 1756 }
 1757 
 1758 /*
 1759  * ---------------------------------------------------------------------
 1760  * Interactive root filesystem selection code.
 1761  */
 1762 
 1763 static int
 1764 vfs_mountroot_ask(void)
 1765 {
 1766         char name[128];
 1767 
 1768         for(;;) {
 1769                 printf("\nManual root filesystem specification:\n");
 1770                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1771 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
 1772                 printf("                       eg. ufs:da0s1a\n");
 1773 #else
 1774                 printf("                       eg. ufs:/dev/da0a\n");
 1775 #endif
 1776                 printf("  ?                  List valid disk boot devices\n");
 1777                 printf("  <empty line>       Abort manual input\n");
 1778                 printf("\nmountroot> ");
 1779                 gets(name, sizeof(name), 1);
 1780                 if (name[0] == '\0')
 1781                         return (1);
 1782                 if (name[0] == '?') {
 1783                         printf("\nList of GEOM managed disk devices:\n  ");
 1784                         g_dev_print();
 1785                         continue;
 1786                 }
 1787                 if (!vfs_mountroot_try(name))
 1788                         return (0);
 1789         }
 1790 }
 1791 
 1792 /*
 1793  * ---------------------------------------------------------------------
 1794  * Functions for querying mount options/arguments from filesystems.
 1795  */
 1796 
 1797 /*
 1798  * Check that no unknown options are given
 1799  */
 1800 int
 1801 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1802 {
 1803         struct vfsopt *opt;
 1804         char errmsg[255];
 1805         const char **t, *p, *q;
 1806         int ret = 0;
 1807 
 1808         TAILQ_FOREACH(opt, opts, link) {
 1809                 p = opt->name;
 1810                 q = NULL;
 1811                 if (p[0] == 'n' && p[1] == 'o')
 1812                         q = p + 2;
 1813                 for(t = global_opts; *t != NULL; t++) {
 1814                         if (strcmp(*t, p) == 0)
 1815                                 break;
 1816                         if (q != NULL) {
 1817                                 if (strcmp(*t, q) == 0)
 1818                                         break;
 1819                         }
 1820                 }
 1821                 if (*t != NULL)
 1822                         continue;
 1823                 for(t = legal; *t != NULL; t++) {
 1824                         if (strcmp(*t, p) == 0)
 1825                                 break;
 1826                         if (q != NULL) {
 1827                                 if (strcmp(*t, q) == 0)
 1828                                         break;
 1829                         }
 1830                 }
 1831                 if (*t != NULL)
 1832                         continue;
 1833                 snprintf(errmsg, sizeof(errmsg),
 1834                     "mount option <%s> is unknown", p);
 1835                 printf("%s\n", errmsg);
 1836                 ret = EINVAL;
 1837         }
 1838         if (ret != 0) {
 1839                 TAILQ_FOREACH(opt, opts, link) {
 1840                         if (strcmp(opt->name, "errmsg") == 0) {
 1841                                 strncpy((char *)opt->value, errmsg, opt->len);
 1842                         }
 1843                 }
 1844         }
 1845         return (ret);
 1846 }
 1847 
 1848 /*
 1849  * Get a mount option by its name.
 1850  *
 1851  * Return 0 if the option was found, ENOENT otherwise.
 1852  * If len is non-NULL it will be filled with the length
 1853  * of the option. If buf is non-NULL, it will be filled
 1854  * with the address of the option.
 1855  */
 1856 int
 1857 vfs_getopt(opts, name, buf, len)
 1858         struct vfsoptlist *opts;
 1859         const char *name;
 1860         void **buf;
 1861         int *len;
 1862 {
 1863         struct vfsopt *opt;
 1864 
 1865         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1866 
 1867         TAILQ_FOREACH(opt, opts, link) {
 1868                 if (strcmp(name, opt->name) == 0) {
 1869                         if (len != NULL)
 1870                                 *len = opt->len;
 1871                         if (buf != NULL)
 1872                                 *buf = opt->value;
 1873                         return (0);
 1874                 }
 1875         }
 1876         return (ENOENT);
 1877 }
 1878 
 1879 static int
 1880 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 1881 {
 1882         struct vfsopt *opt;
 1883         int i;
 1884 
 1885         if (opts == NULL)
 1886                 return (-1);
 1887 
 1888         i = 0;
 1889         TAILQ_FOREACH(opt, opts, link) {
 1890                 if (strcmp(name, opt->name) == 0)
 1891                         return (i);
 1892                 ++i;
 1893         }
 1894         return (-1);
 1895 }
 1896 
 1897 char *
 1898 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1899 {
 1900         struct vfsopt *opt;
 1901 
 1902         *error = 0;
 1903         TAILQ_FOREACH(opt, opts, link) {
 1904                 if (strcmp(name, opt->name) != 0)
 1905                         continue;
 1906                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1907                         *error = EINVAL;
 1908                         return (NULL);
 1909                 }
 1910                 return (opt->value);
 1911         }
 1912         *error = ENOENT;
 1913         return (NULL);
 1914 }
 1915 
 1916 int
 1917 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1918 {
 1919         struct vfsopt *opt;
 1920 
 1921         TAILQ_FOREACH(opt, opts, link) {
 1922                 if (strcmp(name, opt->name) == 0) {
 1923                         if (w != NULL)
 1924                                 *w |= val;
 1925                         return (1);
 1926                 }
 1927         }
 1928         if (w != NULL)
 1929                 *w &= ~val;
 1930         return (0);
 1931 }
 1932 
 1933 int
 1934 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1935 {
 1936         va_list ap;
 1937         struct vfsopt *opt;
 1938         int ret;
 1939 
 1940         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1941 
 1942         TAILQ_FOREACH(opt, opts, link) {
 1943                 if (strcmp(name, opt->name) != 0)
 1944                         continue;
 1945                 if (opt->len == 0 || opt->value == NULL)
 1946                         return (0);
 1947                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1948                         return (0);
 1949                 va_start(ap, fmt);
 1950                 ret = vsscanf(opt->value, fmt, ap);
 1951                 va_end(ap);
 1952                 return (ret);
 1953         }
 1954         return (0);
 1955 }
 1956 
 1957 /*
 1958  * Find and copy a mount option.
 1959  *
 1960  * The size of the buffer has to be specified
 1961  * in len, if it is not the same length as the
 1962  * mount option, EINVAL is returned.
 1963  * Returns ENOENT if the option is not found.
 1964  */
 1965 int
 1966 vfs_copyopt(opts, name, dest, len)
 1967         struct vfsoptlist *opts;
 1968         const char *name;
 1969         void *dest;
 1970         int len;
 1971 {
 1972         struct vfsopt *opt;
 1973 
 1974         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1975 
 1976         TAILQ_FOREACH(opt, opts, link) {
 1977                 if (strcmp(name, opt->name) == 0) {
 1978                         if (len != opt->len)
 1979                                 return (EINVAL);
 1980                         bcopy(opt->value, dest, opt->len);
 1981                         return (0);
 1982                 }
 1983         }
 1984         return (ENOENT);
 1985 }
 1986 
 1987 /*
 1988  * This is a helper function for filesystems to traverse their
 1989  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 1990  */
 1991 
 1992 struct vnode *
 1993 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 1994 {
 1995         struct vnode *vp;
 1996 
 1997         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1998 
 1999         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2000         vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 2001         while (vp != NULL && vp->v_type == VMARKER)
 2002                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2003 
 2004         /* Check if we are done */
 2005         if (vp == NULL) {
 2006                 __mnt_vnode_markerfree(mvp, mp);
 2007                 return (NULL);
 2008         }
 2009         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2010         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2011         return (vp);
 2012 }
 2013 
 2014 struct vnode *
 2015 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 2016 {
 2017         struct vnode *vp;
 2018 
 2019         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2020 
 2021         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2022         while (vp != NULL && vp->v_type == VMARKER)
 2023                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2024 
 2025         /* Check if we are done */
 2026         if (vp == NULL) {
 2027                 *mvp = NULL;
 2028                 return (NULL);
 2029         }
 2030         mp->mnt_holdcnt++;
 2031         MNT_IUNLOCK(mp);
 2032         *mvp = (struct vnode *) malloc(sizeof(struct vnode),
 2033                                        M_VNODE_MARKER,
 2034                                        M_WAITOK | M_ZERO);
 2035         MNT_ILOCK(mp);
 2036         (*mvp)->v_type = VMARKER;
 2037 
 2038         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2039         while (vp != NULL && vp->v_type == VMARKER)
 2040                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2041 
 2042         /* Check if we are done */
 2043         if (vp == NULL) {
 2044                 MNT_IUNLOCK(mp);
 2045                 free(*mvp, M_VNODE_MARKER);
 2046                 MNT_ILOCK(mp);
 2047                 *mvp = NULL;
 2048                 mp->mnt_holdcnt--;
 2049                 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2050                         wakeup(&mp->mnt_holdcnt);
 2051                 return (NULL);
 2052         }
 2053         mp->mnt_markercnt++;
 2054         (*mvp)->v_mount = mp;
 2055         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2056         return (vp);
 2057 }
 2058 
 2059 
 2060 void
 2061 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 2062 {
 2063 
 2064         if (*mvp == NULL)
 2065                 return;
 2066 
 2067         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2068 
 2069         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2070         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2071         MNT_IUNLOCK(mp);
 2072         free(*mvp, M_VNODE_MARKER);
 2073         MNT_ILOCK(mp);
 2074         *mvp = NULL;
 2075 
 2076         mp->mnt_markercnt--;
 2077         mp->mnt_holdcnt--;
 2078         if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2079                 wakeup(&mp->mnt_holdcnt);
 2080 }
 2081 
 2082 
 2083 int
 2084 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 2085 {
 2086         int error;
 2087 
 2088         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 2089         if (sbp != &mp->mnt_stat)
 2090                 *sbp = mp->mnt_stat;
 2091         return (error);
 2092 }
 2093 
 2094 void
 2095 vfs_mountedfrom(struct mount *mp, const char *from)
 2096 {
 2097 
 2098         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2099         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2100             sizeof mp->mnt_stat.f_mntfromname);
 2101 }
 2102 
 2103 /*
 2104  * ---------------------------------------------------------------------
 2105  * This is the api for building mount args and mounting filesystems from
 2106  * inside the kernel.
 2107  *
 2108  * The API works by accumulation of individual args.  First error is
 2109  * latched.
 2110  *
 2111  * XXX: should be documented in new manpage kernel_mount(9)
 2112  */
 2113 
 2114 /* A memory allocation which must be freed when we are done */
 2115 struct mntaarg {
 2116         SLIST_ENTRY(mntaarg)    next;
 2117 };
 2118 
 2119 /* The header for the mount arguments */
 2120 struct mntarg {
 2121         struct iovec *v;
 2122         int len;
 2123         int error;
 2124         SLIST_HEAD(, mntaarg)   list;
 2125 };
 2126 
 2127 /*
 2128  * Add a boolean argument.
 2129  *
 2130  * flag is the boolean value.
 2131  * name must start with "no".
 2132  */
 2133 struct mntarg *
 2134 mount_argb(struct mntarg *ma, int flag, const char *name)
 2135 {
 2136 
 2137         KASSERT(name[0] == 'n' && name[1] == 'o',
 2138             ("mount_argb(...,%s): name must start with 'no'", name));
 2139 
 2140         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2141 }
 2142 
 2143 /*
 2144  * Add an argument printf style
 2145  */
 2146 struct mntarg *
 2147 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2148 {
 2149         va_list ap;
 2150         struct mntaarg *maa;
 2151         struct sbuf *sb;
 2152         int len;
 2153 
 2154         if (ma == NULL) {
 2155                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2156                 SLIST_INIT(&ma->list);
 2157         }
 2158         if (ma->error)
 2159                 return (ma);
 2160 
 2161         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2162             M_MOUNT, M_WAITOK);
 2163         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2164         ma->v[ma->len].iov_len = strlen(name) + 1;
 2165         ma->len++;
 2166 
 2167         sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
 2168         va_start(ap, fmt);
 2169         sbuf_vprintf(sb, fmt, ap);
 2170         va_end(ap);
 2171         sbuf_finish(sb);
 2172         len = sbuf_len(sb) + 1;
 2173         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2174         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2175         bcopy(sbuf_data(sb), maa + 1, len);
 2176         sbuf_delete(sb);
 2177 
 2178         ma->v[ma->len].iov_base = maa + 1;
 2179         ma->v[ma->len].iov_len = len;
 2180         ma->len++;
 2181 
 2182         return (ma);
 2183 }
 2184 
 2185 /*
 2186  * Add an argument which is a userland string.
 2187  */
 2188 struct mntarg *
 2189 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2190 {
 2191         struct mntaarg *maa;
 2192         char *tbuf;
 2193 
 2194         if (val == NULL)
 2195                 return (ma);
 2196         if (ma == NULL) {
 2197                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2198                 SLIST_INIT(&ma->list);
 2199         }
 2200         if (ma->error)
 2201                 return (ma);
 2202         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2203         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2204         tbuf = (void *)(maa + 1);
 2205         ma->error = copyinstr(val, tbuf, len, NULL);
 2206         return (mount_arg(ma, name, tbuf, -1));
 2207 }
 2208 
 2209 /*
 2210  * Plain argument.
 2211  *
 2212  * If length is -1, use printf.
 2213  */
 2214 struct mntarg *
 2215 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2216 {
 2217 
 2218         if (ma == NULL) {
 2219                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2220                 SLIST_INIT(&ma->list);
 2221         }
 2222         if (ma->error)
 2223                 return (ma);
 2224 
 2225         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2226             M_MOUNT, M_WAITOK);
 2227         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2228         ma->v[ma->len].iov_len = strlen(name) + 1;
 2229         ma->len++;
 2230 
 2231         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2232         if (len < 0)
 2233                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2234         else
 2235                 ma->v[ma->len].iov_len = len;
 2236         ma->len++;
 2237         return (ma);
 2238 }
 2239 
 2240 /*
 2241  * Free a mntarg structure
 2242  */
 2243 static void
 2244 free_mntarg(struct mntarg *ma)
 2245 {
 2246         struct mntaarg *maa;
 2247 
 2248         while (!SLIST_EMPTY(&ma->list)) {
 2249                 maa = SLIST_FIRST(&ma->list);
 2250                 SLIST_REMOVE_HEAD(&ma->list, next);
 2251                 free(maa, M_MOUNT);
 2252         }
 2253         free(ma->v, M_MOUNT);
 2254         free(ma, M_MOUNT);
 2255 }
 2256 
 2257 /*
 2258  * Mount a filesystem
 2259  */
 2260 int
 2261 kernel_mount(struct mntarg *ma, int flags)
 2262 {
 2263         struct uio auio;
 2264         int error;
 2265 
 2266         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2267         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2268         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2269 
 2270         auio.uio_iov = ma->v;
 2271         auio.uio_iovcnt = ma->len;
 2272         auio.uio_segflg = UIO_SYSSPACE;
 2273 
 2274         error = ma->error;
 2275         if (!error)
 2276                 error = vfs_donmount(curthread, flags, &auio);
 2277         free_mntarg(ma);
 2278         return (error);
 2279 }
 2280 
 2281 /*
 2282  * A printflike function to mount a filesystem.
 2283  */
 2284 int
 2285 kernel_vmount(int flags, ...)
 2286 {
 2287         struct mntarg *ma = NULL;
 2288         va_list ap;
 2289         const char *cp;
 2290         const void *vp;
 2291         int error;
 2292 
 2293         va_start(ap, flags);
 2294         for (;;) {
 2295                 cp = va_arg(ap, const char *);
 2296                 if (cp == NULL)
 2297                         break;
 2298                 vp = va_arg(ap, const void *);
 2299                 ma = mount_arg(ma, cp, vp, -1);
 2300         }
 2301         va_end(ap);
 2302 
 2303         error = kernel_mount(ma, flags);
 2304         return (error);
 2305 }

Cache object: e6e5247a3bb3c88731592880dad060b3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.