vfs_mount.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/clock.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/libkern.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/reboot.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/sx.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/vnode.h>
   61 #include <vm/uma.h>
   62 
   63 #include <geom/geom.h>
   64 
   65 #include <machine/stdarg.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/mac/mac_framework.h>
   69 
   70 #include "opt_rootdevname.h"
   71 #include "opt_ddb.h"
   72 #include "opt_mac.h"
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif
   77 
/* NOTE(review): ROOTNAME is not referenced in the visible part of this file. */
#define ROOTNAME                "root_device"
/*
 * Byte limit applied by vfs_buildopts() to each option name, each option
 * value, and to the running total accounted for one option list.
 */
#define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)

/* Forward declarations of file-local helpers defined further down. */
static int      vfs_domount(struct thread *td, const char *fstype,
                    char *fspath, int fsflags, void *fsdata);
static int      vfs_mountroot_ask(void);
static int      vfs_mountroot_try(const char *mountfrom);
static void     free_mntarg(struct mntarg *ma);
static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);

/* Backing variable for the read/write vfs.usermount sysctl; off by default. */
static int      usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* Malloc types; M_MOUNT is used for all option-list storage in this file. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
/* UMA zone from which vfs_mount_alloc() obtains struct mount. */
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  102 
/*
 * A mount option list is a tail queue of name/value pairs.  All storage
 * hanging off an entry is allocated with M_MOUNT.
 */
TAILQ_HEAD(vfsoptlist, vfsopt);
struct vfsopt {
        TAILQ_ENTRY(vfsopt) link;       /* linkage within a vfsoptlist */
        char    *name;                  /* option name, a NUL-terminated string */
        void    *value;                 /* option data; NULL when len is 0 */
        int     len;                    /* size of value in bytes */
};
  110 
  111 /*
  112  * The vnode of the system's root (/ in the filesystem, without chroot
  113  * active.)
  114  */
  115 struct vnode    *rootvnode;
  116 
  117 /*
  118  * The root filesystem is detailed in the kernel environment variable
  119  * vfs.root.mountfrom, which is expected to be in the general format
  120  *
  121  * <vfsname>:[<path>]
  122  * vfsname   := the name of a VFS known to the kernel and capable
  123  *              of being mounted as root
  124  * path      := disk device name or other data used by the filesystem
  125  *              to locate its physical store
  126  */
  127 
/*
 * Global opts, taken by all filesystems.  The table is terminated by a
 * NULL entry.
 * NOTE(review): no user of this table is visible in this part of the file.
 */
static const char *global_opts[] = {
        "errmsg",
        "fstype",
        "fspath",
        "ro",
        "rw",
        "nosuid",
        "noexec",
        NULL
};
  141 
/*
 * The root specifiers we will try if RB_CDROM is specified.  Each entry
 * uses the <vfsname>:<path> form described above; the list is terminated
 * by a NULL entry.
 */
static char *cdrom_rootdevnames[] = {
        "cd9660:cd0",
        "cd9660:acd0",
        NULL
};
  150 
/* legacy find-root code */
char            *rootdevnames[2] = {NULL, NULL};
/* ROOTDEVNAME may be supplied at build time; fall back to "none". */
#ifndef ROOTDEVNAME
#  define ROOTDEVNAME NULL
#endif
/* Compile-time root device name, or NULL when none was configured. */
static const char       *ctrootdevname = ROOTDEVNAME;
  157 
  158 /*
  159  * ---------------------------------------------------------------------
  160  * Functions for building and sanitizing the mount options
  161  */
  162 
  163 /* Remove one mount option. */
  164 static void
  165 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  166 {
  167 
  168         TAILQ_REMOVE(opts, opt, link);
  169         free(opt->name, M_MOUNT);
  170         if (opt->value != NULL)
  171                 free(opt->value, M_MOUNT);
  172 #ifdef INVARIANTS
  173         else if (opt->len != 0)
  174                 panic("%s: mount option with NULL value but length != 0",
  175                     __func__);
  176 #endif
  177         free(opt, M_MOUNT);
  178 }
  179 
  180 /* Release all resources related to the mount options. */
  181 void
  182 vfs_freeopts(struct vfsoptlist *opts)
  183 {
  184         struct vfsopt *opt;
  185 
  186         while (!TAILQ_EMPTY(opts)) {
  187                 opt = TAILQ_FIRST(opts);
  188                 vfs_freeopt(opts, opt);
  189         }
  190         free(opts, M_MOUNT);
  191 }
  192 
  193 void
  194 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  195 {
  196         struct vfsopt *opt, *temp;
  197 
  198         if (opts == NULL)
  199                 return;
  200         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  201                 if (strcmp(opt->name, name) == 0)
  202                         vfs_freeopt(opts, opt);
  203         }
  204 }
  205 
  206 /*
  207  * Check if options are equal (with or without the "no" prefix).
  208  */
  209 static int
  210 vfs_equalopts(const char *opt1, const char *opt2)
  211 {
  212 
  213         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  214         if (strcmp(opt1, opt2) == 0)
  215                 return (1);
  216         /* "noopt" vs. "opt" */
  217         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  218                 return (1);
  219         /* "opt" vs. "noopt" */
  220         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  221                 return (1);
  222         return (0);
  223 }
  224 
  225 /*
  226  * If a mount option is specified several times,
  227  * (with or without the "no" prefix) only keep
  228  * the last occurence of it.
  229  */
  230 static void
  231 vfs_sanitizeopts(struct vfsoptlist *opts)
  232 {
  233         struct vfsopt *opt, *opt2, *tmp;
  234 
  235         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  236                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  237                 while (opt2 != NULL) {
  238                         if (vfs_equalopts(opt->name, opt2->name)) {
  239                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  240                                 vfs_freeopt(opts, opt2);
  241                                 opt2 = tmp;
  242                         } else {
  243                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  244                         }
  245                 }
  246         }
  247 }
  248 
  249 /*
  250  * Build a linked list of mount options from a struct uio.
  251  */
  252 static int
  253 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  254 {
  255         struct vfsoptlist *opts;
  256         struct vfsopt *opt;
  257         size_t memused;
  258         unsigned int i, iovcnt;
  259         int error, namelen, optlen;
  260 
  261         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  262         TAILQ_INIT(opts);
  263         memused = 0;
  264         iovcnt = auio->uio_iovcnt;
  265         for (i = 0; i < iovcnt; i += 2) {
  266                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  267                 namelen = auio->uio_iov[i].iov_len;
  268                 optlen = auio->uio_iov[i + 1].iov_len;
  269                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  270                 opt->value = NULL;
  271                 opt->len = 0;
  272 
  273                 /*
  274                  * Do this early, so jumps to "bad" will free the current
  275                  * option.
  276                  */
  277                 TAILQ_INSERT_TAIL(opts, opt, link);
  278                 memused += sizeof(struct vfsopt) + optlen + namelen;
  279 
  280                 /*
  281                  * Avoid consuming too much memory, and attempts to overflow
  282                  * memused.
  283                  */
  284                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  285                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  286                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  287                         error = EINVAL;
  288                         goto bad;
  289                 }
  290 
  291                 if (auio->uio_segflg == UIO_SYSSPACE) {
  292                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  293                 } else {
  294                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  295                             namelen);
  296                         if (error)
  297                                 goto bad;
  298                 }
  299                 /* Ensure names are null-terminated strings. */
  300                 if (opt->name[namelen - 1] != '\0') {
  301                         error = EINVAL;
  302                         goto bad;
  303                 }
  304                 if (optlen != 0) {
  305                         opt->len = optlen;
  306                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  307                         if (auio->uio_segflg == UIO_SYSSPACE) {
  308                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  309                                     optlen);
  310                         } else {
  311                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  312                                     opt->value, optlen);
  313                                 if (error)
  314                                         goto bad;
  315                         }
  316                 }
  317         }
  318         vfs_sanitizeopts(opts);
  319         *options = opts;
  320         return (0);
  321 bad:
  322         vfs_freeopts(opts);
  323         return (error);
  324 }
  325 
  326 /*
  327  * Merge the old mount options with the new ones passed
  328  * in the MNT_UPDATE case.
  329  */
  330 static void
  331 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  332 {
  333         struct vfsopt *opt, *opt2, *new;
  334 
  335         TAILQ_FOREACH(opt, opts, link) {
  336                 /*
  337                  * Check that this option hasn't been redefined
  338                  * nor cancelled with a "no" mount option.
  339                  */
  340                 opt2 = TAILQ_FIRST(toopts);
  341                 while (opt2 != NULL) {
  342                         if (strcmp(opt2->name, opt->name) == 0)
  343                                 goto next;
  344                         if (strncmp(opt2->name, "no", 2) == 0 &&
  345                             strcmp(opt2->name + 2, opt->name) == 0) {
  346                                 vfs_freeopt(toopts, opt2);
  347                                 goto next;
  348                         }
  349                         opt2 = TAILQ_NEXT(opt2, link);
  350                 }
  351                 /* We want this option, duplicate it. */
  352                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  353                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  354                 strcpy(new->name, opt->name);
  355                 if (opt->len != 0) {
  356                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  357                         bcopy(opt->value, new->value, opt->len);
  358                 } else {
  359                         new->value = NULL;
  360                 }
  361                 new->len = opt->len;
  362                 TAILQ_INSERT_TAIL(toopts, new, link);
  363 next:
  364                 continue;
  365         }
  366 }
  367 
  368 /*
  369  * Mount a filesystem.
  370  */
  371 int
  372 nmount(td, uap)
  373         struct thread *td;
  374         struct nmount_args /* {
  375                 struct iovec *iovp;
  376                 unsigned int iovcnt;
  377                 int flags;
  378         } */ *uap;
  379 {
  380         struct uio *auio;
  381         struct iovec *iov;
  382         unsigned int i;
  383         int error;
  384         u_int iovcnt;
  385 
  386         AUDIT_ARG(fflags, uap->flags);
  387 
  388         /*
  389          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  390          * userspace to set this flag, but we must filter it out if we want
  391          * MNT_UPDATE on the root file system to work.
  392          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  393          */
  394         uap->flags &= ~MNT_ROOTFS;
  395 
  396         iovcnt = uap->iovcnt;
  397         /*
  398          * Check that we have an even number of iovec's
  399          * and that we have at least two options.
  400          */
  401         if ((iovcnt & 1) || (iovcnt < 4))
  402                 return (EINVAL);
  403 
  404         error = copyinuio(uap->iovp, iovcnt, &auio);
  405         if (error)
  406                 return (error);
  407         iov = auio->uio_iov;
  408         for (i = 0; i < iovcnt; i++) {
  409                 if (iov->iov_len > MMAXOPTIONLEN) {
  410                         free(auio, M_IOV);
  411                         return (EINVAL);
  412                 }
  413                 iov++;
  414         }
  415         error = vfs_donmount(td, uap->flags, auio);
  416 
  417         free(auio, M_IOV);
  418         return (error);
  419 }
  420 
  421 /*
  422  * ---------------------------------------------------------------------
  423  * Various utility functions
  424  */
  425 
/*
 * Apply MNT_REF() to the mount point while holding its interlock.
 * NOTE(review): presumably this bumps mp->mnt_ref, the counter that
 * vfs_mount_destroy() waits on -- confirm against the macro definition.
 */
void
vfs_ref(struct mount *mp)
{

        MNT_ILOCK(mp);
        MNT_REF(mp);
        MNT_IUNLOCK(mp);
}
  434 
/*
 * Apply MNT_REL() to the mount point while holding its interlock;
 * the counterpart of vfs_ref().
 * NOTE(review): presumably this drops mp->mnt_ref and wakes a waiter in
 * vfs_mount_destroy() -- confirm against the macro definition.
 */
void
vfs_rel(struct mount *mp)
{

        MNT_ILOCK(mp);
        MNT_REL(mp);
        MNT_IUNLOCK(mp);
}
  443 
  444 static int
  445 mount_init(void *mem, int size, int flags)
  446 {
  447         struct mount *mp;
  448 
  449         mp = (struct mount *)mem;
  450         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  451         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  452         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  453         return (0);
  454 }
  455 
  456 static void
  457 mount_fini(void *mem, int size)
  458 {
  459         struct mount *mp;
  460 
  461         mp = (struct mount *)mem;
  462         lockdestroy(&mp->mnt_explock);
  463         lockdestroy(&mp->mnt_lock);
  464         mtx_destroy(&mp->mnt_mtx);
  465 }
  466 
  467 /*
  468  * Allocate and initialize the mount point struct.
  469  */
  470 struct mount *
  471 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  472     const char *fspath, struct thread *td)
  473 {
  474         struct mount *mp;
  475 
  476         mp = uma_zalloc(mount_zone, M_WAITOK);
  477         bzero(&mp->mnt_startzero,
  478             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  479         TAILQ_INIT(&mp->mnt_nvnodelist);
  480         mp->mnt_nvnodelistsize = 0;
  481         mp->mnt_ref = 0;
  482         (void) vfs_busy(mp, LK_NOWAIT, 0, td);
  483         mp->mnt_op = vfsp->vfc_vfsops;
  484         mp->mnt_vfc = vfsp;
  485         vfsp->vfc_refcount++;   /* XXX Unlocked */
  486         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  487         mp->mnt_gen++;
  488         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  489         mp->mnt_vnodecovered = vp;
  490         mp->mnt_cred = crdup(td->td_ucred);
  491         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  492         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  493         mp->mnt_iosize_max = DFLTPHYS;
  494 #ifdef MAC
  495         mac_init_mount(mp);
  496         mac_create_mount(td->td_ucred, mp);
  497 #endif
  498         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  499         return (mp);
  500 }
  501 
  502 /*
  503  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  504  */
  505 void
  506 vfs_mount_destroy(struct mount *mp)
  507 {
  508         int i;
  509 
  510         MNT_ILOCK(mp);
  511         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
  512         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  513                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  514                 wakeup(mp);
  515         }
  516         for (i = 0; mp->mnt_ref && i < 3; i++)
  517                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
  518         /*
  519          * This will always cause a 3 second delay in rebooting due to
  520          * refs on the root mountpoint that never go away.  Most of these
  521          * are held by init which never exits.
  522          */
  523         if (i == 3 && (!rebooting || bootverbose))
  524                 printf("Mount point %s had %d dangling refs\n",
  525                     mp->mnt_stat.f_mntonname, mp->mnt_ref);
  526         if (mp->mnt_holdcnt != 0) {
  527                 printf("Waiting for mount point to be unheld\n");
  528                 while (mp->mnt_holdcnt != 0) {
  529                         mp->mnt_holdcntwaiters++;
  530                         msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
  531                                PZERO, "mntdestroy", 0);
  532                         mp->mnt_holdcntwaiters--;
  533                 }
  534                 printf("mount point unheld\n");
  535         }
  536         if (mp->mnt_writeopcount > 0) {
  537                 printf("Waiting for mount point write ops\n");
  538                 while (mp->mnt_writeopcount > 0) {
  539                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  540                         msleep(&mp->mnt_writeopcount,
  541                                MNT_MTX(mp),
  542                                PZERO, "mntdestroy2", 0);
  543                 }
  544                 printf("mount point write ops completed\n");
  545         }
  546         if (mp->mnt_secondary_writes > 0) {
  547                 printf("Waiting for mount point secondary write ops\n");
  548                 while (mp->mnt_secondary_writes > 0) {
  549                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  550                         msleep(&mp->mnt_secondary_writes,
  551                                MNT_MTX(mp),
  552                                PZERO, "mntdestroy3", 0);
  553                 }
  554                 printf("mount point secondary write ops completed\n");
  555         }
  556         MNT_IUNLOCK(mp);
  557         mp->mnt_vfc->vfc_refcount--;
  558         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  559                 struct vnode *vp;
  560 
  561                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  562                         vprint("", vp);
  563                 panic("unmount: dangling vnode");
  564         }
  565         MNT_ILOCK(mp);
  566         if (mp->mnt_kern_flag & MNTK_MWAIT)
  567                 wakeup(mp);
  568         if (mp->mnt_writeopcount != 0)
  569                 panic("vfs_mount_destroy: nonzero writeopcount");
  570         if (mp->mnt_secondary_writes != 0)
  571                 panic("vfs_mount_destroy: nonzero secondary_writes");
  572         if (mp->mnt_nvnodelistsize != 0)
  573                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  574         mp->mnt_writeopcount = -1000;
  575         mp->mnt_nvnodelistsize = -1000;
  576         mp->mnt_secondary_writes = -1000;
  577         MNT_IUNLOCK(mp);
  578 #ifdef MAC
  579         mac_destroy_mount(mp);
  580 #endif
  581         if (mp->mnt_opt != NULL)
  582                 vfs_freeopts(mp->mnt_opt);
  583         crfree(mp->mnt_cred);
  584         uma_zfree(mount_zone, mp);
  585 }
  586 
  587 int
  588 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  589 {
  590         struct vfsoptlist *optlist;
  591         struct vfsopt *opt, *noro_opt, *tmp_opt;
  592         char *fstype, *fspath, *errmsg;
  593         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  594         int has_rw, has_noro;
  595 
  596         errmsg = NULL;
  597         errmsg_len = 0;
  598         errmsg_pos = -1;
  599         has_rw = 0;
  600         has_noro = 0;
  601 
  602         error = vfs_buildopts(fsoptions, &optlist);
  603         if (error)
  604                 return (error);
  605 
  606         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  607                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  608 
  609         /*
  610          * We need these two options before the others,
  611          * and they are mandatory for any filesystem.
  612          * Ensure they are NUL terminated as well.
  613          */
  614         fstypelen = 0;
  615         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  616         if (error || fstype[fstypelen - 1] != '\0') {
  617                 error = EINVAL;
  618                 if (errmsg != NULL)
  619                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  620                 goto bail;
  621         }
  622         fspathlen = 0;
  623         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  624         if (error || fspath[fspathlen - 1] != '\0') {
  625                 error = EINVAL;
  626                 if (errmsg != NULL)
  627                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  628                 goto bail;
  629         }
  630 
  631         /*
  632          * We need to see if we have the "update" option
  633          * before we call vfs_domount(), since vfs_domount() has special
  634          * logic based on MNT_UPDATE.  This is very important
  635          * when we want to update the root filesystem.
  636          */
  637         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  638                 if (strcmp(opt->name, "update") == 0) {
  639                         fsflags |= MNT_UPDATE;
  640                         vfs_freeopt(optlist, opt);
  641                 }
  642                 else if (strcmp(opt->name, "async") == 0)
  643                         fsflags |= MNT_ASYNC;
  644                 else if (strcmp(opt->name, "force") == 0) {
  645                         fsflags |= MNT_FORCE;
  646                         vfs_freeopt(optlist, opt);
  647                 }
  648                 else if (strcmp(opt->name, "reload") == 0) {
  649                         fsflags |= MNT_RELOAD;
  650                         vfs_freeopt(optlist, opt);
  651                 }
  652                 else if (strcmp(opt->name, "multilabel") == 0)
  653                         fsflags |= MNT_MULTILABEL;
  654                 else if (strcmp(opt->name, "noasync") == 0)
  655                         fsflags &= ~MNT_ASYNC;
  656                 else if (strcmp(opt->name, "noatime") == 0)
  657                         fsflags |= MNT_NOATIME;
  658                 else if (strcmp(opt->name, "atime") == 0) {
  659                         free(opt->name, M_MOUNT);
  660                         opt->name = strdup("nonoatime", M_MOUNT);
  661                 }
  662                 else if (strcmp(opt->name, "noclusterr") == 0)
  663                         fsflags |= MNT_NOCLUSTERR;
  664                 else if (strcmp(opt->name, "clusterr") == 0) {
  665                         free(opt->name, M_MOUNT);
  666                         opt->name = strdup("nonoclusterr", M_MOUNT);
  667                 }
  668                 else if (strcmp(opt->name, "noclusterw") == 0)
  669                         fsflags |= MNT_NOCLUSTERW;
  670                 else if (strcmp(opt->name, "clusterw") == 0) {
  671                         free(opt->name, M_MOUNT);
  672                         opt->name = strdup("nonoclusterw", M_MOUNT);
  673                 }
  674                 else if (strcmp(opt->name, "noexec") == 0)
  675                         fsflags |= MNT_NOEXEC;
  676                 else if (strcmp(opt->name, "exec") == 0) {
  677                         free(opt->name, M_MOUNT);
  678                         opt->name = strdup("nonoexec", M_MOUNT);
  679                 }
  680                 else if (strcmp(opt->name, "nosuid") == 0)
  681                         fsflags |= MNT_NOSUID;
  682                 else if (strcmp(opt->name, "suid") == 0) {
  683                         free(opt->name, M_MOUNT);
  684                         opt->name = strdup("nonosuid", M_MOUNT);
  685                 }
  686                 else if (strcmp(opt->name, "nosymfollow") == 0)
  687                         fsflags |= MNT_NOSYMFOLLOW;
  688                 else if (strcmp(opt->name, "symfollow") == 0) {
  689                         free(opt->name, M_MOUNT);
  690                         opt->name = strdup("nonosymfollow", M_MOUNT);
  691                 }
  692                 else if (strcmp(opt->name, "noro") == 0) {
  693                         fsflags &= ~MNT_RDONLY;
  694                         has_noro = 1;
  695                 }
  696                 else if (strcmp(opt->name, "rw") == 0) {
  697                         fsflags &= ~MNT_RDONLY;
  698                         has_rw = 1;
  699                 }
  700                 else if (strcmp(opt->name, "ro") == 0)
  701                         fsflags |= MNT_RDONLY;
  702                 else if (strcmp(opt->name, "rdonly") == 0) {
  703                         free(opt->name, M_MOUNT);
  704                         opt->name = strdup("ro", M_MOUNT);
  705                         fsflags |= MNT_RDONLY;
  706                 }
  707                 else if (strcmp(opt->name, "suiddir") == 0)
  708                         fsflags |= MNT_SUIDDIR;
  709                 else if (strcmp(opt->name, "sync") == 0)
  710                         fsflags |= MNT_SYNCHRONOUS;
  711                 else if (strcmp(opt->name, "union") == 0)
  712                         fsflags |= MNT_UNION;
  713         }
  714 
  715         /*
  716          * If "rw" was specified as a mount option, and we
  717          * are trying to update a mount-point from "ro" to "rw",
  718          * we need a mount option "noro", since in vfs_mergeopts(),
  719          * "noro" will cancel "ro", but "rw" will not do anything.
  720          */
  721         if (has_rw && !has_noro) {
  722                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  723                 noro_opt->name = strdup("noro", M_MOUNT);
  724                 noro_opt->value = NULL;
  725                 noro_opt->len = 0;
  726                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
  727         }
  728 
  729         /*
  730          * Be ultra-paranoid about making sure the type and fspath
  731          * variables will fit in our mp buffers, including the
  732          * terminating NUL.
  733          */
  734         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  735                 error = ENAMETOOLONG;
  736                 goto bail;
  737         }
  738 
  739         mtx_lock(&Giant);
  740         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  741         mtx_unlock(&Giant);
  742 bail:
  743         /* copyout the errmsg */
  744         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  745             && errmsg_len > 0 && errmsg != NULL) {
  746                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  747                         bcopy(errmsg,
  748                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  749                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  750                 } else {
  751                         copyout(errmsg,
  752                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  753                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  754                 }
  755         }
  756 
  757         if (error != 0)
  758                 vfs_freeopts(optlist);
  759         return (error);
  760 }
  761 
  762 /*
  763  * Old mount API.
  764  */
  765 #ifndef _SYS_SYSPROTO_H_
  766 struct mount_args {
  767         char    *type;
  768         char    *path;
  769         int     flags;
  770         caddr_t data;
  771 };
  772 #endif
  773 /* ARGSUSED */
      /*
       * Old mount(2) entry point.
       *
       * Copies the filesystem type name in from userspace, looks the
       * filesystem up with vfs_byname_kld() and hands the request to that
       * filesystem's vfs_cmount() method, together with a mntarg list that
       * carries "fstype", "fspath" and the ro/suid/exec options derived from
       * uap->flags.  MNT_ROOTFS is always stripped from uap->flags first.
       *
       * Returns the copyinstr() error if the type name cannot be copied in,
       * ENOENT if the filesystem lookup fails, EOPNOTSUPP if the filesystem
       * has no vfs_cmount() method, and otherwise whatever vfs_cmount()
       * returns.  Giant is held around the lookup and the vfs_cmount() call.
       */
  774 int
  775 mount(td, uap)
  776         struct thread *td;
  777         struct mount_args /* {
  778                 char *type;
  779                 char *path;
  780                 int flags;
  781                 caddr_t data;
  782         } */ *uap;
  783 {
  784         char *fstype;
  785         struct vfsconf *vfsp = NULL;
  786         struct mntarg *ma = NULL;
  787         int error;
  788 
  789         AUDIT_ARG(fflags, uap->flags);
  790 
  791         /*
  792          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  793          * userspace to set this flag, but we must filter it out if we want
  794          * MNT_UPDATE on the root file system to work.
  795          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  796          */
  797         uap->flags &= ~MNT_ROOTFS;
  798 
  799         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  800         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  801         if (error) {
  802                 free(fstype, M_TEMP);
  803                 return (error);
  804         }
  805 
  806         AUDIT_ARG(text, fstype);
  807         mtx_lock(&Giant);
  808         vfsp = vfs_byname_kld(fstype, td, &error);
              /* The kernel copy of the name is only needed for the lookup. */
  809         free(fstype, M_TEMP);
  810         if (vfsp == NULL) {
  811                 mtx_unlock(&Giant);
  812                 return (ENOENT);
  813         }
  814         if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
  815                 mtx_unlock(&Giant);
  816                 return (EOPNOTSUPP);
  817         }
  818 
              /*
               * The type name is bounded by MFSNAMELEN, the same limit used
               * for the copyinstr() above and re-checked by vfs_domount();
               * only the mount path may be as long as MNAMELEN.
               */
  819         ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
  820         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
              /*
               * NOTE(review): mount_argb() is defined elsewhere; presumably the
               * flag selects between the "no"-prefixed option name and its
               * positive form -- confirm against its definition.
               */
  821         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
  822         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
  823         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
  824 
  825         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
  826         mtx_unlock(&Giant);
  827         return (error);
  828 }
  829 
  830 
  831 /*
  832  * vfs_domount(): actually attempt a filesystem mount.
       *
       * Handles both a new mount and, when MNT_UPDATE is set in fsflags, an
       * update of the mounted filesystem whose root vnode is found at fspath.
       * Giant must be held by the caller (asserted below).
       *
       * fsdata is stored in mp->mnt_optnew for the duration of VFS_MOUNT().
       * If the mount succeeds it becomes mp->mnt_opt (any previous list is
       * freed); on failure it is only detached from the mount and is not
       * freed here.
       *
       * Returns 0 on success or an errno value.
  833  */
  834 static int
  835 vfs_domount(
  836         struct thread *td,      /* Calling thread. */
  837         const char *fstype,     /* Filesystem type. */
  838         char *fspath,           /* Mount path. */
  839         int fsflags,            /* Flags common to all filesystems. */
  840         void *fsdata            /* Options local to the filesystem. */
  841         )
  842 {
  843         struct vnode *vp;
  844         struct mount *mp;
  845         struct vfsconf *vfsp;
  846         struct export_args export;
  847         int error, flag = 0;
  848         struct vattr va;
  849         struct nameidata nd;
  850 
  851         mtx_assert(&Giant, MA_OWNED);
  852         /*
  853          * Be ultra-paranoid about making sure the type and fspath
  854          * variables will fit in our mp buffers, including the
  855          * terminating NUL.
  856          */
  857         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  858                 return (ENAMETOOLONG);
  859 
              /* Jailed callers, or any caller when usermount is off, need privilege. */
  860         if (jailed(td->td_ucred) || usermount == 0) {
  861                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
  862                         return (error);
  863         }
  864 
  865         /*
  866          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  867          */
  868         if (fsflags & MNT_EXPORTED) {
  869                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
  870                 if (error)
  871                         return (error);
  872         }
  873         if (fsflags & MNT_SUIDDIR) {
  874                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
  875                 if (error)
  876                         return (error);
  877         }
  878         /*
  879          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
  880          */
  881         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
  882                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
  883                         fsflags |= MNT_NOSUID | MNT_USER;
  884         }
  885 
  886         /* Load KLDs before we lock the covered vnode to avoid reversals. */
  887         vfsp = NULL;
  888         if ((fsflags & MNT_UPDATE) == 0) {
  889                 /* Don't try to load KLDs if we're mounting the root. */
  890                 if (fsflags & MNT_ROOTFS)
  891                         vfsp = vfs_byname(fstype);
  892                 else
  893                         vfsp = vfs_byname_kld(fstype, td, &error);
                      /* The error stored by vfs_byname_kld() is not propagated. */
  894                 if (vfsp == NULL)
  895                         return (ENODEV);
  896                 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
  897                         return (EPERM);
  898         }
  899         /*
  900          * Get vnode to be covered
  901          */
  902         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
  903             fspath, td);
  904         if ((error = namei(&nd)) != 0)
  905                 return (error);
  906         NDFREE(&nd, NDF_ONLY_PNBUF);
  907         vp = nd.ni_vp;
  908         if (fsflags & MNT_UPDATE) {
                      /* An update must name the root vnode of a mounted filesystem. */
  909                 if ((vp->v_vflag & VV_ROOT) == 0) {
  910                         vput(vp);
  911                         return (EINVAL);
  912                 }
  913                 mp = vp->v_mount;
  914                 MNT_ILOCK(mp);
                      /* Remember the current flags so a failed update can restore them. */
  915                 flag = mp->mnt_flag;
  916                 /*
  917                  * We only allow the filesystem to be reloaded if it
  918                  * is currently mounted read-only.
  919                  */
  920                 if ((fsflags & MNT_RELOAD) &&
  921                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  922                         MNT_IUNLOCK(mp);
  923                         vput(vp);
  924                         return (EOPNOTSUPP);    /* Needs translation */
  925                 }
  926                 MNT_IUNLOCK(mp);
  927                 /*
  928                  * Only privileged root, or (if MNT_USER is set) the user that
  929                  * did the original mount is permitted to update it.
  930                  */
  931                 error = vfs_suser(mp, td);
  932                 if (error) {
  933                         vput(vp);
  934                         return (error);
  935                 }
                      /* LK_NOWAIT: report EBUSY rather than sleeping for the mount. */
  936                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  937                         vput(vp);
  938                         return (EBUSY);
  939                 }
  940                 VI_LOCK(vp);
  941                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  942                     vp->v_mountedhere != NULL) {
  943                         VI_UNLOCK(vp);
  944                         vfs_unbusy(mp, td);
  945                         vput(vp);
  946                         return (EBUSY);
  947                 }
                      /*
                       * While VI_MOUNT is set, another vfs_domount() on this vnode
                       * fails the check above with EBUSY.  It is cleared again on
                       * every exit path below.
                       */
  948                 vp->v_iflag |= VI_MOUNT;
  949                 VI_UNLOCK(vp);
  950                 MNT_ILOCK(mp);
  951                 mp->mnt_flag |= fsflags &
  952                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
  953                 MNT_IUNLOCK(mp);
  954                 VOP_UNLOCK(vp, 0, td);
  955                 mp->mnt_optnew = fsdata;
  956                 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  957         } else {
  958                 /*
  959                  * If the user is not root, ensure that they own the directory
  960                  * onto which we are attempting to mount.
  961                  */
  962                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  963                 if (error) {
  964                         vput(vp);
  965                         return (error);
  966                 }
  967                 if (va.va_uid != td->td_ucred->cr_uid) {
  968                         error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
  969                             0);
  970                         if (error) {
  971                                 vput(vp);
  972                                 return (error);
  973                         }
  974                 }
  975                 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
  976                 if (error != 0) {
  977                         vput(vp);
  978                         return (error);
  979                 }
  980                 if (vp->v_type != VDIR) {
  981                         vput(vp);
  982                         return (ENOTDIR);
  983                 }
  984                 VI_LOCK(vp);
  985                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  986                     vp->v_mountedhere != NULL) {
  987                         VI_UNLOCK(vp);
  988                         vput(vp);
  989                         return (EBUSY);
  990                 }
                      /* Same VI_MOUNT exclusion as on the update path above. */
  991                 vp->v_iflag |= VI_MOUNT;
  992                 VI_UNLOCK(vp);
  993 
  994                 /*
  995                  * Allocate and initialize the filesystem.
  996                  */
  997                 mp = vfs_mount_alloc(vp, vfsp, fspath, td);
  998                 VOP_UNLOCK(vp, 0, td);
  999 
 1000                 /* XXXMAC: pass to vfs_mount_alloc? */
 1001                 mp->mnt_optnew = fsdata;
 1002         }
 1003 
 1004         /*
 1005          * Set the mount level flags.
 1006          */
 1007         MNT_ILOCK(mp);
 1008         mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
 1009                 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
 1010                             MNT_RDONLY));
 1011         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1012                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1013         MNT_IUNLOCK(mp);
 1014         /*
 1015          * Mount the filesystem.
 1016          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1017          * get.  No freeing of cn_pnbuf.
 1018          */
 1019         error = VFS_MOUNT(mp, td);
 1020 
 1021         /*
 1022          * Process the export option only if we are
 1023          * updating mount options.
 1024          */
 1025         if (!error && (fsflags & MNT_UPDATE)) {
 1026                 if (vfs_copyopt(mp->mnt_optnew, "export", &export,
 1027                     sizeof(export)) == 0)
 1028                         error = vfs_export(mp, &export);
 1029         }
 1030 
              /* On success the new option list replaces the old one. */
 1031         if (!error) {
 1032                 if (mp->mnt_opt != NULL)
 1033                         vfs_freeopts(mp->mnt_opt);
 1034                 mp->mnt_opt = mp->mnt_optnew;
 1035                 (void)VFS_STATFS(mp, &mp->mnt_stat, td);
 1036         }
 1037         /*
 1038          * Prevent external consumers of mount options from reading
 1039          * mnt_optnew.
 1040          */
 1041         mp->mnt_optnew = NULL;
              /* MNT_UPDATE is set in mp->mnt_flag only on the update path above. */
 1042         if (mp->mnt_flag & MNT_UPDATE) {
 1043                 MNT_ILOCK(mp);
                      /*
                       * A failed update restores the flags saved in 'flag', except
                       * for MNT_QUOTA, whose current value is kept.  A successful
                       * update just drops the transient request flags.
                       */
 1044                 if (error)
 1045                         mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
 1046                                 (flag & ~MNT_QUOTA);
 1047                 else
 1048                         mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
 1049                                           MNT_FORCE | MNT_SNAPSHOT);
 1050                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1051                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1052                 else
 1053                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1054                 MNT_IUNLOCK(mp);
                      /*
                       * A writable mount needs a syncer vnode; a read-only one
                       * drops it.
                       * NOTE(review): the result of vfs_allocate_syncvnode()
                       * overwrites 'error' here, including an error left by a
                       * failed update, and can turn a successful update (whose
                       * options are already installed in mp->mnt_opt) into a
                       * failure return -- confirm how callers treat fsdata then.
                       */
 1055                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1056                         if (mp->mnt_syncer == NULL)
 1057                                 error = vfs_allocate_syncvnode(mp);
 1058                 } else {
 1059                         if (mp->mnt_syncer != NULL)
 1060                                 vrele(mp->mnt_syncer);
 1061                         mp->mnt_syncer = NULL;
 1062                 }
 1063                 vfs_unbusy(mp, td);
 1064                 VI_LOCK(vp);
 1065                 vp->v_iflag &= ~VI_MOUNT;
 1066                 VI_UNLOCK(vp);
 1067                 vrele(vp);
 1068                 return (error);
 1069         }
              /* New mount: settle MNTK_ASYNC, then attach to the covered vnode. */
 1070         MNT_ILOCK(mp);
 1071         if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1072                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1073         else
 1074                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1075         MNT_IUNLOCK(mp);
 1076         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1077         /*
 1078          * Put the new filesystem on the mount list after root.
 1079          */
 1080         cache_purge(vp);
 1081         if (!error) {
 1082                 struct vnode *newdp;
 1083 
 1084                 VI_LOCK(vp);
 1085                 vp->v_iflag &= ~VI_MOUNT;
 1086                 VI_UNLOCK(vp);
 1087                 vp->v_mountedhere = mp;
 1088                 mtx_lock(&mountlist_mtx);
 1089                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1090                 mtx_unlock(&mountlist_mtx);
 1091                 vfs_event_signal(NULL, VQ_MOUNT, 0);
 1092                 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
 1093                         panic("mount: lost mount");
 1094                 mountcheckdirs(vp, newdp);
 1095                 vput(newdp);
 1096                 VOP_UNLOCK(vp, 0, td);
                      /*
                       * NOTE(review): if vfs_allocate_syncvnode() fails here the
                       * mount stays on the mount list, yet an error is returned
                       * and the covered vnode reference is dropped -- confirm
                       * that callers handle fsdata correctly in that case.
                       */
 1097                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1098                         error = vfs_allocate_syncvnode(mp);
 1099                 vfs_unbusy(mp, td);
 1100                 if (error)
 1101                         vrele(vp);
 1102         } else {
                      /* VFS_MOUNT() failed: undo the allocation and release vp. */
 1103                 VI_LOCK(vp);
 1104                 vp->v_iflag &= ~VI_MOUNT;
 1105                 VI_UNLOCK(vp);
 1106                 vfs_unbusy(mp, td);
 1107                 vfs_mount_destroy(mp);
 1108                 vput(vp);
 1109         }
 1110         return (error);
 1111 }
 1112 
 1113 /*
 1114  * Unmount a filesystem.
 1115  *
 1116  * Note: unmount takes a path to the vnode mounted on as argument, not
 1117  * special file (as before).
 1118  */
 1119 #ifndef _SYS_SYSPROTO_H_
      /* Fallback declaration of the unmount() argument block. */
 1120 struct unmount_args {
 1121         char    *path;  /* mount point, or "FSID:a:b" with MNT_BYFSID */
 1122         int     flags;  /* MNT_* flags, passed on to dounmount() */
 1123 };
 1124 #endif
 1125 /* ARGSUSED */
 1126 /*
 1127  * unmount(2) system call: unmount the filesystem named by uap->path.
 1128  *
 1129  * With MNT_BYFSID set in uap->flags the string must have the form
 1130  * "FSID:val0:val1" and selects the mount by filesystem ID; otherwise it
 1131  * is compared against each mount's f_mntonname.  The mount list is
 1132  * walked from its tail and the first match is passed to dounmount().
 1133  */
 1134 int
 1135 unmount(td, uap)
 1136         struct thread *td;
 1137         register struct unmount_args /* {
 1138                 char *path;
 1139                 int flags;
 1140         } */ *uap;
 1141 {
 1142         struct mount *mp;
 1143         char *name;
 1144         int byfsid, error, fsid0, fsid1;
 1145 
 1146         if (jailed(td->td_ucred) || usermount == 0) {
 1147                 if ((error = priv_check(td, PRIV_VFS_UNMOUNT)) != 0)
 1148                         return (error);
 1149         }
 1150 
 1151         name = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1152         if ((error = copyinstr(uap->path, name, MNAMELEN, NULL)) != 0) {
 1153                 free(name, M_TEMP);
 1154                 return (error);
 1155         }
 1156         AUDIT_ARG(upath, td, name, ARG_UPATH1);
 1157 
 1158         byfsid = (uap->flags & MNT_BYFSID) != 0;
 1159         fsid0 = fsid1 = 0;
 1160         mtx_lock(&Giant);
 1161         /* Decode the filesystem ID before touching the mount list. */
 1162         if (byfsid && sscanf(name, "FSID:%d:%d", &fsid0, &fsid1) != 2) {
 1163                 free(name, M_TEMP);
 1164                 error = EINVAL;
 1165                 goto out;
 1166         }
 1167 
 1168         mtx_lock(&mountlist_mtx);
 1169         TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1170                 if (byfsid) {
 1171                         if (mp->mnt_stat.f_fsid.val[0] == fsid0 &&
 1172                             mp->mnt_stat.f_fsid.val[1] == fsid1)
 1173                                 break;
 1174                 } else if (strcmp(mp->mnt_stat.f_mntonname, name) == 0)
 1175                         break;
 1176         }
 1177         mtx_unlock(&mountlist_mtx);
 1178         free(name, M_TEMP);
 1179 
 1180         if (mp == NULL) {
 1181                 /*
 1182                  * A nonexistent path and a non-mountpoint can no longer be
 1183                  * told apart (they used to yield ENOENT and EINVAL), so the
 1184                  * path case reports the more likely EINVAL for
 1185                  * compatibility; a failed FSID lookup reports ENOENT.
 1186                  */
 1187                 error = byfsid ? ENOENT : EINVAL;
 1188         } else if (mp->mnt_flag & MNT_ROOTFS) {
 1189                 /* The root filesystem may not be unmounted. */
 1190                 error = EINVAL;
 1191         } else
 1192                 error = dounmount(mp, uap->flags, td);
 1193 out:
 1194         mtx_unlock(&Giant);
 1195         return (error);
 1196 }
 1198 
 1199 /*
 1200  * Do the actual filesystem unmount.
       *
       * The caller must hold Giant (asserted below).  On success the mount is
       * removed from the mount list, detached from its covered vnode and
       * destroyed, and 0 is returned.  On failure the mount is put back into
       * service and an errno value is returned; EBUSY is returned if the mount
       * is no longer the one on the covered vnode or is already being
       * unmounted.  The caller's credentials are checked here with vfs_suser().
 1201  */
 1202 int
 1203 dounmount(mp, flags, td)
 1204         struct mount *mp;
 1205         int flags;
 1206         struct thread *td;
 1207 {
 1208         struct vnode *coveredvp, *fsrootvp;
 1209         int error;
 1210         int async_flag;
 1211         int mnt_gen_r;
 1212 
 1213         mtx_assert(&Giant, MA_OWNED);
 1214 
 1215         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
                      /* Sample the generation before possibly sleeping on the lock. */
 1216                 mnt_gen_r = mp->mnt_gen;
 1217                 VI_LOCK(coveredvp);
                      /* Hold the vnode across the lock acquisition below. */
 1218                 vholdl(coveredvp);
 1219                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
 1220                 vdrop(coveredvp);
 1221                 /*
 1222                  * Check for mp being unmounted while waiting for the
 1223                  * covered vnode lock.
 1224                  */
 1225                 if (coveredvp->v_mountedhere != mp ||
 1226                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1227                         VOP_UNLOCK(coveredvp, 0, td);
 1228                         return (EBUSY);
 1229                 }
 1230         }
 1231         /*
 1232          * Only privileged root, or (if MNT_USER is set) the user that did the
 1233          * original mount is permitted to unmount this filesystem.
 1234          */
 1235         error = vfs_suser(mp, td);
 1236         if (error) {
 1237                 if (coveredvp)
 1238                         VOP_UNLOCK(coveredvp, 0, td);
 1239                 return (error);
 1240         }
 1241 
 1242         MNT_ILOCK(mp);
              /* Another unmount of this filesystem is already in progress. */
 1243         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 1244                 MNT_IUNLOCK(mp);
 1245                 if (coveredvp)
 1246                         VOP_UNLOCK(coveredvp, 0, td);
 1247                 return (EBUSY);
 1248         }
 1249         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
 1250         /* Allow filesystems to detect that a forced unmount is in progress. */
 1251         if (flags & MNT_FORCE)
 1252                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
              /*
               * Drain mnt_lock; LK_INTERLOCK passes the mount interlock taken
               * above to lockmgr().  Only a forced unmount is allowed to wait.
               */
 1253         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
 1254             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
 1255         if (error) {
                      /* Could not drain: clear the unmount marks and wake waiters. */
 1256                 MNT_ILOCK(mp);
 1257                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
 1258                     MNTK_UNMOUNTF);
 1259                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1260                         wakeup(mp);
 1261                 MNT_IUNLOCK(mp);
 1262                 if (coveredvp)
 1263                         VOP_UNLOCK(coveredvp, 0, td);
 1264                 return (error);
 1265         }
 1266         vn_start_write(NULL, &mp, V_WAIT);
 1267 
 1268         if (mp->mnt_flag & MNT_EXPUBLIC)
 1269                 vfs_setpublicfs(NULL, NULL, NULL);
 1270 
 1271         vfs_msync(mp, MNT_WAIT);
 1272         MNT_ILOCK(mp);
              /* Remember MNT_ASYNC so a failed unmount can restore it. */
 1273         async_flag = mp->mnt_flag & MNT_ASYNC;
 1274         mp->mnt_flag &= ~MNT_ASYNC;
 1275         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1276         MNT_IUNLOCK(mp);
 1277         cache_purgevfs(mp);     /* remove cache entries for this file sys */
              /*
               * NOTE(review): mnt_syncer is released but not cleared here, while
               * the failure path below tests it against NULL; presumably it is
               * cleared when the syncer vnode is reclaimed -- confirm.
               */
 1278         if (mp->mnt_syncer != NULL)
 1279                 vrele(mp->mnt_syncer);
 1280         /*
 1281          * For forced unmounts, move process cdir/rdir refs on the fs root
 1282          * vnode to the covered vnode.  For non-forced unmounts we want
 1283          * such references to cause an EBUSY error.
 1284          */
 1285         if ((flags & MNT_FORCE) &&
 1286             VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1287                 if (mp->mnt_vnodecovered != NULL)
 1288                         mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
 1289                 if (fsrootvp == rootvnode) {
 1290                         vrele(rootvnode);
 1291                         rootvnode = NULL;
 1292                 }
 1293                 vput(fsrootvp);
 1294         }
              /*
               * VFS_SYNC() is skipped for read-only mounts.  VFS_UNMOUNT() runs
               * if the sync was skipped or succeeded, or if the unmount is
               * forced.
               */
 1295         if (((mp->mnt_flag & MNT_RDONLY) ||
 1296              (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
 1297             (flags & MNT_FORCE)) {
 1298                 error = VFS_UNMOUNT(mp, flags, td);
 1299         }
 1300         vn_finished_write(mp);
 1301         /*
 1302          * If we failed to flush the dirty blocks for this mount point,
 1303          * undo all the cdir/rdir and rootvnode changes we made above.
 1304          * Unless we failed to do so because the device is reporting that
 1305          * it doesn't exist anymore.
 1306          */
 1307         if (error && error != ENXIO) {
 1308                 if ((flags & MNT_FORCE) &&
 1309                     VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1310                         if (mp->mnt_vnodecovered != NULL)
 1311                                 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
 1312                         if (rootvnode == NULL) {
 1313                                 rootvnode = fsrootvp;
 1314                                 vref(rootvnode);
 1315                         }
 1316                         vput(fsrootvp);
 1317                 }
 1318                 MNT_ILOCK(mp);
 1319                 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
 1320                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
                              /* The interlock is dropped around the allocation. */
 1321                         MNT_IUNLOCK(mp);
 1322                         (void) vfs_allocate_syncvnode(mp);
 1323                         MNT_ILOCK(mp);
 1324                 }
 1325                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1326                 mp->mnt_flag |= async_flag;
 1327                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1328                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1329                 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1330                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1331                         wakeup(mp);
 1332                 MNT_IUNLOCK(mp);
 1333                 if (coveredvp)
 1334                         VOP_UNLOCK(coveredvp, 0, td);
 1335                 return (error);
 1336         }
              /* Success, or ENXIO from the device: tear the mount down. */
 1337         mtx_lock(&mountlist_mtx);
 1338         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1339         mtx_unlock(&mountlist_mtx);
 1340         if (coveredvp != NULL) {
 1341                 coveredvp->v_mountedhere = NULL;
 1342                 vput(coveredvp);
 1343         }
 1344         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1345         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1346         vfs_mount_destroy(mp);
 1347         return (0);
 1348 }
 1349 
 1350 /*
 1351  * ---------------------------------------------------------------------
 1352  * Mounting of root filesystem
 1353  *
 1354  */
 1355 
 1356 struct root_hold_token {        /* one outstanding hold on the root mount */
 1357         const char                      *who;   /* name printed while waiting */
 1358         LIST_ENTRY(root_hold_token)     list;   /* linkage on root_holds */
 1359 };
 1360 
      /* Outstanding holds; every access in this file is under mountlist_mtx. */
 1361 static LIST_HEAD(, root_hold_token)     root_holds =
 1362     LIST_HEAD_INITIALIZER(&root_holds);
 1363 
      /* Set to 1 by root_mount_done(); read by root_mounted()/root_mount_wait(). */
 1364 static int root_mount_complete;
 1365 
 1366 /*
 1367  * Take a root mount hold for "identifier" (pointer kept, not copied).
 1368  */
 1369 struct root_hold_token *
 1370 root_mount_hold(const char *identifier)
 1371 {
 1372         struct root_hold_token *token;
 1373 
 1374         token = malloc(sizeof(*token), M_DEVBUF, M_WAITOK | M_ZERO);
 1375         token->who = identifier;
 1376         mtx_lock(&mountlist_mtx);
 1377         LIST_INSERT_HEAD(&root_holds, token, list);
 1378         mtx_unlock(&mountlist_mtx);
 1379         return (token);
 1380 }
 1381 
 1382 /*
 1383  * Release root mount.
       *
       * Unlinks the token obtained from root_mount_hold() from root_holds,
       * wakes any thread sleeping on &root_holds (see root_mount_prepare())
       * and frees the token; h must not be used afterwards.
 1384  */
 1385 void
 1386 root_mount_rel(struct root_hold_token *h)
 1387 {
 1388 
 1389         mtx_lock(&mountlist_mtx);
 1390         LIST_REMOVE(h, list);
 1391         wakeup(&root_holds);
 1392         mtx_unlock(&mountlist_mtx);
 1393         free(h, M_DEVBUF);
 1394 }
 1395 
 1396 /*
 1397  * Wait until no root mount holds remain, naming the holders while waiting.
 1398  */
 1399 static void
 1400 root_mount_prepare(void)
 1401 {
 1402         struct root_hold_token *holder;
 1403 
 1404         for (;;) {
 1405                 DROP_GIANT();
 1406                 g_waitidle();
 1407                 PICKUP_GIANT();
 1408                 mtx_lock(&mountlist_mtx);
 1409                 if (LIST_EMPTY(&root_holds))
 1410                         break;
 1411                 printf("Root mount waiting for:");
 1412                 LIST_FOREACH(holder, &root_holds, list)
 1413                         printf(" %s", holder->who);
 1414                 printf("\n");
 1415                 /* PDROP: the mutex is not held when msleep() returns. */
 1416                 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
 1417                     hz);
 1418         }
 1419         mtx_unlock(&mountlist_mtx);
 1420 }
 1421 
 1422 /*
 1423  * Root was mounted, share the good news.
       *
       * Sets root_mount_complete and wakes the threads sleeping on it in
       * root_mount_wait().
 1424  */
 1425 static void
 1426 root_mount_done(void)
 1427 {
 1428 
 1429         /*
 1430          * Use a mutex to prevent the wakeup being missed and waiting for
 1431          * an extra 1 second sleep.
 1432          */
 1433         mtx_lock(&mountlist_mtx);
 1434         root_mount_complete = 1;
 1435         wakeup(&root_mount_complete);
 1436         mtx_unlock(&mountlist_mtx);
 1437 }
 1438 
 1439 /*
 1440  * Return true if root is already mounted.
       *
       * The value returned is root_mount_complete, which root_mount_done()
       * sets to 1.
 1441  */
 1442 int
 1443 root_mounted(void)
 1444 {
 1445 
 1446         /* No mutex is acquired here because int stores are atomic. */
 1447         return (root_mount_complete);
 1448 }
 1449 
 1450 /*
 1451  * Sleep until root_mount_done() has announced that root is mounted.
 1452  */
 1453 void
 1454 root_mount_wait(void)
 1455 {
 1456 
 1457         /*
 1458          * A thread that is itself running the SYSINIT sequence would
 1459          * deadlock here, so calling this from pid 0 is asserted against.
 1460          */
 1461         KASSERT(curthread->td_proc->p_pid != 0,
 1462             ("root_mount_wait: cannot be called from the swapper thread"));
 1463         mtx_lock(&mountlist_mtx);
 1464         /* Re-check after every wakeup or hz-tick timeout. */
 1465         while (root_mount_complete == 0)
 1466                 (void)msleep(&root_mount_complete, &mountlist_mtx, PZERO,
 1467                     "rootwait", hz);
 1468         mtx_unlock(&mountlist_mtx);
 1469 }
 1470 
      /*
       * Look up the root vnode of the first mount on the mount list, store it
       * in the global rootvnode and make it both the current directory and the
       * root directory of td's process.  Any previous fd_cdir/fd_rdir vnode is
       * released, and each of the two slots takes its own reference on
       * rootvnode.  Panics if VFS_ROOT() fails.
       */
 1471 static void
 1472 set_rootvnode(struct thread *td)
 1473 {
 1474         struct proc *p;
 1475 
 1476         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1477                 panic("Cannot find root vnode");
 1478 
 1479         p = td->td_proc;
              /*
               * fd_cdir and fd_rdir are modified below, so the filedesc lock is
               * taken exclusively rather than shared.
               */
 1480         FILEDESC_XLOCK(p->p_fd);
 1481 
 1482         if (p->p_fd->fd_cdir != NULL)
 1483                 vrele(p->p_fd->fd_cdir);
 1484         p->p_fd->fd_cdir = rootvnode;
 1485         VREF(rootvnode);
 1486 
 1487         if (p->p_fd->fd_rdir != NULL)
 1488                 vrele(p->p_fd->fd_rdir);
 1489         p->p_fd->fd_rdir = rootvnode;
 1490         VREF(rootvnode);
 1491 
 1492         FILEDESC_XUNLOCK(p->p_fd);
 1493 
              /* Drop the LK_EXCLUSIVE vnode lock requested from VFS_ROOT() above. */
 1494         VOP_UNLOCK(rootvnode, 0, td);
 1495 }
 1496 
 1497 /*
 1498  * Mount devfs as a temporary root: allocate the mount with no covered
 1499  * vnode, put it at the head of the mount list, make it the root vnode and
 1500  * create a "dev" -> "/" symlink so that absolute /dev paths resolve.
 1501  */
 1502 static void
 1503 devfs_first(void)
 1504 {
 1505         struct thread *td;
 1506         struct vfsconf *devfs_conf;
 1507         struct vfsoptlist *optlist;
 1508         struct mount *devmp;
 1509         int error;
 1510 
 1511         td = curthread;
 1512         devfs_conf = vfs_byname("devfs");
 1513         KASSERT(devfs_conf != NULL, ("Could not find devfs by name"));
 1514         if (devfs_conf == NULL)
 1515                 return;
 1516 
 1517         devmp = vfs_mount_alloc(NULLVP, devfs_conf, "/dev", td);
 1518         error = VFS_MOUNT(devmp, td);
 1519         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 1520         if (error != 0)
 1521                 return;
 1522 
 1523         /* Give the mount an empty option list. */
 1524         optlist = malloc(sizeof(*optlist), M_MOUNT, M_WAITOK);
 1525         TAILQ_INIT(optlist);
 1526         devmp->mnt_opt = optlist;
 1527 
 1528         mtx_lock(&mountlist_mtx);
 1529         TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
 1530         mtx_unlock(&mountlist_mtx);
 1531 
 1532         set_rootvnode(td);
 1533         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1534         if (error != 0)
 1535                 printf("kern_symlink /dev -> / returns %d\n", error);
 1536 }
 1537 
 1538 /*
 1539  * Surgically move our devfs to be mounted on /dev.
       *
       * The devfs mount is taken from the head of the mount list, where
       * devfs_first() put it, and the mount that is then first on the list
       * becomes the real root.  /dev is looked up in that root and the devfs
       * mount is attached to it and re-queued at the tail of the mount list.
       *
       * If /dev cannot be looked up, is not a directory, or cannot be flushed,
       * the devfs mount is left off the mount list; the devfs root vnode and
       * the busy reference on the mount are still released.
 1540  */
 1541 
 1542 static void
 1543 devfs_fixup(struct thread *td)
 1544 {
 1545         struct nameidata nd;
 1546         int error;
 1547         struct vnode *vp, *dvp;
 1548         struct mount *mp;
 1549 
 1550         /* Remove our devfs mount from the mountlist and purge the cache */
 1551         mtx_lock(&mountlist_mtx);
 1552         mp = TAILQ_FIRST(&mountlist);
 1553         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1554         mtx_unlock(&mountlist_mtx);
 1555         cache_purgevfs(mp);
 1556 
              /* dvp is dereferenced right below, so a failure here is fatal. */
 1557         if (VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td))
                      panic("devfs_fixup: cannot find devfs root vnode");
 1558         VI_LOCK(dvp);
 1559         dvp->v_iflag &= ~VI_MOUNT;
 1560         VI_UNLOCK(dvp);
 1561         dvp->v_mountedhere = NULL;
 1562 
 1563         /* Set up the real rootvnode, and purge the cache */
 1564         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1565         set_rootvnode(td);
 1566         cache_purgevfs(rootvnode->v_mount);
 1567 
 1568         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1569         error = namei(&nd);
 1570         if (error) {
 1571                 printf("Lookup of /dev for devfs, error: %d\n", error);
 1572                 goto fail;
 1573         }
 1574         NDFREE(&nd, NDF_ONLY_PNBUF);
 1575         vp = nd.ni_vp;
 1576         if (vp->v_type != VDIR) {
                      printf("devfs_fixup: /dev is not a directory\n");
 1577                 vput(vp);
                      /* vp must not be touched after vput(). */
                      goto fail;
 1578         }
 1579         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1580         if (error) {
                      printf("devfs_fixup: vinvalbuf of /dev, error: %d\n", error);
 1581                 vput(vp);
                      goto fail;
 1582         }
 1583         cache_purge(vp);
 1584         mp->mnt_vnodecovered = vp;
 1585         vp->v_mountedhere = mp;
 1586         mtx_lock(&mountlist_mtx);
 1587         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1588         mtx_unlock(&mountlist_mtx);
 1589         VOP_UNLOCK(vp, 0, td);
 1590         vput(dvp);
 1591         vfs_unbusy(mp, td);
 1592 
 1593         /* Unlink the no longer needed /dev/dev -> / symlink */
 1594         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
              return;

      fail:
              /* Same releases as the success path: devfs root vnode and mp. */
              vput(dvp);
              vfs_unbusy(mp, td);
 1595 }
 1596 
 1597 /*
 1598  * Write a printf-style message into the "errmsg" option of mp->mnt_optnew.
 1599  */
 1600 void
 1601 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1602 {
 1603         va_list args;
 1604         char *buf;
 1605         int buflen;
 1606 
 1607         /* Nothing to do when the option is absent or has no buffer. */
 1608         if (vfs_getopt(mp->mnt_optnew, "errmsg", (void **)&buf, &buflen) != 0)
 1609                 return;
 1610         if (buf == NULL || buflen <= 0)
 1611                 return;
 1612 
 1613         va_start(args, fmt);
 1614         vsnprintf(buf, (size_t)buflen, fmt, args);
 1615         va_end(args);
 1616 }
 1616 
 1617 /*
 1618  * Find and mount the root filesystem
 1619  */
 1620 void
 1621 vfs_mountroot(void)
 1622 {
 1623         char *cp;
 1624         int error, i, asked = 0;
 1625 
 1626         root_mount_prepare();
 1627 
 1628         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
 1629             NULL, NULL, mount_init, mount_fini,
 1630             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1631         devfs_first();
 1632 
 1633         /*
 1634          * We are booted with instructions to prompt for the root filesystem.
 1635          */
 1636         if (boothowto & RB_ASKNAME) {
 1637                 if (!vfs_mountroot_ask())
 1638                         goto mounted;
 1639                 asked = 1;
 1640         }
 1641 
 1642         /*
 1643          * The root filesystem information is compiled in, and we are
 1644          * booted with instructions to use it.
 1645          */
 1646         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1647                 if (!vfs_mountroot_try(ctrootdevname))
 1648                         goto mounted;
 1649                 ctrootdevname = NULL;
 1650         }
 1651 
 1652         /*
 1653          * We've been given the generic "use CDROM as root" flag.  This is
 1654          * necessary because one media may be used in many different
 1655          * devices, so we need to search for them.
 1656          */
 1657         if (boothowto & RB_CDROM) {
 1658                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1659                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1660                                 goto mounted;
 1661                 }
 1662         }
 1663 
 1664         /*
 1665          * Try to use the value read by the loader from /etc/fstab, or
 1666          * supplied via some other means.  This is the preferred
 1667          * mechanism.
 1668          */
 1669         cp = getenv("vfs.root.mountfrom");
 1670         if (cp != NULL) {
 1671                 error = vfs_mountroot_try(cp);
 1672                 freeenv(cp);
 1673                 if (!error)
 1674                         goto mounted;
 1675         }
 1676 
 1677         /*
 1678          * Try values that may have been computed by code during boot
 1679          */
 1680         if (!vfs_mountroot_try(rootdevnames[0]))
 1681                 goto mounted;
 1682         if (!vfs_mountroot_try(rootdevnames[1]))
 1683                 goto mounted;
 1684 
 1685         /*
 1686          * If we (still) have a compiled-in default, try it.
 1687          */
 1688         if (ctrootdevname != NULL)
 1689                 if (!vfs_mountroot_try(ctrootdevname))
 1690                         goto mounted;
 1691         /*
 1692          * Everything so far has failed, prompt on the console if we haven't
 1693          * already tried that.
 1694          */
 1695         if (!asked)
 1696                 if (!vfs_mountroot_ask())
 1697                         goto mounted;
 1698 
 1699         panic("Root mount failed, startup aborted.");
 1700 
 1701 mounted:
 1702         root_mount_done();
 1703 }
 1704 
 1705 /*
 1706  * Mount (mountfrom) as the root filesystem.
 1707  */
 1708 static int
 1709 vfs_mountroot_try(const char *mountfrom)
 1710 {
 1711         struct mount    *mp;
 1712         char            *vfsname, *path;
 1713         time_t          timebase;
 1714         int             error;
 1715         char            patt[32];
 1716 
 1717         vfsname = NULL;
 1718         path    = NULL;
 1719         mp      = NULL;
 1720         error   = EINVAL;
 1721 
 1722         if (mountfrom == NULL)
 1723                 return (error);         /* don't complain */
 1724         printf("Trying to mount root from %s\n", mountfrom);
 1725 
 1726         /* parse vfs name and path */
 1727         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1728         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1729         vfsname[0] = path[0] = 0;
 1730         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1731         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1732                 goto out;
 1733 
 1734         if (path[0] == '\0')
 1735                 strcpy(path, ROOTNAME);
 1736 
 1737         error = kernel_vmount(
 1738             MNT_RDONLY | MNT_ROOTFS,
 1739             "fstype", vfsname,
 1740             "fspath", "/",
 1741             "from", path,
 1742             NULL);
 1743         if (error == 0) {
 1744                 /*
 1745                  * We mount devfs prior to mounting the / FS, so the first
 1746                  * entry will typically be devfs.
 1747                  */
 1748                 mp = TAILQ_FIRST(&mountlist);
 1749                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1750 
 1751                 /*
 1752                  * Iterate over all currently mounted file systems and use
 1753                  * the time stamp found to check and/or initialize the RTC.
 1754                  * Typically devfs has no time stamp and the only other FS
 1755                  * is the actual / FS.
 1756                  * Call inittodr() only once and pass it the largest of the
 1757                  * timestamps we encounter.
 1758                  */
 1759                 timebase = 0;
 1760                 do {
 1761                         if (mp->mnt_time > timebase)
 1762                                 timebase = mp->mnt_time;
 1763                         mp = TAILQ_NEXT(mp, mnt_list);
 1764                 } while (mp != NULL);
 1765                 inittodr(timebase);
 1766 
 1767                 devfs_fixup(curthread);
 1768         }
 1769 out:
 1770         free(path, M_MOUNT);
 1771         free(vfsname, M_MOUNT);
 1772         return (error);
 1773 }
 1774 
 1775 /*
 1776  * ---------------------------------------------------------------------
 1777  * Interactive root filesystem selection code.
 1778  */
 1779 
 1780 static int
 1781 vfs_mountroot_ask(void)
 1782 {
 1783         char name[128];
 1784 
 1785         for(;;) {
 1786                 printf("\nManual root filesystem specification:\n");
 1787                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1788 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
 1789                 printf("                       eg. ufs:da0s1a\n");
 1790 #else
 1791                 printf("                       eg. ufs:/dev/da0a\n");
 1792 #endif
 1793                 printf("  ?                  List valid disk boot devices\n");
 1794                 printf("  <empty line>       Abort manual input\n");
 1795                 printf("\nmountroot> ");
 1796                 gets(name, sizeof(name), 1);
 1797                 if (name[0] == '\0')
 1798                         return (1);
 1799                 if (name[0] == '?') {
 1800                         printf("\nList of GEOM managed disk devices:\n  ");
 1801                         g_dev_print();
 1802                         continue;
 1803                 }
 1804                 if (!vfs_mountroot_try(name))
 1805                         return (0);
 1806         }
 1807 }
 1808 
 1809 /*
 1810  * ---------------------------------------------------------------------
 1811  * Functions for querying mount options/arguments from filesystems.
 1812  */
 1813 
 1814 /*
 1815  * Check that no unknown options are given
 1816  */
 1817 int
 1818 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1819 {
 1820         struct vfsopt *opt;
 1821         char errmsg[255];
 1822         const char **t, *p, *q;
 1823         int ret = 0;
 1824 
 1825         TAILQ_FOREACH(opt, opts, link) {
 1826                 p = opt->name;
 1827                 q = NULL;
 1828                 if (p[0] == 'n' && p[1] == 'o')
 1829                         q = p + 2;
 1830                 for(t = global_opts; *t != NULL; t++) {
 1831                         if (strcmp(*t, p) == 0)
 1832                                 break;
 1833                         if (q != NULL) {
 1834                                 if (strcmp(*t, q) == 0)
 1835                                         break;
 1836                         }
 1837                 }
 1838                 if (*t != NULL)
 1839                         continue;
 1840                 for(t = legal; *t != NULL; t++) {
 1841                         if (strcmp(*t, p) == 0)
 1842                                 break;
 1843                         if (q != NULL) {
 1844                                 if (strcmp(*t, q) == 0)
 1845                                         break;
 1846                         }
 1847                 }
 1848                 if (*t != NULL)
 1849                         continue;
 1850                 snprintf(errmsg, sizeof(errmsg),
 1851                     "mount option <%s> is unknown", p);
 1852                 printf("%s\n", errmsg);
 1853                 ret = EINVAL;
 1854         }
 1855         if (ret != 0) {
 1856                 TAILQ_FOREACH(opt, opts, link) {
 1857                         if (strcmp(opt->name, "errmsg") == 0) {
 1858                                 strncpy((char *)opt->value, errmsg, opt->len);
 1859                         }
 1860                 }
 1861         }
 1862         return (ret);
 1863 }
 1864 
 1865 /*
 1866  * Get a mount option by its name.
 1867  *
 1868  * Return 0 if the option was found, ENOENT otherwise.
 1869  * If len is non-NULL it will be filled with the length
 1870  * of the option. If buf is non-NULL, it will be filled
 1871  * with the address of the option.
 1872  */
 1873 int
 1874 vfs_getopt(opts, name, buf, len)
 1875         struct vfsoptlist *opts;
 1876         const char *name;
 1877         void **buf;
 1878         int *len;
 1879 {
 1880         struct vfsopt *opt;
 1881 
 1882         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1883 
 1884         TAILQ_FOREACH(opt, opts, link) {
 1885                 if (strcmp(name, opt->name) == 0) {
 1886                         if (len != NULL)
 1887                                 *len = opt->len;
 1888                         if (buf != NULL)
 1889                                 *buf = opt->value;
 1890                         return (0);
 1891                 }
 1892         }
 1893         return (ENOENT);
 1894 }
 1895 
 1896 static int
 1897 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 1898 {
 1899         struct vfsopt *opt;
 1900         int i;
 1901 
 1902         if (opts == NULL)
 1903                 return (-1);
 1904 
 1905         i = 0;
 1906         TAILQ_FOREACH(opt, opts, link) {
 1907                 if (strcmp(name, opt->name) == 0)
 1908                         return (i);
 1909                 ++i;
 1910         }
 1911         return (-1);
 1912 }
 1913 
 1914 char *
 1915 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1916 {
 1917         struct vfsopt *opt;
 1918 
 1919         *error = 0;
 1920         TAILQ_FOREACH(opt, opts, link) {
 1921                 if (strcmp(name, opt->name) != 0)
 1922                         continue;
 1923                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1924                         *error = EINVAL;
 1925                         return (NULL);
 1926                 }
 1927                 return (opt->value);
 1928         }
 1929         *error = ENOENT;
 1930         return (NULL);
 1931 }
 1932 
 1933 int
 1934 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1935 {
 1936         struct vfsopt *opt;
 1937 
 1938         TAILQ_FOREACH(opt, opts, link) {
 1939                 if (strcmp(name, opt->name) == 0) {
 1940                         if (w != NULL)
 1941                                 *w |= val;
 1942                         return (1);
 1943                 }
 1944         }
 1945         if (w != NULL)
 1946                 *w &= ~val;
 1947         return (0);
 1948 }
 1949 
 1950 int
 1951 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1952 {
 1953         va_list ap;
 1954         struct vfsopt *opt;
 1955         int ret;
 1956 
 1957         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1958 
 1959         TAILQ_FOREACH(opt, opts, link) {
 1960                 if (strcmp(name, opt->name) != 0)
 1961                         continue;
 1962                 if (opt->len == 0 || opt->value == NULL)
 1963                         return (0);
 1964                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1965                         return (0);
 1966                 va_start(ap, fmt);
 1967                 ret = vsscanf(opt->value, fmt, ap);
 1968                 va_end(ap);
 1969                 return (ret);
 1970         }
 1971         return (0);
 1972 }
 1973 
 1974 /*
 1975  * Find and copy a mount option.
 1976  *
 1977  * The size of the buffer has to be specified
 1978  * in len, if it is not the same length as the
 1979  * mount option, EINVAL is returned.
 1980  * Returns ENOENT if the option is not found.
 1981  */
 1982 int
 1983 vfs_copyopt(opts, name, dest, len)
 1984         struct vfsoptlist *opts;
 1985         const char *name;
 1986         void *dest;
 1987         int len;
 1988 {
 1989         struct vfsopt *opt;
 1990 
 1991         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1992 
 1993         TAILQ_FOREACH(opt, opts, link) {
 1994                 if (strcmp(name, opt->name) == 0) {
 1995                         if (len != opt->len)
 1996                                 return (EINVAL);
 1997                         bcopy(opt->value, dest, opt->len);
 1998                         return (0);
 1999                 }
 2000         }
 2001         return (ENOENT);
 2002 }
 2003 
 2004 /*
 2005  * This is a helper function for filesystems to traverse their
 2006  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 2007  */
 2008 
 2009 struct vnode *
 2010 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 2011 {
 2012         struct vnode *vp;
 2013 
 2014         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2015 
 2016         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2017         if ((*mvp)->v_yield++ == 500) {
 2018                 MNT_IUNLOCK(mp);
 2019                 (*mvp)->v_yield = 0;
 2020                 uio_yield();
 2021                 MNT_ILOCK(mp);
 2022         }
 2023         vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 2024         while (vp != NULL && vp->v_type == VMARKER)
 2025                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2026 
 2027         /* Check if we are done */
 2028         if (vp == NULL) {
 2029                 __mnt_vnode_markerfree(mvp, mp);
 2030                 return (NULL);
 2031         }
 2032         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2033         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2034         return (vp);
 2035 }
 2036 
 2037 struct vnode *
 2038 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 2039 {
 2040         struct vnode *vp;
 2041 
 2042         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2043 
 2044         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2045         while (vp != NULL && vp->v_type == VMARKER)
 2046                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2047 
 2048         /* Check if we are done */
 2049         if (vp == NULL) {
 2050                 *mvp = NULL;
 2051                 return (NULL);
 2052         }
 2053         mp->mnt_holdcnt++;
 2054         MNT_IUNLOCK(mp);
 2055         *mvp = (struct vnode *) malloc(sizeof(struct vnode),
 2056                                        M_VNODE_MARKER,
 2057                                        M_WAITOK | M_ZERO);
 2058         MNT_ILOCK(mp);
 2059         (*mvp)->v_type = VMARKER;
 2060 
 2061         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2062         while (vp != NULL && vp->v_type == VMARKER)
 2063                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2064 
 2065         /* Check if we are done */
 2066         if (vp == NULL) {
 2067                 MNT_IUNLOCK(mp);
 2068                 free(*mvp, M_VNODE_MARKER);
 2069                 MNT_ILOCK(mp);
 2070                 *mvp = NULL;
 2071                 mp->mnt_holdcnt--;
 2072                 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2073                         wakeup(&mp->mnt_holdcnt);
 2074                 return (NULL);
 2075         }
 2076         mp->mnt_markercnt++;
 2077         (*mvp)->v_mount = mp;
 2078         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2079         return (vp);
 2080 }
 2081 
 2082 
 2083 void
 2084 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 2085 {
 2086 
 2087         if (*mvp == NULL)
 2088                 return;
 2089 
 2090         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2091 
 2092         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2093         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2094         MNT_IUNLOCK(mp);
 2095         free(*mvp, M_VNODE_MARKER);
 2096         MNT_ILOCK(mp);
 2097         *mvp = NULL;
 2098 
 2099         mp->mnt_markercnt--;
 2100         mp->mnt_holdcnt--;
 2101         if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2102                 wakeup(&mp->mnt_holdcnt);
 2103 }
 2104 
 2105 
 2106 int
 2107 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 2108 {
 2109         int error;
 2110 
 2111         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 2112         if (sbp != &mp->mnt_stat)
 2113                 *sbp = mp->mnt_stat;
 2114         return (error);
 2115 }
 2116 
 2117 void
 2118 vfs_mountedfrom(struct mount *mp, const char *from)
 2119 {
 2120 
 2121         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2122         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2123             sizeof mp->mnt_stat.f_mntfromname);
 2124 }
 2125 
 2126 /*
 2127  * ---------------------------------------------------------------------
 2128  * This is the api for building mount args and mounting filesystems from
 2129  * inside the kernel.
 2130  *
 2131  * The API works by accumulation of individual args.  First error is
 2132  * latched.
 2133  *
 2134  * XXX: should be documented in new manpage kernel_mount(9)
 2135  */
 2136 
/*
 * A memory allocation which must be freed when we are done.
 *
 * Only the list linkage is declared here; users allocate extra bytes
 * behind the header and address them as (maa + 1).
 */
struct mntaarg {
        /* Linkage on the owning mntarg's list. */
        SLIST_ENTRY(mntaarg)    next;
};

/* The header for the mount arguments */
struct mntarg {
        /* Array of iovecs, filled in name/value pairs and grown with realloc(). */
        struct iovec *v;
        /* Number of iovec entries currently used in v. */
        int len;
        /* Latched error; once non-zero, further arguments are not added. */
        int error;
        /* Allocations released together with this header by free_mntarg(). */
        SLIST_HEAD(, mntaarg)   list;
};
 2149 
 2150 /*
 2151  * Add a boolean argument.
 2152  *
 2153  * flag is the boolean value.
 2154  * name must start with "no".
 2155  */
 2156 struct mntarg *
 2157 mount_argb(struct mntarg *ma, int flag, const char *name)
 2158 {
 2159 
 2160         KASSERT(name[0] == 'n' && name[1] == 'o',
 2161             ("mount_argb(...,%s): name must start with 'no'", name));
 2162 
 2163         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2164 }
 2165 
 2166 /*
 2167  * Add an argument printf style
 2168  */
 2169 struct mntarg *
 2170 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2171 {
 2172         va_list ap;
 2173         struct mntaarg *maa;
 2174         struct sbuf *sb;
 2175         int len;
 2176 
 2177         if (ma == NULL) {
 2178                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2179                 SLIST_INIT(&ma->list);
 2180         }
 2181         if (ma->error)
 2182                 return (ma);
 2183 
 2184         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2185             M_MOUNT, M_WAITOK);
 2186         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2187         ma->v[ma->len].iov_len = strlen(name) + 1;
 2188         ma->len++;
 2189 
 2190         sb = sbuf_new_auto();
 2191         va_start(ap, fmt);
 2192         sbuf_vprintf(sb, fmt, ap);
 2193         va_end(ap);
 2194         sbuf_finish(sb);
 2195         len = sbuf_len(sb) + 1;
 2196         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2197         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2198         bcopy(sbuf_data(sb), maa + 1, len);
 2199         sbuf_delete(sb);
 2200 
 2201         ma->v[ma->len].iov_base = maa + 1;
 2202         ma->v[ma->len].iov_len = len;
 2203         ma->len++;
 2204 
 2205         return (ma);
 2206 }
 2207 
 2208 /*
 2209  * Add an argument which is a userland string.
 2210  */
 2211 struct mntarg *
 2212 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2213 {
 2214         struct mntaarg *maa;
 2215         char *tbuf;
 2216 
 2217         if (val == NULL)
 2218                 return (ma);
 2219         if (ma == NULL) {
 2220                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2221                 SLIST_INIT(&ma->list);
 2222         }
 2223         if (ma->error)
 2224                 return (ma);
 2225         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2226         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2227         tbuf = (void *)(maa + 1);
 2228         ma->error = copyinstr(val, tbuf, len, NULL);
 2229         return (mount_arg(ma, name, tbuf, -1));
 2230 }
 2231 
 2232 /*
 2233  * Plain argument.
 2234  *
 2235  * If length is -1, use printf.
 2236  */
 2237 struct mntarg *
 2238 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2239 {
 2240 
 2241         if (ma == NULL) {
 2242                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2243                 SLIST_INIT(&ma->list);
 2244         }
 2245         if (ma->error)
 2246                 return (ma);
 2247 
 2248         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2249             M_MOUNT, M_WAITOK);
 2250         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2251         ma->v[ma->len].iov_len = strlen(name) + 1;
 2252         ma->len++;
 2253 
 2254         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2255         if (len < 0)
 2256                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2257         else
 2258                 ma->v[ma->len].iov_len = len;
 2259         ma->len++;
 2260         return (ma);
 2261 }
 2262 
 2263 /*
 2264  * Free a mntarg structure
 2265  */
 2266 static void
 2267 free_mntarg(struct mntarg *ma)
 2268 {
 2269         struct mntaarg *maa;
 2270 
 2271         while (!SLIST_EMPTY(&ma->list)) {
 2272                 maa = SLIST_FIRST(&ma->list);
 2273                 SLIST_REMOVE_HEAD(&ma->list, next);
 2274                 free(maa, M_MOUNT);
 2275         }
 2276         free(ma->v, M_MOUNT);
 2277         free(ma, M_MOUNT);
 2278 }
 2279 
 2280 /*
 2281  * Mount a filesystem
 2282  */
 2283 int
 2284 kernel_mount(struct mntarg *ma, int flags)
 2285 {
 2286         struct uio auio;
 2287         int error;
 2288 
 2289         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2290         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2291         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2292 
 2293         auio.uio_iov = ma->v;
 2294         auio.uio_iovcnt = ma->len;
 2295         auio.uio_segflg = UIO_SYSSPACE;
 2296 
 2297         error = ma->error;
 2298         if (!error)
 2299                 error = vfs_donmount(curthread, flags, &auio);
 2300         free_mntarg(ma);
 2301         return (error);
 2302 }
 2303 
 2304 /*
 2305  * A printflike function to mount a filesystem.
 2306  */
 2307 int
 2308 kernel_vmount(int flags, ...)
 2309 {
 2310         struct mntarg *ma = NULL;
 2311         va_list ap;
 2312         const char *cp;
 2313         const void *vp;
 2314         int error;
 2315 
 2316         va_start(ap, flags);
 2317         for (;;) {
 2318                 cp = va_arg(ap, const char *);
 2319                 if (cp == NULL)
 2320                         break;
 2321                 vp = va_arg(ap, const void *);
 2322                 ma = mount_arg(ma, cp, vp, -1);
 2323         }
 2324         va_end(ap);
 2325 
 2326         error = kernel_mount(ma, flags);
 2327         return (error);
 2328 }
Cache object: 2586c47ee8d6a65956f7f30a6f473d10
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_mount.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c