The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/clock.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/libkern.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/reboot.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/sx.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/vnode.h>
   61 #include <vm/uma.h>
   62 
   63 #include <geom/geom.h>
   64 
   65 #include <machine/stdarg.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/mac/mac_framework.h>
   69 
   70 #include "opt_rootdevname.h"
   71 #include "opt_ddb.h"
   72 #include "opt_mac.h"
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif
   77 
   78 #define ROOTNAME                "root_device"
   79 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   80 
   81 static int      vfs_domount(struct thread *td, const char *fstype,
   82                     char *fspath, int fsflags, void *fsdata);
   83 static int      vfs_mountroot_ask(void);
   84 static int      vfs_mountroot_try(const char *mountfrom);
   85 static void     free_mntarg(struct mntarg *ma);
   86 static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
   87 
   88 static int      usermount = 0;
   89 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   90     "Unprivileged users may mount and unmount file systems");
   91 
   92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   93 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
   94 static uma_zone_t mount_zone;
   95 
   96 /* List of mounted filesystems. */
   97 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   98 
   99 /* For any iteration/modification of mountlist */
  100 struct mtx mountlist_mtx;
  101 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  102 
  103 TAILQ_HEAD(vfsoptlist, vfsopt);
  104 struct vfsopt {
  105         TAILQ_ENTRY(vfsopt) link;
  106         char    *name;
  107         void    *value;
  108         int     len;
  109 };
  110 
  111 /*
  112  * The vnode of the system's root (/ in the filesystem, without chroot
  113  * active.)
  114  */
  115 struct vnode    *rootvnode;
  116 
  117 /*
  118  * The root filesystem is detailed in the kernel environment variable
  119  * vfs.root.mountfrom, which is expected to be in the general format
  120  *
  121  * <vfsname>:[<path>]
  122  * vfsname   := the name of a VFS known to the kernel and capable
  123  *              of being mounted as root
  124  * path      := disk device name or other data used by the filesystem
  125  *              to locate its physical store
  126  */
  127 
  128 /*
  129  * Global opts, taken by all filesystems
  130  */
  131 static const char *global_opts[] = {
  132         "errmsg",
  133         "fstype",
  134         "fspath",
  135         "ro",
  136         "rw",
  137         "nosuid",
  138         "noexec",
  139         NULL
  140 };
  141 
  142 /*
  143  * The root specifiers we will try if RB_CDROM is specified.
  144  */
  145 static char *cdrom_rootdevnames[] = {
  146         "cd9660:cd0",
  147         "cd9660:acd0",
  148         NULL
  149 };
  150 
  151 /* legacy find-root code */
  152 char            *rootdevnames[2] = {NULL, NULL};
  153 #ifndef ROOTDEVNAME
  154 #  define ROOTDEVNAME NULL
  155 #endif
  156 static const char       *ctrootdevname = ROOTDEVNAME;
  157 
  158 /*
  159  * ---------------------------------------------------------------------
  160  * Functions for building and sanitizing the mount options
  161  */
  162 
  163 /* Remove one mount option. */
  164 static void
  165 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  166 {
  167 
  168         TAILQ_REMOVE(opts, opt, link);
  169         free(opt->name, M_MOUNT);
  170         if (opt->value != NULL)
  171                 free(opt->value, M_MOUNT);
  172 #ifdef INVARIANTS
  173         else if (opt->len != 0)
  174                 panic("%s: mount option with NULL value but length != 0",
  175                     __func__);
  176 #endif
  177         free(opt, M_MOUNT);
  178 }
  179 
  180 /* Release all resources related to the mount options. */
  181 void
  182 vfs_freeopts(struct vfsoptlist *opts)
  183 {
  184         struct vfsopt *opt;
  185 
  186         while (!TAILQ_EMPTY(opts)) {
  187                 opt = TAILQ_FIRST(opts);
  188                 vfs_freeopt(opts, opt);
  189         }
  190         free(opts, M_MOUNT);
  191 }
  192 
  193 void
  194 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  195 {
  196         struct vfsopt *opt, *temp;
  197 
  198         if (opts == NULL)
  199                 return;
  200         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  201                 if (strcmp(opt->name, name) == 0)
  202                         vfs_freeopt(opts, opt);
  203         }
  204 }
  205 
  206 /*
  207  * Check if options are equal (with or without the "no" prefix).
  208  */
  209 static int
  210 vfs_equalopts(const char *opt1, const char *opt2)
  211 {
  212 
  213         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  214         if (strcmp(opt1, opt2) == 0)
  215                 return (1);
  216         /* "noopt" vs. "opt" */
  217         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  218                 return (1);
  219         /* "opt" vs. "noopt" */
  220         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  221                 return (1);
  222         return (0);
  223 }
  224 
  225 /*
  226  * If a mount option is specified several times,
  227  * (with or without the "no" prefix) only keep
  228  * the last occurence of it.
  229  */
  230 static void
  231 vfs_sanitizeopts(struct vfsoptlist *opts)
  232 {
  233         struct vfsopt *opt, *opt2, *tmp;
  234 
  235         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  236                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  237                 while (opt2 != NULL) {
  238                         if (vfs_equalopts(opt->name, opt2->name)) {
  239                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  240                                 vfs_freeopt(opts, opt2);
  241                                 opt2 = tmp;
  242                         } else {
  243                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  244                         }
  245                 }
  246         }
  247 }
  248 
  249 /*
  250  * Build a linked list of mount options from a struct uio.
  251  */
  252 static int
  253 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  254 {
  255         struct vfsoptlist *opts;
  256         struct vfsopt *opt;
  257         size_t memused;
  258         unsigned int i, iovcnt;
  259         int error, namelen, optlen;
  260 
  261         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  262         TAILQ_INIT(opts);
  263         memused = 0;
  264         iovcnt = auio->uio_iovcnt;
  265         for (i = 0; i < iovcnt; i += 2) {
  266                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  267                 namelen = auio->uio_iov[i].iov_len;
  268                 optlen = auio->uio_iov[i + 1].iov_len;
  269                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  270                 opt->value = NULL;
  271                 opt->len = 0;
  272 
  273                 /*
  274                  * Do this early, so jumps to "bad" will free the current
  275                  * option.
  276                  */
  277                 TAILQ_INSERT_TAIL(opts, opt, link);
  278                 memused += sizeof(struct vfsopt) + optlen + namelen;
  279 
  280                 /*
  281                  * Avoid consuming too much memory, and attempts to overflow
  282                  * memused.
  283                  */
  284                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  285                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  286                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  287                         error = EINVAL;
  288                         goto bad;
  289                 }
  290 
  291                 if (auio->uio_segflg == UIO_SYSSPACE) {
  292                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  293                 } else {
  294                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  295                             namelen);
  296                         if (error)
  297                                 goto bad;
  298                 }
  299                 /* Ensure names are null-terminated strings. */
  300                 if (opt->name[namelen - 1] != '\0') {
  301                         error = EINVAL;
  302                         goto bad;
  303                 }
  304                 if (optlen != 0) {
  305                         opt->len = optlen;
  306                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  307                         if (auio->uio_segflg == UIO_SYSSPACE) {
  308                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  309                                     optlen);
  310                         } else {
  311                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  312                                     opt->value, optlen);
  313                                 if (error)
  314                                         goto bad;
  315                         }
  316                 }
  317         }
  318         vfs_sanitizeopts(opts);
  319         *options = opts;
  320         return (0);
  321 bad:
  322         vfs_freeopts(opts);
  323         return (error);
  324 }
  325 
  326 /*
  327  * Merge the old mount options with the new ones passed
  328  * in the MNT_UPDATE case.
  329  */
  330 static void
  331 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  332 {
  333         struct vfsopt *opt, *opt2, *new;
  334 
  335         TAILQ_FOREACH(opt, opts, link) {
  336                 /*
  337                  * Check that this option hasn't been redefined
  338                  * nor cancelled with a "no" mount option.
  339                  */
  340                 opt2 = TAILQ_FIRST(toopts);
  341                 while (opt2 != NULL) {
  342                         if (strcmp(opt2->name, opt->name) == 0)
  343                                 goto next;
  344                         if (strncmp(opt2->name, "no", 2) == 0 &&
  345                             strcmp(opt2->name + 2, opt->name) == 0) {
  346                                 vfs_freeopt(toopts, opt2);
  347                                 goto next;
  348                         }
  349                         opt2 = TAILQ_NEXT(opt2, link);
  350                 }
  351                 /* We want this option, duplicate it. */
  352                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  353                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  354                 strcpy(new->name, opt->name);
  355                 if (opt->len != 0) {
  356                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  357                         bcopy(opt->value, new->value, opt->len);
  358                 } else {
  359                         new->value = NULL;
  360                 }
  361                 new->len = opt->len;
  362                 TAILQ_INSERT_TAIL(toopts, new, link);
  363 next:
  364                 continue;
  365         }
  366 }
  367 
  368 /*
  369  * Mount a filesystem.
  370  */
  371 int
  372 nmount(td, uap)
  373         struct thread *td;
  374         struct nmount_args /* {
  375                 struct iovec *iovp;
  376                 unsigned int iovcnt;
  377                 int flags;
  378         } */ *uap;
  379 {
  380         struct uio *auio;
  381         struct iovec *iov;
  382         unsigned int i;
  383         int error;
  384         u_int iovcnt;
  385 
  386         AUDIT_ARG(fflags, uap->flags);
  387 
  388         /*
  389          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  390          * userspace to set this flag, but we must filter it out if we want
  391          * MNT_UPDATE on the root file system to work.
  392          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  393          */
  394         uap->flags &= ~MNT_ROOTFS;
  395 
  396         iovcnt = uap->iovcnt;
  397         /*
  398          * Check that we have an even number of iovec's
  399          * and that we have at least two options.
  400          */
  401         if ((iovcnt & 1) || (iovcnt < 4))
  402                 return (EINVAL);
  403 
  404         error = copyinuio(uap->iovp, iovcnt, &auio);
  405         if (error)
  406                 return (error);
  407         iov = auio->uio_iov;
  408         for (i = 0; i < iovcnt; i++) {
  409                 if (iov->iov_len > MMAXOPTIONLEN) {
  410                         free(auio, M_IOV);
  411                         return (EINVAL);
  412                 }
  413                 iov++;
  414         }
  415         error = vfs_donmount(td, uap->flags, auio);
  416 
  417         free(auio, M_IOV);
  418         return (error);
  419 }
  420 
  421 /*
  422  * ---------------------------------------------------------------------
  423  * Various utility functions
  424  */
  425 
  426 void
  427 vfs_ref(struct mount *mp)
  428 {
  429 
  430         MNT_ILOCK(mp);
  431         MNT_REF(mp);
  432         MNT_IUNLOCK(mp);
  433 }
  434 
  435 void
  436 vfs_rel(struct mount *mp)
  437 {
  438 
  439         MNT_ILOCK(mp);
  440         MNT_REL(mp);
  441         MNT_IUNLOCK(mp);
  442 }
  443 
  444 static int
  445 mount_init(void *mem, int size, int flags)
  446 {
  447         struct mount *mp;
  448 
  449         mp = (struct mount *)mem;
  450         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  451         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  452         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  453         return (0);
  454 }
  455 
  456 static void
  457 mount_fini(void *mem, int size)
  458 {
  459         struct mount *mp;
  460 
  461         mp = (struct mount *)mem;
  462         lockdestroy(&mp->mnt_explock);
  463         lockdestroy(&mp->mnt_lock);
  464         mtx_destroy(&mp->mnt_mtx);
  465 }
  466 
  467 /*
  468  * Allocate and initialize the mount point struct.
  469  */
  470 struct mount *
  471 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  472     const char *fspath, struct thread *td)
  473 {
  474         struct mount *mp;
  475 
  476         mp = uma_zalloc(mount_zone, M_WAITOK);
  477         bzero(&mp->mnt_startzero,
  478             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  479         TAILQ_INIT(&mp->mnt_nvnodelist);
  480         mp->mnt_nvnodelistsize = 0;
  481         mp->mnt_ref = 0;
  482         (void) vfs_busy(mp, LK_NOWAIT, 0, td);
  483         mp->mnt_op = vfsp->vfc_vfsops;
  484         mp->mnt_vfc = vfsp;
  485         vfsp->vfc_refcount++;   /* XXX Unlocked */
  486         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  487         mp->mnt_gen++;
  488         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  489         mp->mnt_vnodecovered = vp;
  490         mp->mnt_cred = crdup(td->td_ucred);
  491         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  492         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  493         mp->mnt_iosize_max = DFLTPHYS;
  494 #ifdef MAC
  495         mac_init_mount(mp);
  496         mac_create_mount(td->td_ucred, mp);
  497 #endif
  498         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  499         return (mp);
  500 }
  501 
  502 /*
  503  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  504  */
  505 void
  506 vfs_mount_destroy(struct mount *mp)
  507 {
  508         int i;
  509 
  510         MNT_ILOCK(mp);
  511         for (i = 0; mp->mnt_ref && i < 3; i++)
  512                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
  513         /*
  514          * This will always cause a 3 second delay in rebooting due to
  515          * refs on the root mountpoint that never go away.  Most of these
  516          * are held by init which never exits.
  517          */
  518         if (i == 3 && (!rebooting || bootverbose))
  519                 printf("Mount point %s had %d dangling refs\n",
  520                     mp->mnt_stat.f_mntonname, mp->mnt_ref);
  521         if (mp->mnt_holdcnt != 0) {
  522                 printf("Waiting for mount point to be unheld\n");
  523                 while (mp->mnt_holdcnt != 0) {
  524                         mp->mnt_holdcntwaiters++;
  525                         msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
  526                                PZERO, "mntdestroy", 0);
  527                         mp->mnt_holdcntwaiters--;
  528                 }
  529                 printf("mount point unheld\n");
  530         }
  531         if (mp->mnt_writeopcount > 0) {
  532                 printf("Waiting for mount point write ops\n");
  533                 while (mp->mnt_writeopcount > 0) {
  534                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  535                         msleep(&mp->mnt_writeopcount,
  536                                MNT_MTX(mp),
  537                                PZERO, "mntdestroy2", 0);
  538                 }
  539                 printf("mount point write ops completed\n");
  540         }
  541         if (mp->mnt_secondary_writes > 0) {
  542                 printf("Waiting for mount point secondary write ops\n");
  543                 while (mp->mnt_secondary_writes > 0) {
  544                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  545                         msleep(&mp->mnt_secondary_writes,
  546                                MNT_MTX(mp),
  547                                PZERO, "mntdestroy3", 0);
  548                 }
  549                 printf("mount point secondary write ops completed\n");
  550         }
  551         MNT_IUNLOCK(mp);
  552         mp->mnt_vfc->vfc_refcount--;
  553         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  554                 struct vnode *vp;
  555 
  556                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  557                         vprint("", vp);
  558                 panic("unmount: dangling vnode");
  559         }
  560         MNT_ILOCK(mp);
  561         if (mp->mnt_kern_flag & MNTK_MWAIT)
  562                 wakeup(mp);
  563         if (mp->mnt_writeopcount != 0)
  564                 panic("vfs_mount_destroy: nonzero writeopcount");
  565         if (mp->mnt_secondary_writes != 0)
  566                 panic("vfs_mount_destroy: nonzero secondary_writes");
  567         if (mp->mnt_nvnodelistsize != 0)
  568                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  569         mp->mnt_writeopcount = -1000;
  570         mp->mnt_nvnodelistsize = -1000;
  571         mp->mnt_secondary_writes = -1000;
  572         MNT_IUNLOCK(mp);
  573 #ifdef MAC
  574         mac_destroy_mount(mp);
  575 #endif
  576         if (mp->mnt_opt != NULL)
  577                 vfs_freeopts(mp->mnt_opt);
  578         crfree(mp->mnt_cred);
  579         uma_zfree(mount_zone, mp);
  580 }
  581 
  582 int
  583 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  584 {
  585         struct vfsoptlist *optlist;
  586         struct vfsopt *opt, *noro_opt, *tmp_opt;
  587         char *fstype, *fspath, *errmsg;
  588         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  589         int has_rw, has_noro;
  590 
  591         errmsg = NULL;
  592         errmsg_len = 0;
  593         errmsg_pos = -1;
  594         has_rw = 0;
  595         has_noro = 0;
  596 
  597         error = vfs_buildopts(fsoptions, &optlist);
  598         if (error)
  599                 return (error);
  600 
  601         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  602                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  603 
  604         /*
  605          * We need these two options before the others,
  606          * and they are mandatory for any filesystem.
  607          * Ensure they are NUL terminated as well.
  608          */
  609         fstypelen = 0;
  610         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  611         if (error || fstype[fstypelen - 1] != '\0') {
  612                 error = EINVAL;
  613                 if (errmsg != NULL)
  614                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  615                 goto bail;
  616         }
  617         fspathlen = 0;
  618         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  619         if (error || fspath[fspathlen - 1] != '\0') {
  620                 error = EINVAL;
  621                 if (errmsg != NULL)
  622                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  623                 goto bail;
  624         }
  625 
  626         /*
  627          * We need to see if we have the "update" option
  628          * before we call vfs_domount(), since vfs_domount() has special
  629          * logic based on MNT_UPDATE.  This is very important
  630          * when we want to update the root filesystem.
  631          */
  632         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  633                 if (strcmp(opt->name, "update") == 0) {
  634                         fsflags |= MNT_UPDATE;
  635                         vfs_freeopt(optlist, opt);
  636                 }
  637                 else if (strcmp(opt->name, "async") == 0)
  638                         fsflags |= MNT_ASYNC;
  639                 else if (strcmp(opt->name, "force") == 0) {
  640                         fsflags |= MNT_FORCE;
  641                         vfs_freeopt(optlist, opt);
  642                 }
  643                 else if (strcmp(opt->name, "reload") == 0) {
  644                         fsflags |= MNT_RELOAD;
  645                         vfs_freeopt(optlist, opt);
  646                 }
  647                 else if (strcmp(opt->name, "multilabel") == 0)
  648                         fsflags |= MNT_MULTILABEL;
  649                 else if (strcmp(opt->name, "noasync") == 0)
  650                         fsflags &= ~MNT_ASYNC;
  651                 else if (strcmp(opt->name, "noatime") == 0)
  652                         fsflags |= MNT_NOATIME;
  653                 else if (strcmp(opt->name, "atime") == 0) {
  654                         free(opt->name, M_MOUNT);
  655                         opt->name = strdup("nonoatime", M_MOUNT);
  656                 }
  657                 else if (strcmp(opt->name, "noclusterr") == 0)
  658                         fsflags |= MNT_NOCLUSTERR;
  659                 else if (strcmp(opt->name, "clusterr") == 0) {
  660                         free(opt->name, M_MOUNT);
  661                         opt->name = strdup("nonoclusterr", M_MOUNT);
  662                 }
  663                 else if (strcmp(opt->name, "noclusterw") == 0)
  664                         fsflags |= MNT_NOCLUSTERW;
  665                 else if (strcmp(opt->name, "clusterw") == 0) {
  666                         free(opt->name, M_MOUNT);
  667                         opt->name = strdup("nonoclusterw", M_MOUNT);
  668                 }
  669                 else if (strcmp(opt->name, "noexec") == 0)
  670                         fsflags |= MNT_NOEXEC;
  671                 else if (strcmp(opt->name, "exec") == 0) {
  672                         free(opt->name, M_MOUNT);
  673                         opt->name = strdup("nonoexec", M_MOUNT);
  674                 }
  675                 else if (strcmp(opt->name, "nosuid") == 0)
  676                         fsflags |= MNT_NOSUID;
  677                 else if (strcmp(opt->name, "suid") == 0) {
  678                         free(opt->name, M_MOUNT);
  679                         opt->name = strdup("nonosuid", M_MOUNT);
  680                 }
  681                 else if (strcmp(opt->name, "nosymfollow") == 0)
  682                         fsflags |= MNT_NOSYMFOLLOW;
  683                 else if (strcmp(opt->name, "symfollow") == 0) {
  684                         free(opt->name, M_MOUNT);
  685                         opt->name = strdup("nonosymfollow", M_MOUNT);
  686                 }
  687                 else if (strcmp(opt->name, "noro") == 0) {
  688                         fsflags &= ~MNT_RDONLY;
  689                         has_noro = 1;
  690                 }
  691                 else if (strcmp(opt->name, "rw") == 0) {
  692                         fsflags &= ~MNT_RDONLY;
  693                         has_rw = 1;
  694                 }
  695                 else if (strcmp(opt->name, "ro") == 0)
  696                         fsflags |= MNT_RDONLY;
  697                 else if (strcmp(opt->name, "rdonly") == 0) {
  698                         free(opt->name, M_MOUNT);
  699                         opt->name = strdup("ro", M_MOUNT);
  700                         fsflags |= MNT_RDONLY;
  701                 }
  702                 else if (strcmp(opt->name, "suiddir") == 0)
  703                         fsflags |= MNT_SUIDDIR;
  704                 else if (strcmp(opt->name, "sync") == 0)
  705                         fsflags |= MNT_SYNCHRONOUS;
  706                 else if (strcmp(opt->name, "union") == 0)
  707                         fsflags |= MNT_UNION;
  708         }
  709 
  710         /*
  711          * If "rw" was specified as a mount option, and we
  712          * are trying to update a mount-point from "ro" to "rw",
  713          * we need a mount option "noro", since in vfs_mergeopts(),
  714          * "noro" will cancel "ro", but "rw" will not do anything.
  715          */
  716         if (has_rw && !has_noro) {
  717                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  718                 noro_opt->name = strdup("noro", M_MOUNT);
  719                 noro_opt->value = NULL;
  720                 noro_opt->len = 0;
  721                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
  722         }
  723 
  724         /*
  725          * Be ultra-paranoid about making sure the type and fspath
  726          * variables will fit in our mp buffers, including the
  727          * terminating NUL.
  728          */
  729         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  730                 error = ENAMETOOLONG;
  731                 goto bail;
  732         }
  733 
  734         mtx_lock(&Giant);
  735         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  736         mtx_unlock(&Giant);
  737 bail:
  738         /* copyout the errmsg */
  739         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  740             && errmsg_len > 0 && errmsg != NULL) {
  741                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  742                         bcopy(errmsg,
  743                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  744                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  745                 } else {
  746                         copyout(errmsg,
  747                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  748                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  749                 }
  750         }
  751 
  752         if (error != 0)
  753                 vfs_freeopts(optlist);
  754         return (error);
  755 }
  756 
  757 /*
  758  * Old mount API.
       *
       * mount(2) entry point.  Copies in the filesystem type name, looks up
       * the matching vfsconf (vfs_byname_kld) and hands the rest of the work
       * to that filesystem's vfs_cmount method, passing the type, the path
       * and a few flag-derived options as a mntarg list.
       *
       * Returns the copyinstr() error if the type name cannot be copied in,
       * ENOENT if no filesystem of that type is found, EOPNOTSUPP if the
       * filesystem provides no vfs_cmount method, and otherwise whatever
       * vfs_cmount returns.
  759  */
  760 #ifndef _SYS_SYSPROTO_H_
  761 struct mount_args {
  762         char    *type;
  763         char    *path;
  764         int     flags;
  765         caddr_t data;
  766 };
  767 #endif
  768 /* ARGSUSED */
  769 int
  770 mount(td, uap)
  771         struct thread *td;
  772         struct mount_args /* {
  773                 char *type;
  774                 char *path;
  775                 int flags;
  776                 caddr_t data;
  777         } */ *uap;
  778 {
  779         char *fstype;
  780         struct vfsconf *vfsp = NULL;
  781         struct mntarg *ma = NULL;
  782         int error;
  783 
  784         AUDIT_ARG(fflags, uap->flags);
  785 
  786         /*
  787          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  788          * userspace to set this flag, but we must filter it out if we want
  789          * MNT_UPDATE on the root file system to work.
  790          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  791          */
  792         uap->flags &= ~MNT_ROOTFS;
  793 
              /* Temporary kernel copy of the type name, used only for the lookup. */
  794         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  795         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  796         if (error) {
  797                 free(fstype, M_TEMP);
  798                 return (error);
  799         }
  800 
  801         AUDIT_ARG(text, fstype);
  802         mtx_lock(&Giant);
              /*
               * Any error code stored by vfs_byname_kld() is not propagated;
               * a failed lookup is always reported as ENOENT.
               */
  803         vfsp = vfs_byname_kld(fstype, td, &error);
  804         free(fstype, M_TEMP);
  805         if (vfsp == NULL) {
  806                 mtx_unlock(&Giant);
  807                 return (ENOENT);
  808         }
  809         if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
  810                 mtx_unlock(&Giant);
  811                 return (EOPNOTSUPP);
  812         }
  813 
              /*
               * The type name was already limited to MFSNAMELEN by the
               * copyinstr() above, so apply the same bound when it is copied
               * in again for the argument list (the path keeps MNAMELEN).
               */
  814         ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
  815         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
              /*
               * Translate a few flag bits into boolean options.
               * NOTE(review): presumably mount_argb() emits the name without
               * its "no" prefix when the flag is true -- confirm against
               * mount_argb().
               */
  816         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
  817         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
  818         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
  819 
  820         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
  821         mtx_unlock(&Giant);
  822         return (error);
  823 }
  824 
  825 
  826 /*
  827  * vfs_domount(): actually attempt a filesystem mount.
       *
       * Handles both a fresh mount and an update of an existing mount
       * (MNT_UPDATE in fsflags).  The caller must hold Giant (asserted
       * below).  fsdata is installed as mp->mnt_optnew for the duration of
       * VFS_MOUNT() and becomes mp->mnt_opt only if the mount succeeds.
       *
       * Returns 0 on success or an errno value: ENAMETOOLONG for oversized
       * type/path strings, a priv_check() error for insufficient privilege,
       * ENODEV for an unknown filesystem type, EPERM for a jailed caller
       * and a filesystem without VFCF_JAIL, EINVAL when updating a vnode
       * that is not a filesystem root, EOPNOTSUPP for MNT_RELOAD on a
       * read-write mount, EBUSY when the vnode or mount is busy, ENOTDIR
       * when the mount point is not a directory, or an error from namei(),
       * VOP_GETATTR(), vinvalbuf(), VFS_MOUNT(), vfs_export() or
       * vfs_allocate_syncvnode().
  828  */
  829 static int
  830 vfs_domount(
  831         struct thread *td,      /* Calling thread. */
  832         const char *fstype,     /* Filesystem type. */
  833         char *fspath,           /* Mount path. */
  834         int fsflags,            /* Flags common to all filesystems. */
  835         void *fsdata            /* Options local to the filesystem. */
  836         )
  837 {
  838         struct vnode *vp;
  839         struct mount *mp;
  840         struct vfsconf *vfsp;
  841         struct export_args export;
  842         int error, flag = 0;
  843         struct vattr va;
  844         struct nameidata nd;
  845 
  846         mtx_assert(&Giant, MA_OWNED);
  847         /*
  848          * Be ultra-paranoid about making sure the type and fspath
  849          * variables will fit in our mp buffers, including the
  850          * terminating NUL.
  851          */
  852         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  853                 return (ENAMETOOLONG);
  854 
              /*
               * Jailed callers, and all callers when usermount is disabled,
               * need the mount privilege.
               */
  855         if (jailed(td->td_ucred) || usermount == 0) {
  856                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
  857                         return (error);
  858         }
  859 
  860         /*
  861          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  862          */
  863         if (fsflags & MNT_EXPORTED) {
  864                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
  865                 if (error)
  866                         return (error);
  867         }
  868         if (fsflags & MNT_SUIDDIR) {
  869                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
  870                 if (error)
  871                         return (error);
  872         }
  873         /*
  874          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
  875          */
  876         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
  877                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
  878                         fsflags |= MNT_NOSUID | MNT_USER;
  879         }
  880 
  881         /* Load KLDs before we lock the covered vnode to avoid reversals. */
  882         vfsp = NULL;
  883         if ((fsflags & MNT_UPDATE) == 0) {
  884                 /* Don't try to load KLDs if we're mounting the root. */
  885                 if (fsflags & MNT_ROOTFS)
  886                         vfsp = vfs_byname(fstype);
  887                 else
  888                         vfsp = vfs_byname_kld(fstype, td, &error);
                      /*
                       * A failed lookup is always reported as ENODEV; any
                       * code stored in error by vfs_byname_kld() is dropped.
                       */
  889                 if (vfsp == NULL)
  890                         return (ENODEV);
  891                 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
  892                         return (EPERM);
  893         }
  894         /*
  895          * Get vnode to be covered
  896          */
  897         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
  898             fspath, td);
  899         if ((error = namei(&nd)) != 0)
  900                 return (error);
  901         NDFREE(&nd, NDF_ONLY_PNBUF);
  902         vp = nd.ni_vp;
  903         if (fsflags & MNT_UPDATE) {
                      /* An update must name the root vnode of a mounted filesystem. */
  904                 if ((vp->v_vflag & VV_ROOT) == 0) {
  905                         vput(vp);
  906                         return (EINVAL);
  907                 }
  908                 mp = vp->v_mount;
  909                 MNT_ILOCK(mp);
                      /* Remember the current flags so a failed update can restore them. */
  910                 flag = mp->mnt_flag;
  911                 /*
  912                  * We only allow the filesystem to be reloaded if it
  913                  * is currently mounted read-only.
  914                  */
  915                 if ((fsflags & MNT_RELOAD) &&
  916                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  917                         MNT_IUNLOCK(mp);
  918                         vput(vp);
  919                         return (EOPNOTSUPP);    /* Needs translation */
  920                 }
  921                 MNT_IUNLOCK(mp);
  922                 /*
  923                  * Only privileged root, or (if MNT_USER is set) the user that
  924                  * did the original mount is permitted to update it.
  925                  */
  926                 error = vfs_suser(mp, td);
  927                 if (error) {
  928                         vput(vp);
  929                         return (error);
  930                 }
  931                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  932                         vput(vp);
  933                         return (EBUSY);
  934                 }
                      /*
                       * VI_MOUNT marks a mount operation in progress on this
                       * vnode; refuse to start a second one.
                       */
  935                 VI_LOCK(vp);
  936                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  937                     vp->v_mountedhere != NULL) {
  938                         VI_UNLOCK(vp);
  939                         vfs_unbusy(mp, td);
  940                         vput(vp);
  941                         return (EBUSY);
  942                 }
  943                 vp->v_iflag |= VI_MOUNT;
  944                 VI_UNLOCK(vp);
                      /* Expose the transient request flags to the filesystem. */
  945                 MNT_ILOCK(mp);
  946                 mp->mnt_flag |= fsflags &
  947                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
  948                 MNT_IUNLOCK(mp);
  949                 VOP_UNLOCK(vp, 0, td);
  950                 mp->mnt_optnew = fsdata;
  951                 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  952         } else {
  953                 /*
  954                  * If the user is not root, ensure that they own the directory
  955                  * onto which we are attempting to mount.
  956                  */
  957                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  958                 if (error) {
  959                         vput(vp);
  960                         return (error);
  961                 }
  962                 if (va.va_uid != td->td_ucred->cr_uid) {
  963                         error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
  964                             0);
  965                         if (error) {
  966                                 vput(vp);
  967                                 return (error);
  968                         }
  969                 }
  970                 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
  971                 if (error != 0) {
  972                         vput(vp);
  973                         return (error);
  974                 }
  975                 if (vp->v_type != VDIR) {
  976                         vput(vp);
  977                         return (ENOTDIR);
  978                 }
  979                 VI_LOCK(vp);
  980                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  981                     vp->v_mountedhere != NULL) {
  982                         VI_UNLOCK(vp);
  983                         vput(vp);
  984                         return (EBUSY);
  985                 }
  986                 vp->v_iflag |= VI_MOUNT;
  987                 VI_UNLOCK(vp);
  988 
  989                 /*
  990                  * Allocate and initialize the filesystem.
  991                  */
  992                 mp = vfs_mount_alloc(vp, vfsp, fspath, td);
  993                 VOP_UNLOCK(vp, 0, td);
  994 
  995                 /* XXXMAC: pass to vfs_mount_alloc? */
  996                 mp->mnt_optnew = fsdata;
  997         }
  998 
  999         /*
 1000          * Set the mount level flags.
 1001          */
 1002         MNT_ILOCK(mp);
 1003         mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
 1004                 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
 1005                             MNT_RDONLY));
 1006         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1007                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1008         MNT_IUNLOCK(mp);
 1009         /*
 1010          * Mount the filesystem.
 1011          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1012          * get.  No freeing of cn_pnbuf.
 1013          */
 1014         error = VFS_MOUNT(mp, td);
 1015 
 1016         /*
 1017          * Process the export option only if we are
 1018          * updating mount options.
 1019          */
 1020         if (!error && (fsflags & MNT_UPDATE)) {
 1021                 if (vfs_copyopt(mp->mnt_optnew, "export", &export,
 1022                     sizeof(export)) == 0)
 1023                         error = vfs_export(mp, &export);
 1024         }
 1025 
              /*
               * On success the new option list replaces the old one, which
               * is freed.  On failure mnt_opt is left untouched and the new
               * list is not freed here.
               */
 1026         if (!error) {
 1027                 if (mp->mnt_opt != NULL)
 1028                         vfs_freeopts(mp->mnt_opt);
 1029                 mp->mnt_opt = mp->mnt_optnew;
 1030                 (void)VFS_STATFS(mp, &mp->mnt_stat, td);
 1031         }
 1032         /*
 1033          * Prevent external consumers of mount options from reading
 1034          * mnt_optnew.
 1035         */
 1036         mp->mnt_optnew = NULL;
              /* Update path: finish here; vp was never covered by a new mount. */
 1037         if (mp->mnt_flag & MNT_UPDATE) {
 1038                 MNT_ILOCK(mp);
                      /*
                       * On failure restore the flags saved before the update,
                       * keeping only the current MNT_QUOTA bit.  On success
                       * just clear the transient request flags.
                       */
 1039                 if (error)
 1040                         mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
 1041                                 (flag & ~MNT_QUOTA);
 1042                 else
 1043                         mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
 1044                                           MNT_FORCE | MNT_SNAPSHOT);
 1045                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1046                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1047                 else
 1048                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1049                 MNT_IUNLOCK(mp);
                      /*
                       * A read-write mount needs a syncer vnode and a
                       * read-only one does not.
                       * NOTE(review): the assignment below can replace an
                       * earlier failure code from VFS_MOUNT()/vfs_export()
                       * with the result of vfs_allocate_syncvnode() --
                       * confirm whether that is intended.
                       */
 1050                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1051                         if (mp->mnt_syncer == NULL)
 1052                                 error = vfs_allocate_syncvnode(mp);
 1053                 } else {
 1054                         if (mp->mnt_syncer != NULL)
 1055                                 vrele(mp->mnt_syncer);
 1056                         mp->mnt_syncer = NULL;
 1057                 }
 1058                 vfs_unbusy(mp, td);
 1059                 VI_LOCK(vp);
 1060                 vp->v_iflag &= ~VI_MOUNT;
 1061                 VI_UNLOCK(vp);
 1062                 vrele(vp);
 1063                 return (error);
 1064         }
              /* New mount: settle the async state, then attach or tear down. */
 1065         MNT_ILOCK(mp);
 1066         if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1067                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1068         else
 1069                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1070         MNT_IUNLOCK(mp);
 1071         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1072         /*
 1073          * Put the new filesystem on the mount list after root.
 1074          */
 1075         cache_purge(vp);
 1076         if (!error) {
 1077                 struct vnode *newdp;
 1078 
 1079                 VI_LOCK(vp);
 1080                 vp->v_iflag &= ~VI_MOUNT;
 1081                 VI_UNLOCK(vp);
 1082                 vp->v_mountedhere = mp;
 1083                 mtx_lock(&mountlist_mtx);
 1084                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1085                 mtx_unlock(&mountlist_mtx);
 1086                 vfs_event_signal(NULL, VQ_MOUNT, 0);
 1087                 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
 1088                         panic("mount: lost mount");
 1089                 mountcheckdirs(vp, newdp);
 1090                 vput(newdp);
 1091                 VOP_UNLOCK(vp, 0, td);
                      /*
                       * NOTE(review): if the syncer vnode cannot be
                       * allocated, the error is returned and vp is released,
                       * but the mount stays on the mountlist and attached to
                       * vp -- confirm this is the intended outcome.
                       */
 1092                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1093                         error = vfs_allocate_syncvnode(mp);
 1094                 vfs_unbusy(mp, td);
 1095                 if (error)
 1096                         vrele(vp);
 1097         } else {
                      /* VFS_MOUNT() failed: undo VI_MOUNT and destroy the new mount. */
 1098                 VI_LOCK(vp);
 1099                 vp->v_iflag &= ~VI_MOUNT;
 1100                 VI_UNLOCK(vp);
 1101                 vfs_unbusy(mp, td);
 1102                 vfs_mount_destroy(mp);
 1103                 vput(vp);
 1104         }
 1105         return (error);
 1106 }
 1107 
 1108 /*
 1109  * Unmount a filesystem.
 1110  *
 1111  * Note: unmount takes a path to the vnode mounted on as argument, not
 1112  * special file (as before).
       *
       * With MNT_BYFSID set in flags, the path string is instead parsed as
       * "FSID:<val0>:<val1>" and matched against each mount's f_fsid.
       * Otherwise the string is compared with each mount's f_mntonname.
       * The mountlist is searched from its tail.
       *
       * Returns a priv_check() or copyinstr() error, EINVAL for a malformed
       * FSID string, for a path that matches no mount, or for the root
       * filesystem, ENOENT for an FSID that matches no mount, and otherwise
       * the result of dounmount().
 1113  */
 1114 #ifndef _SYS_SYSPROTO_H_
 1115 struct unmount_args {
 1116         char    *path;
 1117         int     flags;
 1118 };
 1119 #endif
 1120 /* ARGSUSED */
 1121 int
 1122 unmount(td, uap)
 1123         struct thread *td;
 1124         register struct unmount_args /* {
 1125                 char *path;
 1126                 int flags;
 1127         } */ *uap;
 1128 {
 1129         struct mount *mp;
 1130         char *pathbuf;
 1131         int error, id0, id1;
 1132 
              /*
               * Jailed callers, and all callers when usermount is disabled,
               * need the unmount privilege.
               */
 1133         if (jailed(td->td_ucred) || usermount == 0) {
 1134                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1135                 if (error)
 1136                         return (error);
 1137         }
 1138 
 1139         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1140         error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
 1141         if (error) {
 1142                 free(pathbuf, M_TEMP);
 1143                 return (error);
 1144         }
 1145         AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
 1146         mtx_lock(&Giant);
 1147         if (uap->flags & MNT_BYFSID) {
 1148                 /* Decode the filesystem ID. */
 1149                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1150                         mtx_unlock(&Giant);
 1151                         free(pathbuf, M_TEMP);
 1152                         return (EINVAL);
 1153                 }
 1154 
                      /* mp is left NULL when the loop finishes without a match. */
 1155                 mtx_lock(&mountlist_mtx);
 1156                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1157                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1158                             mp->mnt_stat.f_fsid.val[1] == id1)
 1159                                 break;
 1160                 }
 1161                 mtx_unlock(&mountlist_mtx);
 1162         } else {
 1163                 mtx_lock(&mountlist_mtx);
 1164                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1165                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
 1166                                 break;
 1167                 }
 1168                 mtx_unlock(&mountlist_mtx);
 1169         }
 1170         free(pathbuf, M_TEMP);
 1171         if (mp == NULL) {
 1172                 /*
 1173                  * Previously we returned ENOENT for a nonexistent path and
 1174                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1175                  * now, so in the !MNT_BYFSID case return the more likely
 1176                  * EINVAL for compatibility.
 1177                  */
 1178                 mtx_unlock(&Giant);
 1179                 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1180         }
 1181 
 1182         /*
 1183          * Don't allow unmounting the root filesystem.
 1184          */
 1185         if (mp->mnt_flag & MNT_ROOTFS) {
 1186                 mtx_unlock(&Giant);
 1187                 return (EINVAL);
 1188         }
              /* dounmount() asserts that Giant is held. */
 1189         error = dounmount(mp, uap->flags, td);
 1190         mtx_unlock(&Giant);
 1191         return (error);
 1192 }
 1193 
 1194 /*
 1195  * Do the actual filesystem unmount.
       *
       * The caller must hold Giant (asserted below).  The covered vnode, if
       * any, is locked exclusively for the duration.  The mount is marked
       * MNTK_UNMOUNT | MNTK_NOINSMNTQ, its mnt_lock is drained, and the
       * filesystem is synced and asked to unmount via VFS_UNMOUNT().
       *
       * On success the mount is removed from the mountlist, detached from
       * the covered vnode and destroyed, and 0 is returned.  On failure the
       * flag changes are rolled back and the error is returned.  EBUSY is
       * returned if mp is no longer mounted on the covered vnode or if
       * another unmount of mp is already in progress.
 1196  */
 1197 int
 1198 dounmount(mp, flags, td)
 1199         struct mount *mp;
 1200         int flags;
 1201         struct thread *td;
 1202 {
 1203         struct vnode *coveredvp, *fsrootvp;
 1204         int error;
 1205         int async_flag;
 1206         int mnt_gen_r;
 1207 
 1208         mtx_assert(&Giant, MA_OWNED);
 1209 
 1210         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
                      /*
                       * Record the mount generation before sleeping on the
                       * vnode lock so a change of mount can be detected
                       * afterwards.  The hold taken by vholdl() spans only
                       * the lock acquisition.
                       */
 1211                 mnt_gen_r = mp->mnt_gen;
 1212                 VI_LOCK(coveredvp);
 1213                 vholdl(coveredvp);
 1214                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
 1215                 vdrop(coveredvp);
 1216                 /*
 1217                  * Check for mp being unmounted while waiting for the
 1218                  * covered vnode lock.
 1219                  */
 1220                 if (coveredvp->v_mountedhere != mp ||
 1221                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1222                         VOP_UNLOCK(coveredvp, 0, td);
 1223                         return (EBUSY);
 1224                 }
 1225         }
 1226         /*
 1227          * Only privileged root, or (if MNT_USER is set) the user that did the
 1228          * original mount is permitted to unmount this filesystem.
 1229          */
 1230         error = vfs_suser(mp, td);
 1231         if (error) {
 1232                 if (coveredvp)
 1233                         VOP_UNLOCK(coveredvp, 0, td);
 1234                 return (error);
 1235         }
 1236 
 1237         MNT_ILOCK(mp);
              /* Only one unmount of a given mount may be in progress. */
 1238         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 1239                 MNT_IUNLOCK(mp);
 1240                 if (coveredvp)
 1241                         VOP_UNLOCK(coveredvp, 0, td);
 1242                 return (EBUSY);
 1243         }
 1244         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
 1245         /* Allow filesystems to detect that a forced unmount is in progress. */
 1246         if (flags & MNT_FORCE)
 1247                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
              /*
               * Drain mnt_lock.  LK_INTERLOCK hands the mount interlock to
               * lockmgr(); the failure path below re-acquires it.  A
               * non-forced unmount passes LK_NOWAIT and fails instead of
               * waiting.
               */
 1248         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
 1249             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
 1250         if (error) {
 1251                 MNT_ILOCK(mp);
 1252                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
 1253                     MNTK_UNMOUNTF);
 1254                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1255                         wakeup(mp);
 1256                 MNT_IUNLOCK(mp);
 1257                 if (coveredvp)
 1258                         VOP_UNLOCK(coveredvp, 0, td);
 1259                 return (error);
 1260         }
 1261         vn_start_write(NULL, &mp, V_WAIT);
 1262 
 1263         if (mp->mnt_flag & MNT_EXPUBLIC)
 1264                 vfs_setpublicfs(NULL, NULL, NULL);
 1265 
 1266         vfs_msync(mp, MNT_WAIT);
 1267         MNT_ILOCK(mp);
              /* Remember MNT_ASYNC so it can be restored if the unmount fails. */
 1268         async_flag = mp->mnt_flag & MNT_ASYNC;
 1269         mp->mnt_flag &= ~MNT_ASYNC;
 1270         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1271         MNT_IUNLOCK(mp);
 1272         cache_purgevfs(mp);     /* remove cache entries for this file sys */
              /*
               * NOTE(review): mnt_syncer is not cleared here, yet the failure
               * path below tests it for NULL; presumably releasing the syncer
               * vnode clears the field -- confirm against the syncer vnode
               * code.
               */
 1273         if (mp->mnt_syncer != NULL)
 1274                 vrele(mp->mnt_syncer);
 1275         /*
 1276          * For forced unmounts, move process cdir/rdir refs on the fs root
 1277          * vnode to the covered vnode.  For non-forced unmounts we want
 1278          * such references to cause an EBUSY error.
 1279          */
 1280         if ((flags & MNT_FORCE) &&
 1281             VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1282                 if (mp->mnt_vnodecovered != NULL)
 1283                         mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
 1284                 if (fsrootvp == rootvnode) {
 1285                         vrele(rootvnode);
 1286                         rootvnode = NULL;
 1287                 }
 1288                 vput(fsrootvp);
 1289         }
              /*
               * Call VFS_UNMOUNT() if the filesystem is read-only, or if
               * syncing it succeeded, or if the unmount is forced.
               * Otherwise error keeps the VFS_SYNC() failure.
               */
 1290         if (((mp->mnt_flag & MNT_RDONLY) ||
 1291              (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
 1292             (flags & MNT_FORCE)) {
 1293                 error = VFS_UNMOUNT(mp, flags, td);
 1294         }
 1295         vn_finished_write(mp);
 1296         if (error) {
 1297                 /* Undo cdir/rdir and rootvnode changes made above. */
 1298                 if ((flags & MNT_FORCE) &&
 1299                     VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1300                         if (mp->mnt_vnodecovered != NULL)
 1301                                 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
 1302                         if (rootvnode == NULL) {
 1303                                 rootvnode = fsrootvp;
 1304                                 vref(rootvnode);
 1305                         }
 1306                         vput(fsrootvp);
 1307                 }
 1308                 MNT_ILOCK(mp);
 1309                 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
                      /*
                       * The interlock is dropped around
                       * vfs_allocate_syncvnode() and re-taken afterwards.
                       */
 1310                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
 1311                         MNT_IUNLOCK(mp);
 1312                         (void) vfs_allocate_syncvnode(mp);
 1313                         MNT_ILOCK(mp);
 1314                 }
 1315                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1316                 mp->mnt_flag |= async_flag;
 1317                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1318                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1319                 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1320                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1321                         wakeup(mp);
 1322                 MNT_IUNLOCK(mp);
 1323                 if (coveredvp)
 1324                         VOP_UNLOCK(coveredvp, 0, td);
 1325                 return (error);
 1326         }
              /* Success: unlink the mount and detach it from the covered vnode. */
 1327         mtx_lock(&mountlist_mtx);
 1328         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1329         mtx_unlock(&mountlist_mtx);
 1330         if (coveredvp != NULL) {
 1331                 coveredvp->v_mountedhere = NULL;
 1332                 vput(coveredvp);
 1333         }
 1334         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1335         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1336         vfs_mount_destroy(mp);
 1337         return (0);
 1338 }
 1339 
 1340 /*
 1341  * ---------------------------------------------------------------------
 1342  * Mounting of root filesystem
 1343  *
 1344  */
 1345 
 1346 struct root_hold_token {
              /* Name printed while the root mount is waiting on this hold. */
 1347         const char                      *who;
              /* Linkage on the root_holds list. */
 1348         LIST_ENTRY(root_hold_token)     list;
 1349 };
 1350 
      /* Outstanding root mount holds; modified under mountlist_mtx. */
 1351 static LIST_HEAD(, root_hold_token)     root_holds =
 1352     LIST_HEAD_INITIALIZER(&root_holds);
 1353 
      /* Set to 1 by root_mount_done(); also used as a sleep/wakeup channel. */
 1354 static int root_mount_complete;
 1355 
 1356 /*
 1357  * Hold root mount.
       *
       * Allocates a hold token labelled with identifier and adds it to the
       * root_holds list, which root_mount_prepare() waits to become empty.
       * The identifier pointer is stored, not copied, so the string must
       * remain valid until the hold is released.  The allocation uses
       * M_WAITOK.  Returns the token to pass to root_mount_rel().
 1358  */
 1359 struct root_hold_token *
 1360 root_mount_hold(const char *identifier)
 1361 {
 1362         struct root_hold_token *h;
 1363 
 1364         h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
 1365         h->who = identifier;
 1366         mtx_lock(&mountlist_mtx);
 1367         LIST_INSERT_HEAD(&root_holds, h, list);
 1368         mtx_unlock(&mountlist_mtx);
 1369         return (h);
 1370 }
 1371 
 1372 /*
 1373  * Release root mount.
       *
       * Removes the token h from the root_holds list, wakes any thread
       * sleeping on root_holds (see root_mount_prepare()) and frees the
       * token.  h must not be used after this call.
 1374  */
 1375 void
 1376 root_mount_rel(struct root_hold_token *h)
 1377 {
 1378 
 1379         mtx_lock(&mountlist_mtx);
 1380         LIST_REMOVE(h, list);
 1381         wakeup(&root_holds);
 1382         mtx_unlock(&mountlist_mtx);
 1383         free(h, M_DEVBUF);
 1384 }
 1385 
 1386 /*
 1387  * Wait for all subsystems to release root mount.
       *
       * Each pass drops Giant around g_waitidle(), then checks the
       * root_holds list.  While holds remain, their names are printed and
       * the thread sleeps on root_holds for up to hz ticks (or until
       * root_mount_rel() issues a wakeup) before trying again.
 1388  */
 1389 static void
 1390 root_mount_prepare(void)
 1391 {
 1392         struct root_hold_token *h;
 1393 
 1394         for (;;) {
 1395                 DROP_GIANT();
 1396                 g_waitidle();
 1397                 PICKUP_GIANT();
 1398                 mtx_lock(&mountlist_mtx);
 1399                 if (LIST_EMPTY(&root_holds)) {
 1400                         mtx_unlock(&mountlist_mtx);
 1401                         break;
 1402                 }
 1403                 printf("Root mount waiting for:");
 1404                 LIST_FOREACH(h, &root_holds, list)
 1405                         printf(" %s", h->who);
 1406                 printf("\n");
                      /*
                       * PDROP: mountlist_mtx is passed to msleep() and is not
                       * unlocked explicitly afterwards; the next pass takes
                       * it again.
                       */
 1407                 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
 1408                     hz);
 1409         }
 1410 }
 1411 
 1412 /*
 1413  * Root was mounted, share the good news.
       *
       * Sets root_mount_complete and wakes the threads sleeping in
       * root_mount_wait().
 1414  */
 1415 static void
 1416 root_mount_done(void)
 1417 {
 1418 
 1419         /*
 1420          * Use a mutex to prevent the wakeup being missed and waiting for
 1421          * an extra 1 second sleep.
 1422          */
 1423         mtx_lock(&mountlist_mtx);
 1424         root_mount_complete = 1;
 1425         wakeup(&root_mount_complete);
 1426         mtx_unlock(&mountlist_mtx);
 1427 }
 1428 
 1429 /*
 1430  * Return true if root is already mounted.
       *
       * Returns the current value of root_mount_complete: non-zero once
       * root_mount_done() has run, zero before that.  Does not sleep.
 1431  */
 1432 int
 1433 root_mounted(void)
 1434 {
 1435 
 1436         /* No mutex is acquired here because int stores are atomic. */
 1437         return (root_mount_complete);
 1438 }
 1439 
 1440 /*
 1441  * Wait until root is mounted.
       *
       * Sleeps on root_mount_complete under mountlist_mtx until
       * root_mount_done() sets it.  Each sleep is bounded by hz ticks, after
       * which the flag is checked again.
 1442  */
 1443 void
 1444 root_mount_wait(void)
 1445 {
 1446 
 1447         /*
 1448          * Panic on an obvious deadlock - the function can't be called from
 1449          * a thread which is doing the whole SYSINIT stuff.
 1450          */
 1451         KASSERT(curthread->td_proc->p_pid != 0,
 1452             ("root_mount_wait: cannot be called from the swapper thread"));
 1453         mtx_lock(&mountlist_mtx);
 1454         while (!root_mount_complete) {
 1455                 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
 1456                     hz);
 1457         }
 1458         mtx_unlock(&mountlist_mtx);
 1459 }
 1460 
 1461 static void
 1462 set_rootvnode(struct thread *td)
 1463 {
 1464         struct proc *p;
 1465 
              /*
               * Make the root vnode of the first filesystem on the mountlist
               * the global rootvnode, and install it as both the current
               * directory and the root directory of td's process.  Panics if
               * the root vnode cannot be obtained.
               */
 1466         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1467                 panic("Cannot find root vnode");
 1468 
 1469         p = td->td_proc;
              /*
               * fd_cdir and fd_rdir are modified below, so take the filedesc
               * lock exclusively rather than shared.
               */
 1470         FILEDESC_XLOCK(p->p_fd);
 1471 
              /* Each directory slot holds its own reference on rootvnode. */
 1472         if (p->p_fd->fd_cdir != NULL)
 1473                 vrele(p->p_fd->fd_cdir);
 1474         p->p_fd->fd_cdir = rootvnode;
 1475         VREF(rootvnode);
 1476 
 1477         if (p->p_fd->fd_rdir != NULL)
 1478                 vrele(p->p_fd->fd_rdir);
 1479         p->p_fd->fd_rdir = rootvnode;
 1480         VREF(rootvnode);
 1481 
 1482         FILEDESC_XUNLOCK(p->p_fd);
 1483 
              /*
               * Drop the vnode lock requested from VFS_ROOT(); the reference
               * it returned is kept for the global rootvnode pointer.
               */
 1484         VOP_UNLOCK(rootvnode, 0, td);
 1485 }
 1486 
 1487 /*
 1488  * Mount devfs as our initial root filesystem and put it at the head of
 1489  * the mountlist.  Create a dev -> / symlink so that absolute pathnames
 1490  * of the form /dev/... will look up while devfs is the root.
       *
       * Returns silently if devfs is not configured or its VFS_MOUNT()
       * fails (both cases also trip a KASSERT).  A failure to create the
       * symlink is only reported with printf().
       */
 1491 
 1492 static void
 1493 devfs_first(void)
 1494 {
 1495         struct thread *td = curthread;
 1496         struct vfsoptlist *opts;
 1497         struct vfsconf *vfsp;
 1498         struct mount *mp = NULL;
 1499         int error;
 1500 
 1501         vfsp = vfs_byname("devfs");
 1502         KASSERT(vfsp != NULL, ("Could not find devfs by name"));
              /* Explicit check as well, in case the assertion is compiled out. */
 1503         if (vfsp == NULL)
 1504                 return;
 1505 
              /* No covered vnode (NULLVP): this mount is not on top of anything. */
 1506         mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
 1507 
 1508         error = VFS_MOUNT(mp, td);
 1509         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
              /* NOTE(review): mp is not released on this path -- confirm acceptable. */
 1510         if (error)
 1511                 return;
 1512 
              /* Give the mount an empty option list. */
 1513         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 1514         TAILQ_INIT(opts);
 1515         mp->mnt_opt = opts;
 1516 
 1517         mtx_lock(&mountlist_mtx);
 1518         TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1519         mtx_unlock(&mountlist_mtx);
 1520 
              /* set_rootvnode() uses the first mountlist entry, i.e. this devfs. */
 1521         set_rootvnode(td);
 1522 
 1523         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1524         if (error)
 1525                 printf("kern_symlink /dev -> / returns %d\n", error);
 1526 }
 1527 
 1528 /*
 1529  * Surgically move our devfs to be mounted on /dev.
       *
       * Takes the first mount off the mountlist (assumed to be the devfs
       * mount placed there by devfs_first()), makes the new first entry the
       * root filesystem, looks up /dev in it and attaches the devfs mount to
       * that directory, re-inserting it at the tail of the mountlist.
       *
       * If /dev cannot be looked up, is not a directory, or cannot be
       * flushed, a message is printed, the vnodes obtained so far are
       * released and the function returns with devfs left off the
       * mountlist.
 1530  */
 1531 
 1532 static void
 1533 devfs_fixup(struct thread *td)
 1534 {
 1535         struct nameidata nd;
 1536         int error;
 1537         struct vnode *vp, *dvp;
 1538         struct mount *mp;
 1539 
 1540         /* Remove our devfs mount from the mountlist and purge the cache */
 1541         mtx_lock(&mountlist_mtx);
 1542         mp = TAILQ_FIRST(&mountlist);
 1543         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1544         mtx_unlock(&mountlist_mtx);
 1545         cache_purgevfs(mp);
 1546 
              /*
               * dvp is the devfs root vnode, locked and referenced until the
               * vput() calls below.
               * NOTE(review): the VFS_ROOT() return value is not checked.
               */
 1547         VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
 1548         VI_LOCK(dvp);
 1549         dvp->v_iflag &= ~VI_MOUNT;
 1550         VI_UNLOCK(dvp);
 1551         dvp->v_mountedhere = NULL;
 1552 
 1553         /* Set up the real rootvnode, and purge the cache */
 1554         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1555         set_rootvnode(td);
 1556         cache_purgevfs(rootvnode->v_mount);
 1557 
 1558         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1559         error = namei(&nd);
 1560         if (error) {
 1561                 printf("Lookup of /dev for devfs, error: %d\n", error);
                      /* Do not leave the devfs root locked and referenced. */
                      vput(dvp);
 1562                 return;
 1563         }
 1564         NDFREE(&nd, NDF_ONLY_PNBUF);
 1565         vp = nd.ni_vp;
 1566         if (vp->v_type != VDIR) {
                      /*
                       * vp must not be used once it has been released, so
                       * give up instead of falling through.
                       */
                      printf("/dev is not a directory, devfs not remounted\n");
 1567                 vput(vp);
                      vput(dvp);
                      return;
 1568         }
 1569         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1570         if (error) {
                      printf("vinvalbuf of /dev for devfs, error: %d\n", error);
 1571                 vput(vp);
                      vput(dvp);
                      return;
 1572         }
 1573         cache_purge(vp);
              /* Attach devfs on top of /dev and put it back on the mountlist. */
 1574         mp->mnt_vnodecovered = vp;
 1575         vp->v_mountedhere = mp;
 1576         mtx_lock(&mountlist_mtx);
 1577         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1578         mtx_unlock(&mountlist_mtx);
              /* The reference on vp from namei() is kept as the covered-vnode reference. */
 1579         VOP_UNLOCK(vp, 0, td);
 1580         vput(dvp);
 1581         vfs_unbusy(mp, td);
 1582 
 1583         /* Unlink the no longer needed /dev/dev -> / symlink */
 1584         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
 1585 }
 1586 
 1587 /*
 1588  * Report errors during filesystem mounting.
       *
       * Formats the printf-style message fmt into the buffer supplied as the
       * "errmsg" option in mp->mnt_optnew, truncated to that buffer's
       * length.  Does nothing if the option is absent, has no buffer or has
       * a non-positive length.
       * NOTE(review): mp->mnt_optnew is passed to vfs_getopt() unchecked --
       * confirm it is non-NULL for every caller.
 1589  */
 1590 void
 1591 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1592 {
 1593         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1594         va_list ap;
 1595         int error, len;
 1596         char *errmsg;
 1597 
 1598         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1599         if (error || errmsg == NULL || len <= 0)
 1600                 return;
 1601 
 1602         va_start(ap, fmt);
 1603         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1604         va_end(ap);
 1605 }
 1606 
 1607 /*
 1608  * Find and mount the root filesystem
 1609  */
 1610 void
 1611 vfs_mountroot(void)
 1612 {
 1613         char *cp;
 1614         int error, i, asked = 0;
 1615 
 1616         root_mount_prepare();
 1617 
 1618         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
 1619             NULL, NULL, mount_init, mount_fini,
 1620             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1621         devfs_first();
 1622 
 1623         /*
 1624          * We are booted with instructions to prompt for the root filesystem.
 1625          */
 1626         if (boothowto & RB_ASKNAME) {
 1627                 if (!vfs_mountroot_ask())
 1628                         goto mounted;
 1629                 asked = 1;
 1630         }
 1631 
 1632         /*
 1633          * The root filesystem information is compiled in, and we are
 1634          * booted with instructions to use it.
 1635          */
 1636         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1637                 if (!vfs_mountroot_try(ctrootdevname))
 1638                         goto mounted;
 1639                 ctrootdevname = NULL;
 1640         }
 1641 
 1642         /*
 1643          * We've been given the generic "use CDROM as root" flag.  This is
 1644          * necessary because one media may be used in many different
 1645          * devices, so we need to search for them.
 1646          */
 1647         if (boothowto & RB_CDROM) {
 1648                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1649                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1650                                 goto mounted;
 1651                 }
 1652         }
 1653 
 1654         /*
 1655          * Try to use the value read by the loader from /etc/fstab, or
 1656          * supplied via some other means.  This is the preferred
 1657          * mechanism.
 1658          */
 1659         cp = getenv("vfs.root.mountfrom");
 1660         if (cp != NULL) {
 1661                 error = vfs_mountroot_try(cp);
 1662                 freeenv(cp);
 1663                 if (!error)
 1664                         goto mounted;
 1665         }
 1666 
 1667         /*
 1668          * Try values that may have been computed by code during boot
 1669          */
 1670         if (!vfs_mountroot_try(rootdevnames[0]))
 1671                 goto mounted;
 1672         if (!vfs_mountroot_try(rootdevnames[1]))
 1673                 goto mounted;
 1674 
 1675         /*
 1676          * If we (still) have a compiled-in default, try it.
 1677          */
 1678         if (ctrootdevname != NULL)
 1679                 if (!vfs_mountroot_try(ctrootdevname))
 1680                         goto mounted;
 1681         /*
 1682          * Everything so far has failed, prompt on the console if we haven't
 1683          * already tried that.
 1684          */
 1685         if (!asked)
 1686                 if (!vfs_mountroot_ask())
 1687                         goto mounted;
 1688 
 1689         panic("Root mount failed, startup aborted.");
 1690 
 1691 mounted:
 1692         root_mount_done();
 1693 }
 1694 
 1695 /*
 1696  * Mount (mountfrom) as the root filesystem.
 1697  */
 1698 static int
 1699 vfs_mountroot_try(const char *mountfrom)
 1700 {
 1701         struct mount    *mp;
 1702         char            *vfsname, *path;
 1703         time_t          timebase;
 1704         int             error;
 1705         char            patt[32];
 1706 
 1707         vfsname = NULL;
 1708         path    = NULL;
 1709         mp      = NULL;
 1710         error   = EINVAL;
 1711 
 1712         if (mountfrom == NULL)
 1713                 return (error);         /* don't complain */
 1714         printf("Trying to mount root from %s\n", mountfrom);
 1715 
 1716         /* parse vfs name and path */
 1717         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1718         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1719         vfsname[0] = path[0] = 0;
 1720         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1721         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1722                 goto out;
 1723 
 1724         if (path[0] == '\0')
 1725                 strcpy(path, ROOTNAME);
 1726 
 1727         error = kernel_vmount(
 1728             MNT_RDONLY | MNT_ROOTFS,
 1729             "fstype", vfsname,
 1730             "fspath", "/",
 1731             "from", path,
 1732             NULL);
 1733         if (error == 0) {
 1734                 /*
 1735                  * We mount devfs prior to mounting the / FS, so the first
 1736                  * entry will typically be devfs.
 1737                  */
 1738                 mp = TAILQ_FIRST(&mountlist);
 1739                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1740 
 1741                 /*
 1742                  * Iterate over all currently mounted file systems and use
 1743                  * the time stamp found to check and/or initialize the RTC.
 1744                  * Typically devfs has no time stamp and the only other FS
 1745                  * is the actual / FS.
 1746                  * Call inittodr() only once and pass it the largest of the
 1747                  * timestamps we encounter.
 1748                  */
 1749                 timebase = 0;
 1750                 do {
 1751                         if (mp->mnt_time > timebase)
 1752                                 timebase = mp->mnt_time;
 1753                         mp = TAILQ_NEXT(mp, mnt_list);
 1754                 } while (mp != NULL);
 1755                 inittodr(timebase);
 1756 
 1757                 devfs_fixup(curthread);
 1758         }
 1759 out:
 1760         free(path, M_MOUNT);
 1761         free(vfsname, M_MOUNT);
 1762         return (error);
 1763 }
 1764 
 1765 /*
 1766  * ---------------------------------------------------------------------
 1767  * Interactive root filesystem selection code.
 1768  */
 1769 
 /*
  * Prompt on the console for a root filesystem specification until one
  * mounts successfully or the operator gives up.
  *
  * An empty line aborts, "?" lists the GEOM disk devices and prompts
  * again, and anything else is handed to vfs_mountroot_try().
  *
  * Returns 0 once a root filesystem has been mounted, 1 on abort.
  */
 static int
 vfs_mountroot_ask(void)
 {
         char line[128];

         for (;;) {
                 printf("\nManual root filesystem specification:\n");
                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
                 printf("                       eg. ufs:da0s1a\n");
 #else
                 printf("                       eg. ufs:/dev/da0a\n");
 #endif
                 printf("  ?                  List valid disk boot devices\n");
                 printf("  <empty line>       Abort manual input\n");
                 printf("\nmountroot> ");
                 gets(line, sizeof(line), 1);

                 /* Empty input: the operator wants out. */
                 if (line[0] == '\0')
                         return (1);

                 if (line[0] != '?') {
                         if (vfs_mountroot_try(line) == 0)
                                 return (0);
                         /* Mount failed; show the menu again. */
                         continue;
                 }

                 printf("\nList of GEOM managed disk devices:\n  ");
                 g_dev_print();
         }
 }
 1798 
 1799 /*
 1800  * ---------------------------------------------------------------------
 1801  * Functions for querying mount options/arguments from filesystems.
 1802  */
 1803 
 1804 /*
 1805  * Check that no unknown options are given
 1806  */
 1807 int
 1808 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1809 {
 1810         struct vfsopt *opt;
 1811         char errmsg[255];
 1812         const char **t, *p, *q;
 1813         int ret = 0;
 1814 
 1815         TAILQ_FOREACH(opt, opts, link) {
 1816                 p = opt->name;
 1817                 q = NULL;
 1818                 if (p[0] == 'n' && p[1] == 'o')
 1819                         q = p + 2;
 1820                 for(t = global_opts; *t != NULL; t++) {
 1821                         if (strcmp(*t, p) == 0)
 1822                                 break;
 1823                         if (q != NULL) {
 1824                                 if (strcmp(*t, q) == 0)
 1825                                         break;
 1826                         }
 1827                 }
 1828                 if (*t != NULL)
 1829                         continue;
 1830                 for(t = legal; *t != NULL; t++) {
 1831                         if (strcmp(*t, p) == 0)
 1832                                 break;
 1833                         if (q != NULL) {
 1834                                 if (strcmp(*t, q) == 0)
 1835                                         break;
 1836                         }
 1837                 }
 1838                 if (*t != NULL)
 1839                         continue;
 1840                 snprintf(errmsg, sizeof(errmsg),
 1841                     "mount option <%s> is unknown", p);
 1842                 printf("%s\n", errmsg);
 1843                 ret = EINVAL;
 1844         }
 1845         if (ret != 0) {
 1846                 TAILQ_FOREACH(opt, opts, link) {
 1847                         if (strcmp(opt->name, "errmsg") == 0) {
 1848                                 strncpy((char *)opt->value, errmsg, opt->len);
 1849                         }
 1850                 }
 1851         }
 1852         return (ret);
 1853 }
 1854 
 1855 /*
 1856  * Get a mount option by its name.
 1857  *
 1858  * Return 0 if the option was found, ENOENT otherwise.
 1859  * If len is non-NULL it will be filled with the length
 1860  * of the option. If buf is non-NULL, it will be filled
 1861  * with the address of the option.
 1862  */
 1863 int
 1864 vfs_getopt(opts, name, buf, len)
 1865         struct vfsoptlist *opts;
 1866         const char *name;
 1867         void **buf;
 1868         int *len;
 1869 {
 1870         struct vfsopt *opt;
 1871 
 1872         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1873 
 1874         TAILQ_FOREACH(opt, opts, link) {
 1875                 if (strcmp(name, opt->name) == 0) {
 1876                         if (len != NULL)
 1877                                 *len = opt->len;
 1878                         if (buf != NULL)
 1879                                 *buf = opt->value;
 1880                         return (0);
 1881                 }
 1882         }
 1883         return (ENOENT);
 1884 }
 1885 
 1886 static int
 1887 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 1888 {
 1889         struct vfsopt *opt;
 1890         int i;
 1891 
 1892         if (opts == NULL)
 1893                 return (-1);
 1894 
 1895         i = 0;
 1896         TAILQ_FOREACH(opt, opts, link) {
 1897                 if (strcmp(name, opt->name) == 0)
 1898                         return (i);
 1899                 ++i;
 1900         }
 1901         return (-1);
 1902 }
 1903 
 1904 char *
 1905 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1906 {
 1907         struct vfsopt *opt;
 1908 
 1909         *error = 0;
 1910         TAILQ_FOREACH(opt, opts, link) {
 1911                 if (strcmp(name, opt->name) != 0)
 1912                         continue;
 1913                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1914                         *error = EINVAL;
 1915                         return (NULL);
 1916                 }
 1917                 return (opt->value);
 1918         }
 1919         *error = ENOENT;
 1920         return (NULL);
 1921 }
 1922 
 1923 int
 1924 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1925 {
 1926         struct vfsopt *opt;
 1927 
 1928         TAILQ_FOREACH(opt, opts, link) {
 1929                 if (strcmp(name, opt->name) == 0) {
 1930                         if (w != NULL)
 1931                                 *w |= val;
 1932                         return (1);
 1933                 }
 1934         }
 1935         if (w != NULL)
 1936                 *w &= ~val;
 1937         return (0);
 1938 }
 1939 
 1940 int
 1941 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1942 {
 1943         va_list ap;
 1944         struct vfsopt *opt;
 1945         int ret;
 1946 
 1947         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1948 
 1949         TAILQ_FOREACH(opt, opts, link) {
 1950                 if (strcmp(name, opt->name) != 0)
 1951                         continue;
 1952                 if (opt->len == 0 || opt->value == NULL)
 1953                         return (0);
 1954                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1955                         return (0);
 1956                 va_start(ap, fmt);
 1957                 ret = vsscanf(opt->value, fmt, ap);
 1958                 va_end(ap);
 1959                 return (ret);
 1960         }
 1961         return (0);
 1962 }
 1963 
 1964 /*
 1965  * Find and copy a mount option.
 1966  *
 1967  * The size of the buffer has to be specified
 1968  * in len, if it is not the same length as the
 1969  * mount option, EINVAL is returned.
 1970  * Returns ENOENT if the option is not found.
 1971  */
 1972 int
 1973 vfs_copyopt(opts, name, dest, len)
 1974         struct vfsoptlist *opts;
 1975         const char *name;
 1976         void *dest;
 1977         int len;
 1978 {
 1979         struct vfsopt *opt;
 1980 
 1981         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1982 
 1983         TAILQ_FOREACH(opt, opts, link) {
 1984                 if (strcmp(name, opt->name) == 0) {
 1985                         if (len != opt->len)
 1986                                 return (EINVAL);
 1987                         bcopy(opt->value, dest, opt->len);
 1988                         return (0);
 1989                 }
 1990         }
 1991         return (ENOENT);
 1992 }
 1993 
 1994 /*
 1995  * This is a helper function for filesystems to traverse their
 1996  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 1997  */
 1998 
 1999 struct vnode *
 2000 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 2001 {
 2002         struct vnode *vp;
 2003 
 2004         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2005 
 2006         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2007         if ((*mvp)->v_yield++ == 500) {
 2008                 MNT_IUNLOCK(mp);
 2009                 (*mvp)->v_yield = 0;
 2010                 uio_yield();
 2011                 MNT_ILOCK(mp);
 2012         }
 2013         vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 2014         while (vp != NULL && vp->v_type == VMARKER)
 2015                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2016 
 2017         /* Check if we are done */
 2018         if (vp == NULL) {
 2019                 __mnt_vnode_markerfree(mvp, mp);
 2020                 return (NULL);
 2021         }
 2022         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2023         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2024         return (vp);
 2025 }
 2026 
 2027 struct vnode *
 2028 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 2029 {
 2030         struct vnode *vp;
 2031 
 2032         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2033 
 2034         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2035         while (vp != NULL && vp->v_type == VMARKER)
 2036                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2037 
 2038         /* Check if we are done */
 2039         if (vp == NULL) {
 2040                 *mvp = NULL;
 2041                 return (NULL);
 2042         }
 2043         mp->mnt_holdcnt++;
 2044         MNT_IUNLOCK(mp);
 2045         *mvp = (struct vnode *) malloc(sizeof(struct vnode),
 2046                                        M_VNODE_MARKER,
 2047                                        M_WAITOK | M_ZERO);
 2048         MNT_ILOCK(mp);
 2049         (*mvp)->v_type = VMARKER;
 2050 
 2051         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2052         while (vp != NULL && vp->v_type == VMARKER)
 2053                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2054 
 2055         /* Check if we are done */
 2056         if (vp == NULL) {
 2057                 MNT_IUNLOCK(mp);
 2058                 free(*mvp, M_VNODE_MARKER);
 2059                 MNT_ILOCK(mp);
 2060                 *mvp = NULL;
 2061                 mp->mnt_holdcnt--;
 2062                 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2063                         wakeup(&mp->mnt_holdcnt);
 2064                 return (NULL);
 2065         }
 2066         mp->mnt_markercnt++;
 2067         (*mvp)->v_mount = mp;
 2068         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2069         return (vp);
 2070 }
 2071 
 2072 
 2073 void
 2074 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 2075 {
 2076 
 2077         if (*mvp == NULL)
 2078                 return;
 2079 
 2080         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2081 
 2082         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2083         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2084         MNT_IUNLOCK(mp);
 2085         free(*mvp, M_VNODE_MARKER);
 2086         MNT_ILOCK(mp);
 2087         *mvp = NULL;
 2088 
 2089         mp->mnt_markercnt--;
 2090         mp->mnt_holdcnt--;
 2091         if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2092                 wakeup(&mp->mnt_holdcnt);
 2093 }
 2094 
 2095 
 2096 int
 2097 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 2098 {
 2099         int error;
 2100 
 2101         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 2102         if (sbp != &mp->mnt_stat)
 2103                 *sbp = mp->mnt_stat;
 2104         return (error);
 2105 }
 2106 
 2107 void
 2108 vfs_mountedfrom(struct mount *mp, const char *from)
 2109 {
 2110 
 2111         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2112         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2113             sizeof mp->mnt_stat.f_mntfromname);
 2114 }
 2115 
 2116 /*
 2117  * ---------------------------------------------------------------------
 2118  * This is the api for building mount args and mounting filesystems from
 2119  * inside the kernel.
 2120  *
 2121  * The API works by accumulation of individual args.  First error is
 2122  * latched.
 2123  *
 2124  * XXX: should be documented in new manpage kernel_mount(9)
 2125  */
 2126 
 2127 /* A memory allocation which must be freed when we are done */
 2128 struct mntaarg {
 2129         SLIST_ENTRY(mntaarg)    next;
 2130 };
 2131 
 2132 /* The header for the mount arguments */
 2133 struct mntarg {
 2134         struct iovec *v;
 2135         int len;
 2136         int error;
 2137         SLIST_HEAD(, mntaarg)   list;
 2138 };
 2139 
 /*
  * Append a boolean option.
  *
  * name must be the negated form of the option, i.e. start with "no".
  * When flag is true the option is added with the "no" prefix stripped;
  * when flag is false the full negated name is added.  The option
  * carries no value.
  */
 struct mntarg *
 mount_argb(struct mntarg *ma, int flag, const char *name)
 {
         const char *optname;

         KASSERT(name[0] == 'n' && name[1] == 'o',
             ("mount_argb(...,%s): name must start with 'no'", name));

         optname = name;
         if (flag)
                 optname += 2;           /* skip the "no" */
         return (mount_arg(ma, optname, NULL, 0));
 }
 2155 
 2156 /*
 2157  * Add an argument printf style
 2158  */
 2159 struct mntarg *
 2160 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2161 {
 2162         va_list ap;
 2163         struct mntaarg *maa;
 2164         struct sbuf *sb;
 2165         int len;
 2166 
 2167         if (ma == NULL) {
 2168                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2169                 SLIST_INIT(&ma->list);
 2170         }
 2171         if (ma->error)
 2172                 return (ma);
 2173 
 2174         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2175             M_MOUNT, M_WAITOK);
 2176         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2177         ma->v[ma->len].iov_len = strlen(name) + 1;
 2178         ma->len++;
 2179 
 2180         sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
 2181         va_start(ap, fmt);
 2182         sbuf_vprintf(sb, fmt, ap);
 2183         va_end(ap);
 2184         sbuf_finish(sb);
 2185         len = sbuf_len(sb) + 1;
 2186         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2187         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2188         bcopy(sbuf_data(sb), maa + 1, len);
 2189         sbuf_delete(sb);
 2190 
 2191         ma->v[ma->len].iov_base = maa + 1;
 2192         ma->v[ma->len].iov_len = len;
 2193         ma->len++;
 2194 
 2195         return (ma);
 2196 }
 2197 
 2198 /*
 2199  * Add an argument which is a userland string.
 2200  */
 2201 struct mntarg *
 2202 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2203 {
 2204         struct mntaarg *maa;
 2205         char *tbuf;
 2206 
 2207         if (val == NULL)
 2208                 return (ma);
 2209         if (ma == NULL) {
 2210                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2211                 SLIST_INIT(&ma->list);
 2212         }
 2213         if (ma->error)
 2214                 return (ma);
 2215         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2216         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2217         tbuf = (void *)(maa + 1);
 2218         ma->error = copyinstr(val, tbuf, len, NULL);
 2219         return (mount_arg(ma, name, tbuf, -1));
 2220 }
 2221 
 2222 /*
 2223  * Plain argument.
 2224  *
 2225  * If length is -1, use printf.
 2226  */
 2227 struct mntarg *
 2228 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2229 {
 2230 
 2231         if (ma == NULL) {
 2232                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2233                 SLIST_INIT(&ma->list);
 2234         }
 2235         if (ma->error)
 2236                 return (ma);
 2237 
 2238         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2239             M_MOUNT, M_WAITOK);
 2240         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2241         ma->v[ma->len].iov_len = strlen(name) + 1;
 2242         ma->len++;
 2243 
 2244         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2245         if (len < 0)
 2246                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2247         else
 2248                 ma->v[ma->len].iov_len = len;
 2249         ma->len++;
 2250         return (ma);
 2251 }
 2252 
 2253 /*
 2254  * Free a mntarg structure
 2255  */
 2256 static void
 2257 free_mntarg(struct mntarg *ma)
 2258 {
 2259         struct mntaarg *maa;
 2260 
 2261         while (!SLIST_EMPTY(&ma->list)) {
 2262                 maa = SLIST_FIRST(&ma->list);
 2263                 SLIST_REMOVE_HEAD(&ma->list, next);
 2264                 free(maa, M_MOUNT);
 2265         }
 2266         free(ma->v, M_MOUNT);
 2267         free(ma, M_MOUNT);
 2268 }
 2269 
 2270 /*
 2271  * Mount a filesystem
 2272  */
 2273 int
 2274 kernel_mount(struct mntarg *ma, int flags)
 2275 {
 2276         struct uio auio;
 2277         int error;
 2278 
 2279         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2280         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2281         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2282 
 2283         auio.uio_iov = ma->v;
 2284         auio.uio_iovcnt = ma->len;
 2285         auio.uio_segflg = UIO_SYSSPACE;
 2286 
 2287         error = ma->error;
 2288         if (!error)
 2289                 error = vfs_donmount(curthread, flags, &auio);
 2290         free_mntarg(ma);
 2291         return (error);
 2292 }
 2293 
 /*
  * Mount a filesystem from a variadic list of options.
  *
  * After flags the arguments are (name, value) pairs of C strings; the
  * list ends at the first NULL name.  Every pair is added with
  * mount_arg() and the result is handed to kernel_mount().
  *
  * Returns the value returned by kernel_mount().
  */
 int
 kernel_vmount(int flags, ...)
 {
         struct mntarg *ma;
         va_list ap;
         const char *optname;
         const void *optval;

         ma = NULL;

         va_start(ap, flags);
         while ((optname = va_arg(ap, const char *)) != NULL) {
                 optval = va_arg(ap, const void *);
                 ma = mount_arg(ma, optname, optval, -1);
         }
         va_end(ap);

         return (kernel_mount(ma, flags));
 }

Cache object: 36e5fe63eec8e86e93c1c681f9c10b11


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.