The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/7.4/sys/kern/vfs_mount.c 198244 2009-10-19 19:11:00Z gallatin $");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/clock.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/libkern.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/reboot.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/sx.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/vnode.h>
   61 #include <vm/uma.h>
   62 
   63 #include <geom/geom.h>
   64 
   65 #include <machine/stdarg.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/mac/mac_framework.h>
   69 
   70 #include "opt_rootdevname.h"
   71 #include "opt_ddb.h"
   72 #include "opt_mac.h"
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif
   77 
   78 #define ROOTNAME                "root_device"
   79 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   80 
   81 static int      vfs_domount(struct thread *td, const char *fstype,
   82                     char *fspath, int fsflags, void *fsdata);
   83 static int      vfs_mountroot_ask(void);
   84 static int      vfs_mountroot_try(const char *mountfrom);
   85 static void     free_mntarg(struct mntarg *ma);
   86 static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
   87 
   88 static int      usermount = 0;
   89 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   90     "Unprivileged users may mount and unmount file systems");
   91 
   92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   93 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
   94 static uma_zone_t mount_zone;
   95 
   96 /* List of mounted filesystems. */
   97 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   98 
   99 /* For any iteration/modification of mountlist */
  100 struct mtx mountlist_mtx;
  101 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  102 
  103 /*
  104  * The vnode of the system's root (/ in the filesystem, without chroot
  105  * active.)
  106  */
  107 struct vnode    *rootvnode;
  108 
  109 /*
  110  * The root filesystem is detailed in the kernel environment variable
  111  * vfs.root.mountfrom, which is expected to be in the general format
  112  *
  113  * <vfsname>:[<path>]
  114  * vfsname   := the name of a VFS known to the kernel and capable
  115  *              of being mounted as root
  116  * path      := disk device name or other data used by the filesystem
  117  *              to locate its physical store
  118  */
  119 
  120 /*
  121  * Global opts, taken by all filesystems
  122  */
  123 static const char *global_opts[] = {
  124         "errmsg",
  125         "fstype",
  126         "fspath",
  127         "ro",
  128         "rw",
  129         "nosuid",
  130         "noexec",
  131         NULL
  132 };
  133 
  134 /*
  135  * The root specifiers we will try if RB_CDROM is specified.
  136  */
  137 static char *cdrom_rootdevnames[] = {
  138         "cd9660:cd0",
  139         "cd9660:acd0",
  140         NULL
  141 };
  142 
  143 /* legacy find-root code */
  144 char            *rootdevnames[2] = {NULL, NULL};
  145 #ifndef ROOTDEVNAME
  146 #  define ROOTDEVNAME NULL
  147 #endif
  148 static const char       *ctrootdevname = ROOTDEVNAME;
  149 
  150 /*
  151  * ---------------------------------------------------------------------
  152  * Functions for building and sanitizing the mount options
  153  */
  154 
  155 /* Remove one mount option. */
  156 static void
  157 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  158 {
  159 
  160         TAILQ_REMOVE(opts, opt, link);
  161         free(opt->name, M_MOUNT);
  162         if (opt->value != NULL)
  163                 free(opt->value, M_MOUNT);
  164 #ifdef INVARIANTS
  165         else if (opt->len != 0)
  166                 panic("%s: mount option with NULL value but length != 0",
  167                     __func__);
  168 #endif
  169         free(opt, M_MOUNT);
  170 }
  171 
  172 /* Release all resources related to the mount options. */
  173 void
  174 vfs_freeopts(struct vfsoptlist *opts)
  175 {
  176         struct vfsopt *opt;
  177 
  178         while (!TAILQ_EMPTY(opts)) {
  179                 opt = TAILQ_FIRST(opts);
  180                 vfs_freeopt(opts, opt);
  181         }
  182         free(opts, M_MOUNT);
  183 }
  184 
  185 void
  186 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  187 {
  188         struct vfsopt *opt, *temp;
  189 
  190         if (opts == NULL)
  191                 return;
  192         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  193                 if (strcmp(opt->name, name) == 0)
  194                         vfs_freeopt(opts, opt);
  195         }
  196 }
  197 
  198 /*
  199  * Check if options are equal (with or without the "no" prefix).
  200  */
  201 static int
  202 vfs_equalopts(const char *opt1, const char *opt2)
  203 {
  204 
  205         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  206         if (strcmp(opt1, opt2) == 0)
  207                 return (1);
  208         /* "noopt" vs. "opt" */
  209         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  210                 return (1);
  211         /* "opt" vs. "noopt" */
  212         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  213                 return (1);
  214         return (0);
  215 }
  216 
  217 /*
  218  * If a mount option is specified several times,
  219  * (with or without the "no" prefix) only keep
  220  * the last occurence of it.
  221  */
  222 static void
  223 vfs_sanitizeopts(struct vfsoptlist *opts)
  224 {
  225         struct vfsopt *opt, *opt2, *tmp;
  226 
  227         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  228                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  229                 while (opt2 != NULL) {
  230                         if (vfs_equalopts(opt->name, opt2->name)) {
  231                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  232                                 vfs_freeopt(opts, opt2);
  233                                 opt2 = tmp;
  234                         } else {
  235                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  236                         }
  237                 }
  238         }
  239 }
  240 
  241 /*
  242  * Build a linked list of mount options from a struct uio.
  243  */
  244 static int
  245 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  246 {
  247         struct vfsoptlist *opts;
  248         struct vfsopt *opt;
  249         size_t memused;
  250         unsigned int i, iovcnt;
  251         int error, namelen, optlen;
  252 
  253         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  254         TAILQ_INIT(opts);
  255         memused = 0;
  256         iovcnt = auio->uio_iovcnt;
  257         for (i = 0; i < iovcnt; i += 2) {
  258                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  259                 namelen = auio->uio_iov[i].iov_len;
  260                 optlen = auio->uio_iov[i + 1].iov_len;
  261                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  262                 opt->value = NULL;
  263                 opt->len = 0;
  264 
  265                 /*
  266                  * Do this early, so jumps to "bad" will free the current
  267                  * option.
  268                  */
  269                 TAILQ_INSERT_TAIL(opts, opt, link);
  270                 memused += sizeof(struct vfsopt) + optlen + namelen;
  271 
  272                 /*
  273                  * Avoid consuming too much memory, and attempts to overflow
  274                  * memused.
  275                  */
  276                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  277                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  278                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  279                         error = EINVAL;
  280                         goto bad;
  281                 }
  282 
  283                 if (auio->uio_segflg == UIO_SYSSPACE) {
  284                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  285                 } else {
  286                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  287                             namelen);
  288                         if (error)
  289                                 goto bad;
  290                 }
  291                 /* Ensure names are null-terminated strings. */
  292                 if (opt->name[namelen - 1] != '\0') {
  293                         error = EINVAL;
  294                         goto bad;
  295                 }
  296                 if (optlen != 0) {
  297                         opt->len = optlen;
  298                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  299                         if (auio->uio_segflg == UIO_SYSSPACE) {
  300                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  301                                     optlen);
  302                         } else {
  303                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  304                                     opt->value, optlen);
  305                                 if (error)
  306                                         goto bad;
  307                         }
  308                 }
  309         }
  310         vfs_sanitizeopts(opts);
  311         *options = opts;
  312         return (0);
  313 bad:
  314         vfs_freeopts(opts);
  315         return (error);
  316 }
  317 
  318 /*
  319  * Merge the old mount options with the new ones passed
  320  * in the MNT_UPDATE case.
  321  */
  322 static void
  323 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  324 {
  325         struct vfsopt *opt, *opt2, *new;
  326 
  327         TAILQ_FOREACH(opt, opts, link) {
  328                 /*
  329                  * Check that this option hasn't been redefined
  330                  * nor cancelled with a "no" mount option.
  331                  */
  332                 opt2 = TAILQ_FIRST(toopts);
  333                 while (opt2 != NULL) {
  334                         if (strcmp(opt2->name, opt->name) == 0)
  335                                 goto next;
  336                         if (strncmp(opt2->name, "no", 2) == 0 &&
  337                             strcmp(opt2->name + 2, opt->name) == 0) {
  338                                 vfs_freeopt(toopts, opt2);
  339                                 goto next;
  340                         }
  341                         opt2 = TAILQ_NEXT(opt2, link);
  342                 }
  343                 /* We want this option, duplicate it. */
  344                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  345                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  346                 strcpy(new->name, opt->name);
  347                 if (opt->len != 0) {
  348                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  349                         bcopy(opt->value, new->value, opt->len);
  350                 } else {
  351                         new->value = NULL;
  352                 }
  353                 new->len = opt->len;
  354                 TAILQ_INSERT_TAIL(toopts, new, link);
  355 next:
  356                 continue;
  357         }
  358 }
  359 
  360 /*
  361  * Mount a filesystem.
  362  */
  363 int
  364 nmount(td, uap)
  365         struct thread *td;
  366         struct nmount_args /* {
  367                 struct iovec *iovp;
  368                 unsigned int iovcnt;
  369                 int flags;
  370         } */ *uap;
  371 {
  372         struct uio *auio;
  373         struct iovec *iov;
  374         unsigned int i;
  375         int error;
  376         u_int iovcnt;
  377 
  378         AUDIT_ARG(fflags, uap->flags);
  379 
  380         /*
  381          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  382          * userspace to set this flag, but we must filter it out if we want
  383          * MNT_UPDATE on the root file system to work.
  384          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  385          */
  386         uap->flags &= ~MNT_ROOTFS;
  387 
  388         iovcnt = uap->iovcnt;
  389         /*
  390          * Check that we have an even number of iovec's
  391          * and that we have at least two options.
  392          */
  393         if ((iovcnt & 1) || (iovcnt < 4))
  394                 return (EINVAL);
  395 
  396         error = copyinuio(uap->iovp, iovcnt, &auio);
  397         if (error)
  398                 return (error);
  399         iov = auio->uio_iov;
  400         for (i = 0; i < iovcnt; i++) {
  401                 if (iov->iov_len > MMAXOPTIONLEN) {
  402                         free(auio, M_IOV);
  403                         return (EINVAL);
  404                 }
  405                 iov++;
  406         }
  407         error = vfs_donmount(td, uap->flags, auio);
  408 
  409         free(auio, M_IOV);
  410         return (error);
  411 }
  412 
  413 /*
  414  * ---------------------------------------------------------------------
  415  * Various utility functions
  416  */
  417 
  418 void
  419 vfs_ref(struct mount *mp)
  420 {
  421 
  422         MNT_ILOCK(mp);
  423         MNT_REF(mp);
  424         MNT_IUNLOCK(mp);
  425 }
  426 
  427 void
  428 vfs_rel(struct mount *mp)
  429 {
  430 
  431         MNT_ILOCK(mp);
  432         MNT_REL(mp);
  433         MNT_IUNLOCK(mp);
  434 }
  435 
  436 static int
  437 mount_init(void *mem, int size, int flags)
  438 {
  439         struct mount *mp;
  440 
  441         mp = (struct mount *)mem;
  442         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  443         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  444         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  445         return (0);
  446 }
  447 
  448 static void
  449 mount_fini(void *mem, int size)
  450 {
  451         struct mount *mp;
  452 
  453         mp = (struct mount *)mem;
  454         lockdestroy(&mp->mnt_explock);
  455         lockdestroy(&mp->mnt_lock);
  456         mtx_destroy(&mp->mnt_mtx);
  457 }
  458 
  459 /*
  460  * Allocate and initialize the mount point struct.
  461  */
  462 struct mount *
  463 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  464     const char *fspath, struct thread *td)
  465 {
  466         struct mount *mp;
  467 
  468         mp = uma_zalloc(mount_zone, M_WAITOK);
  469         bzero(&mp->mnt_startzero,
  470             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  471         TAILQ_INIT(&mp->mnt_nvnodelist);
  472         mp->mnt_nvnodelistsize = 0;
  473         mp->mnt_ref = 0;
  474         (void) vfs_busy(mp, LK_NOWAIT, 0, td);
  475         mp->mnt_op = vfsp->vfc_vfsops;
  476         mp->mnt_vfc = vfsp;
  477         vfsp->vfc_refcount++;   /* XXX Unlocked */
  478         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  479         mp->mnt_gen++;
  480         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  481         mp->mnt_vnodecovered = vp;
  482         mp->mnt_cred = crdup(td->td_ucred);
  483         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  484         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  485         mp->mnt_iosize_max = DFLTPHYS;
  486 #ifdef MAC
  487         mac_init_mount(mp);
  488         mac_create_mount(td->td_ucred, mp);
  489 #endif
  490         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  491         return (mp);
  492 }
  493 
  494 /*
  495  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  496  */
  497 void
  498 vfs_mount_destroy(struct mount *mp)
  499 {
  500         int i;
  501 
  502         MNT_ILOCK(mp);
  503         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
  504         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  505                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  506                 wakeup(mp);
  507         }
  508         for (i = 0; mp->mnt_ref && i < 3; i++)
  509                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
  510         /*
  511          * This will always cause a 3 second delay in rebooting due to
  512          * refs on the root mountpoint that never go away.  Most of these
  513          * are held by init which never exits.
  514          */
  515         if (i == 3 && (!rebooting || bootverbose))
  516                 printf("Mount point %s had %d dangling refs\n",
  517                     mp->mnt_stat.f_mntonname, mp->mnt_ref);
  518         if (mp->mnt_holdcnt != 0) {
  519                 printf("Waiting for mount point to be unheld\n");
  520                 while (mp->mnt_holdcnt != 0) {
  521                         mp->mnt_holdcntwaiters++;
  522                         msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
  523                                PZERO, "mntdestroy", 0);
  524                         mp->mnt_holdcntwaiters--;
  525                 }
  526                 printf("mount point unheld\n");
  527         }
  528         if (mp->mnt_writeopcount > 0) {
  529                 printf("Waiting for mount point write ops\n");
  530                 while (mp->mnt_writeopcount > 0) {
  531                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  532                         msleep(&mp->mnt_writeopcount,
  533                                MNT_MTX(mp),
  534                                PZERO, "mntdestroy2", 0);
  535                 }
  536                 printf("mount point write ops completed\n");
  537         }
  538         if (mp->mnt_secondary_writes > 0) {
  539                 printf("Waiting for mount point secondary write ops\n");
  540                 while (mp->mnt_secondary_writes > 0) {
  541                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  542                         msleep(&mp->mnt_secondary_writes,
  543                                MNT_MTX(mp),
  544                                PZERO, "mntdestroy3", 0);
  545                 }
  546                 printf("mount point secondary write ops completed\n");
  547         }
  548         MNT_IUNLOCK(mp);
  549         mp->mnt_vfc->vfc_refcount--;
  550         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  551                 struct vnode *vp;
  552 
  553                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  554                         vprint("", vp);
  555                 panic("unmount: dangling vnode");
  556         }
  557         MNT_ILOCK(mp);
  558         if (mp->mnt_kern_flag & MNTK_MWAIT)
  559                 wakeup(mp);
  560         if (mp->mnt_writeopcount != 0)
  561                 panic("vfs_mount_destroy: nonzero writeopcount");
  562         if (mp->mnt_secondary_writes != 0)
  563                 panic("vfs_mount_destroy: nonzero secondary_writes");
  564         if (mp->mnt_nvnodelistsize != 0)
  565                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  566         mp->mnt_writeopcount = -1000;
  567         mp->mnt_nvnodelistsize = -1000;
  568         mp->mnt_secondary_writes = -1000;
  569         MNT_IUNLOCK(mp);
  570 #ifdef MAC
  571         mac_destroy_mount(mp);
  572 #endif
  573         if (mp->mnt_opt != NULL)
  574                 vfs_freeopts(mp->mnt_opt);
  575         crfree(mp->mnt_cred);
  576         uma_zfree(mount_zone, mp);
  577 }
  578 
  579 int
  580 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  581 {
  582         struct vfsoptlist *optlist;
  583         struct vfsopt *opt, *noro_opt, *tmp_opt;
  584         char *fstype, *fspath, *errmsg;
  585         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  586         int has_rw, has_noro;
  587 
  588         errmsg = NULL;
  589         errmsg_len = 0;
  590         errmsg_pos = -1;
  591         has_rw = 0;
  592         has_noro = 0;
  593 
  594         error = vfs_buildopts(fsoptions, &optlist);
  595         if (error)
  596                 return (error);
  597 
  598         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  599                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  600 
  601         /*
  602          * We need these two options before the others,
  603          * and they are mandatory for any filesystem.
  604          * Ensure they are NUL terminated as well.
  605          */
  606         fstypelen = 0;
  607         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  608         if (error || fstype[fstypelen - 1] != '\0') {
  609                 error = EINVAL;
  610                 if (errmsg != NULL)
  611                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  612                 goto bail;
  613         }
  614         fspathlen = 0;
  615         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  616         if (error || fspath[fspathlen - 1] != '\0') {
  617                 error = EINVAL;
  618                 if (errmsg != NULL)
  619                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  620                 goto bail;
  621         }
  622 
  623         /*
  624          * We need to see if we have the "update" option
  625          * before we call vfs_domount(), since vfs_domount() has special
  626          * logic based on MNT_UPDATE.  This is very important
  627          * when we want to update the root filesystem.
  628          */
  629         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  630                 if (strcmp(opt->name, "update") == 0) {
  631                         fsflags |= MNT_UPDATE;
  632                         vfs_freeopt(optlist, opt);
  633                 }
  634                 else if (strcmp(opt->name, "async") == 0)
  635                         fsflags |= MNT_ASYNC;
  636                 else if (strcmp(opt->name, "force") == 0) {
  637                         fsflags |= MNT_FORCE;
  638                         vfs_freeopt(optlist, opt);
  639                 }
  640                 else if (strcmp(opt->name, "reload") == 0) {
  641                         fsflags |= MNT_RELOAD;
  642                         vfs_freeopt(optlist, opt);
  643                 }
  644                 else if (strcmp(opt->name, "multilabel") == 0)
  645                         fsflags |= MNT_MULTILABEL;
  646                 else if (strcmp(opt->name, "noasync") == 0)
  647                         fsflags &= ~MNT_ASYNC;
  648                 else if (strcmp(opt->name, "noatime") == 0)
  649                         fsflags |= MNT_NOATIME;
  650                 else if (strcmp(opt->name, "atime") == 0) {
  651                         free(opt->name, M_MOUNT);
  652                         opt->name = strdup("nonoatime", M_MOUNT);
  653                 }
  654                 else if (strcmp(opt->name, "noclusterr") == 0)
  655                         fsflags |= MNT_NOCLUSTERR;
  656                 else if (strcmp(opt->name, "clusterr") == 0) {
  657                         free(opt->name, M_MOUNT);
  658                         opt->name = strdup("nonoclusterr", M_MOUNT);
  659                 }
  660                 else if (strcmp(opt->name, "noclusterw") == 0)
  661                         fsflags |= MNT_NOCLUSTERW;
  662                 else if (strcmp(opt->name, "clusterw") == 0) {
  663                         free(opt->name, M_MOUNT);
  664                         opt->name = strdup("nonoclusterw", M_MOUNT);
  665                 }
  666                 else if (strcmp(opt->name, "noexec") == 0)
  667                         fsflags |= MNT_NOEXEC;
  668                 else if (strcmp(opt->name, "exec") == 0) {
  669                         free(opt->name, M_MOUNT);
  670                         opt->name = strdup("nonoexec", M_MOUNT);
  671                 }
  672                 else if (strcmp(opt->name, "nosuid") == 0)
  673                         fsflags |= MNT_NOSUID;
  674                 else if (strcmp(opt->name, "suid") == 0) {
  675                         free(opt->name, M_MOUNT);
  676                         opt->name = strdup("nonosuid", M_MOUNT);
  677                 }
  678                 else if (strcmp(opt->name, "nosymfollow") == 0)
  679                         fsflags |= MNT_NOSYMFOLLOW;
  680                 else if (strcmp(opt->name, "symfollow") == 0) {
  681                         free(opt->name, M_MOUNT);
  682                         opt->name = strdup("nonosymfollow", M_MOUNT);
  683                 }
  684                 else if (strcmp(opt->name, "noro") == 0) {
  685                         fsflags &= ~MNT_RDONLY;
  686                         has_noro = 1;
  687                 }
  688                 else if (strcmp(opt->name, "rw") == 0) {
  689                         fsflags &= ~MNT_RDONLY;
  690                         has_rw = 1;
  691                 }
  692                 else if (strcmp(opt->name, "ro") == 0)
  693                         fsflags |= MNT_RDONLY;
  694                 else if (strcmp(opt->name, "rdonly") == 0) {
  695                         free(opt->name, M_MOUNT);
  696                         opt->name = strdup("ro", M_MOUNT);
  697                         fsflags |= MNT_RDONLY;
  698                 }
  699                 else if (strcmp(opt->name, "suiddir") == 0)
  700                         fsflags |= MNT_SUIDDIR;
  701                 else if (strcmp(opt->name, "sync") == 0)
  702                         fsflags |= MNT_SYNCHRONOUS;
  703                 else if (strcmp(opt->name, "union") == 0)
  704                         fsflags |= MNT_UNION;
  705         }
  706 
  707         /*
  708          * If "rw" was specified as a mount option, and we
  709          * are trying to update a mount-point from "ro" to "rw",
  710          * we need a mount option "noro", since in vfs_mergeopts(),
  711          * "noro" will cancel "ro", but "rw" will not do anything.
  712          */
  713         if (has_rw && !has_noro) {
  714                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  715                 noro_opt->name = strdup("noro", M_MOUNT);
  716                 noro_opt->value = NULL;
  717                 noro_opt->len = 0;
  718                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
  719         }
  720 
  721         /*
  722          * Be ultra-paranoid about making sure the type and fspath
  723          * variables will fit in our mp buffers, including the
  724          * terminating NUL.
  725          */
  726         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  727                 error = ENAMETOOLONG;
  728                 goto bail;
  729         }
  730 
  731         mtx_lock(&Giant);
  732         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  733         mtx_unlock(&Giant);
  734 bail:
  735         /* copyout the errmsg */
  736         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  737             && errmsg_len > 0 && errmsg != NULL) {
  738                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  739                         bcopy(errmsg,
  740                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  741                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  742                 } else {
  743                         copyout(errmsg,
  744                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  745                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  746                 }
  747         }
  748 
  749         if (error != 0)
  750                 vfs_freeopts(optlist);
  751         return (error);
  752 }
  753 
  754 /*
  755  * Old mount API.
  756  */
  757 #ifndef _SYS_SYSPROTO_H_
  758 struct mount_args {
  759         char    *type;
  760         char    *path;
  761         int     flags;
  762         caddr_t data;
  763 };
  764 #endif
  765 /* ARGSUSED */
  766 int
  767 mount(td, uap)
  768         struct thread *td;
  769         struct mount_args /* {
  770                 char *type;
  771                 char *path;
  772                 int flags;
  773                 caddr_t data;
  774         } */ *uap;
  775 {
  776         char *fstype;
  777         struct vfsconf *vfsp = NULL;
  778         struct mntarg *ma = NULL;
  779         int error;
  780 
  781         AUDIT_ARG(fflags, uap->flags);
  782 
  783         /*
  784          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  785          * userspace to set this flag, but we must filter it out if we want
  786          * MNT_UPDATE on the root file system to work.
  787          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  788          */
  789         uap->flags &= ~MNT_ROOTFS;
  790 
  791         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  792         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  793         if (error) {
  794                 free(fstype, M_TEMP);
  795                 return (error);
  796         }
  797 
  798         AUDIT_ARG(text, fstype);
  799         mtx_lock(&Giant);
  800         vfsp = vfs_byname_kld(fstype, td, &error);
  801         free(fstype, M_TEMP);
  802         if (vfsp == NULL) {
  803                 mtx_unlock(&Giant);
  804                 return (ENOENT);
  805         }
  806         if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
  807                 mtx_unlock(&Giant);
  808                 return (EOPNOTSUPP);
  809         }
  810 
  811         ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
  812         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  813         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
  814         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
  815         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
  816 
  817         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
  818         mtx_unlock(&Giant);
  819         return (error);
  820 }
  821 
  822 
  823 /*
  824  * vfs_domount(): actually attempt a filesystem mount.
       *
       * Performs either a new mount of a filesystem of type "fstype" on the
       * directory named by "fspath" or, when MNT_UPDATE is set in "fsflags",
       * an update of the filesystem whose root vnode "fspath" resolves to.
       * "fsdata" is installed as mp->mnt_optnew around the VFS_MOUNT() call
       * and becomes mp->mnt_opt on success; on failure it is detached from
       * the mount point again but is not freed here.
       *
       * The caller must hold Giant.  Returns 0 on success or an errno value.
  825  */
  826 static int
  827 vfs_domount(
  828         struct thread *td,      /* Calling thread. */
  829         const char *fstype,     /* Filesystem type. */
  830         char *fspath,           /* Mount path. */
  831         int fsflags,            /* Flags common to all filesystems. */
  832         void *fsdata            /* Options local to the filesystem. */
  833         )
  834 {
  835         struct vnode *vp;
  836         struct mount *mp;
  837         struct vfsconf *vfsp;
  838         struct export_args export;
  839         int error, flag = 0;
  840         struct vattr va;
  841         struct nameidata nd;
  842 
  843         mtx_assert(&Giant, MA_OWNED);
  844         /*
  845          * Be ultra-paranoid about making sure the type and fspath
  846          * variables will fit in our mp buffers, including the
  847          * terminating NUL.
  848          */
  849         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  850                 return (ENAMETOOLONG);
  851 
              /*
               * Jailed callers, and every caller when usermount is disabled,
               * must pass the PRIV_VFS_MOUNT privilege check.
               */
  852         if (jailed(td->td_ucred) || usermount == 0) {
  853                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
  854                         return (error);
  855         }
  856 
  857         /*
  858          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  859          */
  860         if (fsflags & MNT_EXPORTED) {
  861                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
  862                 if (error)
  863                         return (error);
  864         }
  865         if (fsflags & MNT_SUIDDIR) {
  866                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
  867                 if (error)
  868                         return (error);
  869         }
  870         /*
  871          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
  872          */
  873         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
  874                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
  875                         fsflags |= MNT_NOSUID | MNT_USER;
  876         }
  877 
  878         /* Load KLDs before we lock the covered vnode to avoid reversals. */
  879         vfsp = NULL;
  880         if ((fsflags & MNT_UPDATE) == 0) {
  881                 /* Don't try to load KLDs if we're mounting the root. */
  882                 if (fsflags & MNT_ROOTFS)
  883                         vfsp = vfs_byname(fstype);
  884                 else
  885                         vfsp = vfs_byname_kld(fstype, td, &error);
  886                 if (vfsp == NULL)
  887                         return (ENODEV);
                      /* Inside a jail only filesystems flagged VFCF_JAIL may be mounted. */
  888                 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
  889                         return (EPERM);
  890         }
  891         /*
  892          * Get vnode to be covered
  893          */
  894         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
  895             fspath, td);
  896         if ((error = namei(&nd)) != 0)
  897                 return (error);
  898         NDFREE(&nd, NDF_ONLY_PNBUF);
  899         vp = nd.ni_vp;
  900         if (fsflags & MNT_UPDATE) {
                      /*
                       * Update of an existing mount: fspath must resolve to the
                       * root vnode of a mounted filesystem.
                       */
  901                 if ((vp->v_vflag & VV_ROOT) == 0) {
  902                         vput(vp);
  903                         return (EINVAL);
  904                 }
  905                 mp = vp->v_mount;
  906                 MNT_ILOCK(mp);
                      /* Saved so the flags can be restored if the update fails. */
  907                 flag = mp->mnt_flag;
  908                 /*
  909                  * We only allow the filesystem to be reloaded if it
  910                  * is currently mounted read-only.
  911                  */
  912                 if ((fsflags & MNT_RELOAD) &&
  913                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  914                         MNT_IUNLOCK(mp);
  915                         vput(vp);
  916                         return (EOPNOTSUPP);    /* Needs translation */
  917                 }
  918                 MNT_IUNLOCK(mp);
  919                 /*
  920                  * Only privileged root, or (if MNT_USER is set) the user that
  921                  * did the original mount is permitted to update it.
  922                  */
  923                 error = vfs_suser(mp, td);
  924                 if (error) {
  925                         vput(vp);
  926                         return (error);
  927                 }
  928                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  929                         vput(vp);
  930                         return (EBUSY);
  931                 }
                      /*
                       * VI_MOUNT marks a mount operation in progress on this
                       * vnode; if one is already pending, or something is
                       * mounted here, fail with EBUSY.
                       */
  932                 VI_LOCK(vp);
  933                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  934                     vp->v_mountedhere != NULL) {
  935                         VI_UNLOCK(vp);
  936                         vfs_unbusy(mp, td);
  937                         vput(vp);
  938                         return (EBUSY);
  939                 }
  940                 vp->v_iflag |= VI_MOUNT;
  941                 VI_UNLOCK(vp);
  942                 MNT_ILOCK(mp);
  943                 mp->mnt_flag |= fsflags &
  944                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
  945                 MNT_IUNLOCK(mp);
  946                 VOP_UNLOCK(vp, 0, td);
                      /* Stage the new options and merge in the currently active ones. */
  947                 mp->mnt_optnew = fsdata;
  948                 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  949         } else {
  950                 /*
  951                  * If the user is not root, ensure that they own the directory
  952                  * onto which we are attempting to mount.
  953                  */
  954                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  955                 if (error) {
  956                         vput(vp);
  957                         return (error);
  958                 }
  959                 if (va.va_uid != td->td_ucred->cr_uid) {
  960                         error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
  961                             0);
  962                         if (error) {
  963                                 vput(vp);
  964                                 return (error);
  965                         }
  966                 }
  967                 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
  968                 if (error != 0) {
  969                         vput(vp);
  970                         return (error);
  971                 }
  972                 if (vp->v_type != VDIR) {
  973                         vput(vp);
  974                         return (ENOTDIR);
  975                 }
                      /* Same VI_MOUNT / already-covered check as in the update case. */
  976                 VI_LOCK(vp);
  977                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  978                     vp->v_mountedhere != NULL) {
  979                         VI_UNLOCK(vp);
  980                         vput(vp);
  981                         return (EBUSY);
  982                 }
  983                 vp->v_iflag |= VI_MOUNT;
  984                 VI_UNLOCK(vp);
  985 
  986                 /*
  987                  * Allocate and initialize the filesystem.
  988                  */
  989                 mp = vfs_mount_alloc(vp, vfsp, fspath, td);
  990                 VOP_UNLOCK(vp, 0, td);
  991 
  992                 /* XXXMAC: pass to vfs_mount_alloc? */
  993                 mp->mnt_optnew = fsdata;
  994         }
  995 
  996         /*
  997          * Set the mount level flags.
  998          */
  999         MNT_ILOCK(mp);
 1000         mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
 1001                 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
 1002                             MNT_RDONLY));
 1003         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1004                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1005         MNT_IUNLOCK(mp);
 1006         /*
 1007          * Mount the filesystem.
 1008          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1009          * get.  No freeing of cn_pnbuf.
 1010          */
 1011         error = VFS_MOUNT(mp, td);
 1012 
 1013         /*
 1014          * Process the export option only if we are
 1015          * updating mount options.
 1016          */
 1017         if (!error && (fsflags & MNT_UPDATE)) {
 1018                 if (vfs_copyopt(mp->mnt_optnew, "export", &export,
 1019                     sizeof(export)) == 0)
 1020                         error = vfs_export(mp, &export);
 1021         }
 1022 
              /* On success the staged options replace the active option list. */
 1023         if (!error) {
 1024                 if (mp->mnt_opt != NULL)
 1025                         vfs_freeopts(mp->mnt_opt);
 1026                 mp->mnt_opt = mp->mnt_optnew;
 1027                 (void)VFS_STATFS(mp, &mp->mnt_stat, td);
 1028         }
 1029         /*
 1030          * Prevent external consumers of mount options from reading
 1031          * mnt_optnew.
 1032         */
 1033         mp->mnt_optnew = NULL;
 1034         if (mp->mnt_flag & MNT_UPDATE) {
                      /*
                       * Update path.  On failure restore the flags saved in
                       * "flag" (keeping the current MNT_QUOTA bit); on success
                       * clear the transient update-only flags.
                       */
 1035                 MNT_ILOCK(mp);
 1036                 if (error)
 1037                         mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
 1038                                 (flag & ~MNT_QUOTA);
 1039                 else
 1040                         mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
 1041                                           MNT_FORCE | MNT_SNAPSHOT);
 1042                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1043                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1044                 else
 1045                         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1046                 MNT_IUNLOCK(mp);
                      /*
                       * A read-write mount gets a syncer vnode if it lacks one;
                       * a read-only mount gives its syncer vnode up.
                       */
 1047                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1048                         if (mp->mnt_syncer == NULL)
 1049                                 error = vfs_allocate_syncvnode(mp);
 1050                 } else {
 1051                         if (mp->mnt_syncer != NULL)
 1052                                 vrele(mp->mnt_syncer);
 1053                         mp->mnt_syncer = NULL;
 1054                 }
 1055                 vfs_unbusy(mp, td);
 1056                 VI_LOCK(vp);
 1057                 vp->v_iflag &= ~VI_MOUNT;
 1058                 VI_UNLOCK(vp);
 1059                 vrele(vp);
 1060                 return (error);
 1061         }
              /* New mount: derive MNTK_ASYNC from MNT_ASYNC and mnt_noasync. */
 1062         MNT_ILOCK(mp);
 1063         if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1064                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1065         else
 1066                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1067         MNT_IUNLOCK(mp);
              /* The covered vnode was unlocked before VFS_MOUNT(); lock it again. */
 1068         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1069         /*
 1070          * Put the new filesystem on the mount list after root.
 1071          */
 1072         cache_purge(vp);
 1073         if (!error) {
 1074                 struct vnode *newdp;
 1075 
 1076                 VI_LOCK(vp);
 1077                 vp->v_iflag &= ~VI_MOUNT;
 1078                 VI_UNLOCK(vp);
 1079                 vp->v_mountedhere = mp;
 1080                 mtx_lock(&mountlist_mtx);
 1081                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1082                 mtx_unlock(&mountlist_mtx);
 1083                 vfs_event_signal(NULL, VQ_MOUNT, 0);
 1084                 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
 1085                         panic("mount: lost mount");
 1086                 VOP_UNLOCK(newdp, 0, td);
 1087                 VOP_UNLOCK(vp, 0, td);
 1088                 mountcheckdirs(vp, newdp);
 1089                 vrele(newdp);
 1090                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1091                         error = vfs_allocate_syncvnode(mp);
 1092                 vfs_unbusy(mp, td);
                      /*
                       * NOTE(review): if vfs_allocate_syncvnode() failed, the
                       * error is returned although the mount has already been
                       * linked onto mountlist above -- confirm callers cope.
                       */
 1093                 if (error)
 1094                         vrele(vp);
 1095         } else {
                      /* VFS_MOUNT() failed: undo VI_MOUNT and destroy the mount. */
 1096                 VI_LOCK(vp);
 1097                 vp->v_iflag &= ~VI_MOUNT;
 1098                 VI_UNLOCK(vp);
 1099                 vfs_unbusy(mp, td);
 1100                 vfs_mount_destroy(mp);
 1101                 vput(vp);
 1102         }
 1103         return (error);
 1104 }
 1105 
 1106 /*
 1107  * Unmount a filesystem.
 1108  *
 1109  * Note: unmount takes a path to the vnode mounted on as argument, not
 1110  * special file (as before).
 1111  */
 1112 #ifndef _SYS_SYSPROTO_H_
 1113 struct unmount_args {
 1114         char    *path;
 1115         int     flags;
 1116 };
 1117 #endif
 1118 /* ARGSUSED */
 1119 int
 1120 unmount(td, uap)
 1121         struct thread *td;
 1122         register struct unmount_args /* {
 1123                 char *path;
 1124                 int flags;
 1125         } */ *uap;
 1126 {
 1127         struct mount *mp;
 1128         char *pathbuf;
 1129         int error, id0, id1;
 1130 
 1131         if (jailed(td->td_ucred) || usermount == 0) {
 1132                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1133                 if (error)
 1134                         return (error);
 1135         }
 1136 
 1137         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1138         error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
 1139         if (error) {
 1140                 free(pathbuf, M_TEMP);
 1141                 return (error);
 1142         }
 1143         AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
 1144         mtx_lock(&Giant);
 1145         if (uap->flags & MNT_BYFSID) {
 1146                 /* Decode the filesystem ID. */
 1147                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1148                         mtx_unlock(&Giant);
 1149                         free(pathbuf, M_TEMP);
 1150                         return (EINVAL);
 1151                 }
 1152 
 1153                 mtx_lock(&mountlist_mtx);
 1154                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1155                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1156                             mp->mnt_stat.f_fsid.val[1] == id1)
 1157                                 break;
 1158                 }
 1159                 mtx_unlock(&mountlist_mtx);
 1160         } else {
 1161                 mtx_lock(&mountlist_mtx);
 1162                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1163                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
 1164                                 break;
 1165                 }
 1166                 mtx_unlock(&mountlist_mtx);
 1167         }
 1168         free(pathbuf, M_TEMP);
 1169         if (mp == NULL) {
 1170                 /*
 1171                  * Previously we returned ENOENT for a nonexistent path and
 1172                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1173                  * now, so in the !MNT_BYFSID case return the more likely
 1174                  * EINVAL for compatibility.
 1175                  */
 1176                 mtx_unlock(&Giant);
 1177                 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1178         }
 1179 
 1180         /*
 1181          * Don't allow unmounting the root filesystem.
 1182          */
 1183         if (mp->mnt_flag & MNT_ROOTFS) {
 1184                 mtx_unlock(&Giant);
 1185                 return (EINVAL);
 1186         }
 1187         error = dounmount(mp, uap->flags, td);
 1188         mtx_unlock(&Giant);
 1189         return (error);
 1190 }
 1191 
 1192 /*
 1193  * Do the actual filesystem unmount.
       *
       * The caller must hold Giant.  "flags" is passed on to VFS_UNMOUNT();
       * MNT_FORCE additionally makes this function wait for the mount lock
       * to drain, moves directory references from the filesystem root to
       * the covered vnode, and attempts VFS_UNMOUNT() even if VFS_SYNC()
       * failed.
       *
       * Returns 0 on success, in which case "mp" has been removed from the
       * mount list and destroyed and must not be used by the caller any
       * more.  On failure an errno value is returned and the mount point is
       * put back into a usable state.
 1194  */
 1195 int
 1196 dounmount(mp, flags, td)
 1197         struct mount *mp;
 1198         int flags;
 1199         struct thread *td;
 1200 {
 1201         struct vnode *coveredvp, *fsrootvp;
 1202         int error;
 1203         int async_flag;
 1204         int mnt_gen_r;
 1205 
 1206         mtx_assert(&Giant, MA_OWNED);
 1207 
 1208         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
                      /*
                       * Take a hold on the covered vnode across vn_lock() and
                       * remember the mount generation for the check below.
                       */
 1209                 mnt_gen_r = mp->mnt_gen;
 1210                 VI_LOCK(coveredvp);
 1211                 vholdl(coveredvp);
 1212                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
 1213                 vdrop(coveredvp);
 1214                 /*
 1215                  * Check for mp being unmounted while waiting for the
 1216                  * covered vnode lock.
 1217                  */
 1218                 if (coveredvp->v_mountedhere != mp ||
 1219                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1220                         VOP_UNLOCK(coveredvp, 0, td);
 1221                         return (EBUSY);
 1222                 }
 1223         }
 1224         /*
 1225          * Only privileged root, or (if MNT_USER is set) the user that did the
 1226          * original mount is permitted to unmount this filesystem.
 1227          */
 1228         error = vfs_suser(mp, td);
 1229         if (error) {
 1230                 if (coveredvp)
 1231                         VOP_UNLOCK(coveredvp, 0, td);
 1232                 return (error);
 1233         }
 1234 
 1235         MNT_ILOCK(mp);
              /* Another unmount of this mount point is already in progress. */
 1236         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 1237                 MNT_IUNLOCK(mp);
 1238                 if (coveredvp)
 1239                         VOP_UNLOCK(coveredvp, 0, td);
 1240                 return (EBUSY);
 1241         }
 1242         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
 1243         /* Allow filesystems to detect that a forced unmount is in progress. */
 1244         if (flags & MNT_FORCE)
 1245                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
              /*
               * Drain the mount lock.  Only a forced unmount is willing to
               * wait; otherwise LK_NOWAIT makes a busy mount fail here.
               */
 1246         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
 1247             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
 1248         if (error) {
                      /* Could not drain: clear the unmount flags and wake waiters. */
 1249                 MNT_ILOCK(mp);
 1250                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
 1251                     MNTK_UNMOUNTF);
 1252                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1253                         wakeup(mp);
 1254                 MNT_IUNLOCK(mp);
 1255                 if (coveredvp)
 1256                         VOP_UNLOCK(coveredvp, 0, td);
 1257                 return (error);
 1258         }
 1259         vn_start_write(NULL, &mp, V_WAIT);
 1260 
 1261         if (mp->mnt_flag & MNT_EXPUBLIC)
 1262                 vfs_setpublicfs(NULL, NULL, NULL);
 1263 
 1264         vfs_msync(mp, MNT_WAIT);
 1265         MNT_ILOCK(mp);
              /* Remember MNT_ASYNC so it can be restored if the unmount fails. */
 1266         async_flag = mp->mnt_flag & MNT_ASYNC;
 1267         mp->mnt_flag &= ~MNT_ASYNC;
 1268         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1269         MNT_IUNLOCK(mp);
 1270         cache_purgevfs(mp);     /* remove cache entries for this file sys */
 1271         if (mp->mnt_syncer != NULL)
 1272                 vrele(mp->mnt_syncer);
 1273         /*
 1274          * For forced unmounts, move process cdir/rdir refs on the fs root
 1275          * vnode to the covered vnode.  For non-forced unmounts we want
 1276          * such references to cause an EBUSY error.
 1277          */
 1278         if ((flags & MNT_FORCE) &&
 1279             VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1280                 if (mp->mnt_vnodecovered != NULL)
 1281                         mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
 1282                 if (fsrootvp == rootvnode) {
 1283                         vrele(rootvnode);
 1284                         rootvnode = NULL;
 1285                 }
 1286                 vput(fsrootvp);
 1287         }
              /*
               * VFS_UNMOUNT() is attempted when the filesystem is read-only,
               * when VFS_SYNC() succeeded, or when the unmount is forced.
               * Otherwise "error" keeps the VFS_SYNC() failure.
               */
 1288         if (((mp->mnt_flag & MNT_RDONLY) ||
 1289              (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
 1290             (flags & MNT_FORCE)) {
 1291                 error = VFS_UNMOUNT(mp, flags, td);
 1292         }
 1293         vn_finished_write(mp);
 1294         /*
 1295          * If we failed to flush the dirty blocks for this mount point,
 1296          * undo all the cdir/rdir and rootvnode changes we made above.
 1297          * Unless we failed to do so because the device is reporting that
 1298          * it doesn't exist anymore.
 1299          */
 1300         if (error && error != ENXIO) {
 1301                 if ((flags & MNT_FORCE) &&
 1302                     VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
 1303                         if (mp->mnt_vnodecovered != NULL)
 1304                                 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
 1305                         if (rootvnode == NULL) {
 1306                                 rootvnode = fsrootvp;
 1307                                 vref(rootvnode);
 1308                         }
 1309                         vput(fsrootvp);
 1310                 }
 1311                 MNT_ILOCK(mp);
 1312                 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
                      /* The mount interlock is dropped around the syncer allocation. */
 1313                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
 1314                         MNT_IUNLOCK(mp);
 1315                         (void) vfs_allocate_syncvnode(mp);
 1316                         MNT_ILOCK(mp);
 1317                 }
 1318                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1319                 mp->mnt_flag |= async_flag;
 1320                 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 1321                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1322                 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1323                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1324                         wakeup(mp);
 1325                 MNT_IUNLOCK(mp);
 1326                 if (coveredvp)
 1327                         VOP_UNLOCK(coveredvp, 0, td);
 1328                 return (error);
 1329         }
              /*
               * Success (or ENXIO): unlink the mount from the mount list,
               * uncover and release the covered vnode, and destroy the mount.
               */
 1330         mtx_lock(&mountlist_mtx);
 1331         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1332         mtx_unlock(&mountlist_mtx);
 1333         if (coveredvp != NULL) {
 1334                 coveredvp->v_mountedhere = NULL;
 1335                 vput(coveredvp);
 1336         }
 1337         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1338         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
 1339         vfs_mount_destroy(mp);
 1340         return (0);
 1341 }
 1342 
 1343 /*
 1344  * ---------------------------------------------------------------------
 1345  * Mounting of root filesystem
 1346  *
 1347  */
 1348 
      /*
       * One outstanding hold on the root mount.  Tokens are allocated and
       * linked by root_mount_hold() and unlinked and freed by root_mount_rel().
       */
 1349 struct root_hold_token {
              /* Identifier string supplied by the holder; printed while waiting. */
 1350         const char                      *who;
              /* Linkage on the root_holds list. */
 1351         LIST_ENTRY(root_hold_token)     list;
 1352 };
 1353 
      /* List of outstanding holds; manipulated with mountlist_mtx held. */
 1354 static LIST_HEAD(, root_hold_token)     root_holds =
 1355     LIST_HEAD_INITIALIZER(&root_holds);
 1356 
      /* Set to 1 by root_mount_done(); tested by root_mounted()/root_mount_wait(). */
 1357 static int root_mount_complete;
 1358 
 1359 /*
 1360  * Hold root mount.
 1361  */
 1362 struct root_hold_token *
 1363 root_mount_hold(const char *identifier)
 1364 {
 1365         struct root_hold_token *h;
 1366 
 1367         h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
 1368         h->who = identifier;
 1369         mtx_lock(&mountlist_mtx);
 1370         LIST_INSERT_HEAD(&root_holds, h, list);
 1371         mtx_unlock(&mountlist_mtx);
 1372         return (h);
 1373 }
 1374 
 1375 /*
 1376  * Release root mount.
 1377  */
 1378 void
 1379 root_mount_rel(struct root_hold_token *h)
 1380 {
 1381 
 1382         mtx_lock(&mountlist_mtx);
 1383         LIST_REMOVE(h, list);
 1384         wakeup(&root_holds);
 1385         mtx_unlock(&mountlist_mtx);
 1386         free(h, M_DEVBUF);
 1387 }
 1388 
 1389 /*
 1390  * Wait for all subsystems to release root mount.
 1391  */
 1392 static void
 1393 root_mount_prepare(void)
 1394 {
 1395         struct root_hold_token *h;
 1396 
 1397         for (;;) {
 1398                 DROP_GIANT();
 1399                 g_waitidle();
 1400                 PICKUP_GIANT();
 1401                 mtx_lock(&mountlist_mtx);
 1402                 if (LIST_EMPTY(&root_holds)) {
 1403                         mtx_unlock(&mountlist_mtx);
 1404                         break;
 1405                 }
 1406                 printf("Root mount waiting for:");
 1407                 LIST_FOREACH(h, &root_holds, list)
 1408                         printf(" %s", h->who);
 1409                 printf("\n");
 1410                 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
 1411                     hz);
 1412         }
 1413 }
 1414 
 1415 /*
 1416  * Root was mounted, share the good news.
 1417  */
 1418 static void
 1419 root_mount_done(void)
 1420 {
 1421 
 1422         /*
 1423          * Use a mutex to prevent the wakeup being missed and waiting for
 1424          * an extra 1 second sleep.
 1425          */
 1426         mtx_lock(&mountlist_mtx);
 1427         root_mount_complete = 1;
 1428         wakeup(&root_mount_complete);
 1429         mtx_unlock(&mountlist_mtx);
 1430 }
 1431 
 1432 /*
 1433  * Return true if root is already mounted.
 1434  */
 1435 int
 1436 root_mounted(void)
 1437 {
 1438 
 1439         /* No mutex is acquired here because int stores are atomic. */
 1440         return (root_mount_complete);
 1441 }
 1442 
 1443 /*
 1444  * Wait until root is mounted.
 1445  */
 1446 void
 1447 root_mount_wait(void)
 1448 {
 1449 
 1450         /*
 1451          * Panic on an obvious deadlock - the function can't be called from
 1452          * a thread which is doing the whole SYSINIT stuff.
 1453          */
 1454         KASSERT(curthread->td_proc->p_pid != 0,
 1455             ("root_mount_wait: cannot be called from the swapper thread"));
 1456         mtx_lock(&mountlist_mtx);
 1457         while (!root_mount_complete) {
 1458                 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
 1459                     hz);
 1460         }
 1461         mtx_unlock(&mountlist_mtx);
 1462 }
 1463 
 1464 static void
 1465 set_rootvnode(struct thread *td)
 1466 {
 1467         struct proc *p;
 1468 
 1469         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1470                 panic("Cannot find root vnode");
 1471 
 1472         VOP_UNLOCK(rootvnode, 0, td);
 1473 
 1474         p = td->td_proc;
 1475         FILEDESC_XLOCK(p->p_fd);
 1476 
 1477         if (p->p_fd->fd_cdir != NULL)
 1478                 vrele(p->p_fd->fd_cdir);
 1479         p->p_fd->fd_cdir = rootvnode;
 1480         VREF(rootvnode);
 1481 
 1482         if (p->p_fd->fd_rdir != NULL)
 1483                 vrele(p->p_fd->fd_rdir);
 1484         p->p_fd->fd_rdir = rootvnode;
 1485         VREF(rootvnode);
 1486 
 1487         FILEDESC_XUNLOCK(p->p_fd);
 1488 
 1489         EVENTHANDLER_INVOKE(mountroot);
 1490 }
 1491 
 1492 /*
 1493  * Mount /devfs as our root filesystem, but do not put it on the mountlist
 1494  * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
 1495  */
 1496 
 1497 static void
 1498 devfs_first(void)
 1499 {
 1500         struct thread *td = curthread;
 1501         struct vfsoptlist *opts;
 1502         struct vfsconf *vfsp;
 1503         struct mount *mp = NULL;
 1504         int error;
 1505 
 1506         vfsp = vfs_byname("devfs");
 1507         KASSERT(vfsp != NULL, ("Could not find devfs by name"));
 1508         if (vfsp == NULL)
 1509                 return;
 1510 
 1511         mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
 1512 
 1513         error = VFS_MOUNT(mp, td);
 1514         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 1515         if (error)
 1516                 return;
 1517 
 1518         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 1519         TAILQ_INIT(opts);
 1520         mp->mnt_opt = opts;
 1521 
 1522         mtx_lock(&mountlist_mtx);
 1523         TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1524         mtx_unlock(&mountlist_mtx);
 1525 
 1526         set_rootvnode(td);
 1527 
 1528         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1529         if (error)
 1530                 printf("kern_symlink /dev -> / returns %d\n", error);
 1531 }
 1532 
 1533 /*
 1534  * Surgically move our devfs to be mounted on /dev.
 1535  */
 1536 
 1537 static void
 1538 devfs_fixup(struct thread *td)
 1539 {
 1540         struct nameidata nd;
 1541         int error;
 1542         struct vnode *vp, *dvp;
 1543         struct mount *mp;
 1544 
 1545         /* Remove our devfs mount from the mountlist and purge the cache */
 1546         mtx_lock(&mountlist_mtx);
 1547         mp = TAILQ_FIRST(&mountlist);
 1548         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1549         mtx_unlock(&mountlist_mtx);
 1550         cache_purgevfs(mp);
 1551 
 1552         VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
 1553         VI_LOCK(dvp);
 1554         dvp->v_iflag &= ~VI_MOUNT;
 1555         VI_UNLOCK(dvp);
 1556         dvp->v_mountedhere = NULL;
 1557 
 1558         /* Set up the real rootvnode, and purge the cache */
 1559         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1560         set_rootvnode(td);
 1561         cache_purgevfs(rootvnode->v_mount);
 1562 
 1563         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1564         error = namei(&nd);
 1565         if (error) {
 1566                 printf("Lookup of /dev for devfs, error: %d\n", error);
 1567                 return;
 1568         }
 1569         NDFREE(&nd, NDF_ONLY_PNBUF);
 1570         vp = nd.ni_vp;
 1571         if (vp->v_type != VDIR) {
 1572                 vput(vp);
 1573         }
 1574         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1575         if (error) {
 1576                 vput(vp);
 1577         }
 1578         cache_purge(vp);
 1579         mp->mnt_vnodecovered = vp;
 1580         vp->v_mountedhere = mp;
 1581         mtx_lock(&mountlist_mtx);
 1582         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1583         mtx_unlock(&mountlist_mtx);
 1584         VOP_UNLOCK(vp, 0, td);
 1585         vput(dvp);
 1586         vfs_unbusy(mp, td);
 1587 
 1588         /* Unlink the no longer needed /dev/dev -> / symlink */
 1589         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
 1590 }
 1591 
 1592 /*
 1593  * Report errors during filesystem mounting.
 1594  */
 1595 void
 1596 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1597 {
 1598         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1599         va_list ap;
 1600         int error, len;
 1601         char *errmsg;
 1602 
 1603         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1604         if (error || errmsg == NULL || len <= 0)
 1605                 return;
 1606 
 1607         va_start(ap, fmt);
 1608         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1609         va_end(ap);
 1610 }
 1611 
 1612 /*
 1613  * Find and mount the root filesystem
 1614  */
 1615 void
 1616 vfs_mountroot(void)
 1617 {
 1618         char *cp;
 1619         int error, i, asked = 0;
 1620 
 1621         root_mount_prepare();
 1622 
 1623         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
 1624             NULL, NULL, mount_init, mount_fini,
 1625             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1626         devfs_first();
 1627 
 1628         /*
 1629          * We are booted with instructions to prompt for the root filesystem.
 1630          */
 1631         if (boothowto & RB_ASKNAME) {
 1632                 if (!vfs_mountroot_ask())
 1633                         goto mounted;
 1634                 asked = 1;
 1635         }
 1636 
 1637         /*
 1638          * The root filesystem information is compiled in, and we are
 1639          * booted with instructions to use it.
 1640          */
 1641         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1642                 if (!vfs_mountroot_try(ctrootdevname))
 1643                         goto mounted;
 1644                 ctrootdevname = NULL;
 1645         }
 1646 
 1647         /*
 1648          * We've been given the generic "use CDROM as root" flag.  This is
 1649          * necessary because one media may be used in many different
 1650          * devices, so we need to search for them.
 1651          */
 1652         if (boothowto & RB_CDROM) {
 1653                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1654                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1655                                 goto mounted;
 1656                 }
 1657         }
 1658 
 1659         /*
 1660          * Try to use the value read by the loader from /etc/fstab, or
 1661          * supplied via some other means.  This is the preferred
 1662          * mechanism.
 1663          */
 1664         cp = getenv("vfs.root.mountfrom");
 1665         if (cp != NULL) {
 1666                 error = vfs_mountroot_try(cp);
 1667                 freeenv(cp);
 1668                 if (!error)
 1669                         goto mounted;
 1670         }
 1671 
 1672         /*
 1673          * Try values that may have been computed by code during boot
 1674          */
 1675         if (!vfs_mountroot_try(rootdevnames[0]))
 1676                 goto mounted;
 1677         if (!vfs_mountroot_try(rootdevnames[1]))
 1678                 goto mounted;
 1679 
 1680         /*
 1681          * If we (still) have a compiled-in default, try it.
 1682          */
 1683         if (ctrootdevname != NULL)
 1684                 if (!vfs_mountroot_try(ctrootdevname))
 1685                         goto mounted;
 1686         /*
 1687          * Everything so far has failed, prompt on the console if we haven't
 1688          * already tried that.
 1689          */
 1690         if (!asked)
 1691                 if (!vfs_mountroot_ask())
 1692                         goto mounted;
 1693 
 1694         panic("Root mount failed, startup aborted.");
 1695 
 1696 mounted:
 1697         root_mount_done();
 1698 }
 1699 
 1700 /*
 1701  * Mount (mountfrom) as the root filesystem.
 1702  */
 1703 static int
 1704 vfs_mountroot_try(const char *mountfrom)
 1705 {
 1706         struct mount    *mp;
 1707         char            *vfsname, *path;
 1708         time_t          timebase;
 1709         int             error;
 1710         char            patt[32];
 1711 
 1712         vfsname = NULL;
 1713         path    = NULL;
 1714         mp      = NULL;
 1715         error   = EINVAL;
 1716 
 1717         if (mountfrom == NULL)
 1718                 return (error);         /* don't complain */
 1719         printf("Trying to mount root from %s\n", mountfrom);
 1720 
 1721         /* parse vfs name and path */
 1722         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1723         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1724         vfsname[0] = path[0] = 0;
 1725         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1726         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1727                 goto out;
 1728 
 1729         if (path[0] == '\0')
 1730                 strcpy(path, ROOTNAME);
 1731 
 1732         error = kernel_vmount(
 1733             MNT_RDONLY | MNT_ROOTFS,
 1734             "fstype", vfsname,
 1735             "fspath", "/",
 1736             "from", path,
 1737             NULL);
 1738         if (error == 0) {
 1739                 /*
 1740                  * We mount devfs prior to mounting the / FS, so the first
 1741                  * entry will typically be devfs.
 1742                  */
 1743                 mp = TAILQ_FIRST(&mountlist);
 1744                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1745 
 1746                 /*
 1747                  * Iterate over all currently mounted file systems and use
 1748                  * the time stamp found to check and/or initialize the RTC.
 1749                  * Typically devfs has no time stamp and the only other FS
 1750                  * is the actual / FS.
 1751                  * Call inittodr() only once and pass it the largest of the
 1752                  * timestamps we encounter.
 1753                  */
 1754                 timebase = 0;
 1755                 do {
 1756                         if (mp->mnt_time > timebase)
 1757                                 timebase = mp->mnt_time;
 1758                         mp = TAILQ_NEXT(mp, mnt_list);
 1759                 } while (mp != NULL);
 1760                 inittodr(timebase);
 1761 
 1762                 devfs_fixup(curthread);
 1763         }
 1764 out:
 1765         free(path, M_MOUNT);
 1766         free(vfsname, M_MOUNT);
 1767         return (error);
 1768 }
 1769 
 1770 /*
 1771  * ---------------------------------------------------------------------
 1772  * Interactive root filesystem selection code.
 1773  */
 1774 
 1775 static int
 1776 vfs_mountroot_ask(void)
 1777 {
 1778         char name[128];
 1779 
 1780         for(;;) {
 1781                 printf("\nManual root filesystem specification:\n");
 1782                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1783 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
 1784                 printf("                       eg. ufs:da0s1a\n");
 1785 #else
 1786                 printf("                       eg. ufs:/dev/da0a\n");
 1787 #endif
 1788                 printf("  ?                  List valid disk boot devices\n");
 1789                 printf("  <empty line>       Abort manual input\n");
 1790                 printf("\nmountroot> ");
 1791                 gets(name, sizeof(name), 1);
 1792                 if (name[0] == '\0')
 1793                         return (1);
 1794                 if (name[0] == '?') {
 1795                         printf("\nList of GEOM managed disk devices:\n  ");
 1796                         g_dev_print();
 1797                         continue;
 1798                 }
 1799                 if (!vfs_mountroot_try(name))
 1800                         return (0);
 1801         }
 1802 }
 1803 
 1804 /*
 1805  * ---------------------------------------------------------------------
 1806  * Functions for querying mount options/arguments from filesystems.
 1807  */
 1808 
 1809 /*
 1810  * Check that no unknown options are given
 1811  */
 1812 int
 1813 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1814 {
 1815         struct vfsopt *opt;
 1816         char errmsg[255];
 1817         const char **t, *p, *q;
 1818         int ret = 0;
 1819 
 1820         TAILQ_FOREACH(opt, opts, link) {
 1821                 p = opt->name;
 1822                 q = NULL;
 1823                 if (p[0] == 'n' && p[1] == 'o')
 1824                         q = p + 2;
 1825                 for(t = global_opts; *t != NULL; t++) {
 1826                         if (strcmp(*t, p) == 0)
 1827                                 break;
 1828                         if (q != NULL) {
 1829                                 if (strcmp(*t, q) == 0)
 1830                                         break;
 1831                         }
 1832                 }
 1833                 if (*t != NULL)
 1834                         continue;
 1835                 for(t = legal; *t != NULL; t++) {
 1836                         if (strcmp(*t, p) == 0)
 1837                                 break;
 1838                         if (q != NULL) {
 1839                                 if (strcmp(*t, q) == 0)
 1840                                         break;
 1841                         }
 1842                 }
 1843                 if (*t != NULL)
 1844                         continue;
 1845                 snprintf(errmsg, sizeof(errmsg),
 1846                     "mount option <%s> is unknown", p);
 1847                 printf("%s\n", errmsg);
 1848                 ret = EINVAL;
 1849         }
 1850         if (ret != 0) {
 1851                 TAILQ_FOREACH(opt, opts, link) {
 1852                         if (strcmp(opt->name, "errmsg") == 0) {
 1853                                 strncpy((char *)opt->value, errmsg, opt->len);
 1854                         }
 1855                 }
 1856         }
 1857         return (ret);
 1858 }
 1859 
 1860 /*
 1861  * Get a mount option by its name.
 1862  *
 1863  * Return 0 if the option was found, ENOENT otherwise.
 1864  * If len is non-NULL it will be filled with the length
 1865  * of the option. If buf is non-NULL, it will be filled
 1866  * with the address of the option.
 1867  */
 1868 int
 1869 vfs_getopt(opts, name, buf, len)
 1870         struct vfsoptlist *opts;
 1871         const char *name;
 1872         void **buf;
 1873         int *len;
 1874 {
 1875         struct vfsopt *opt;
 1876 
 1877         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1878 
 1879         TAILQ_FOREACH(opt, opts, link) {
 1880                 if (strcmp(name, opt->name) == 0) {
 1881                         if (len != NULL)
 1882                                 *len = opt->len;
 1883                         if (buf != NULL)
 1884                                 *buf = opt->value;
 1885                         return (0);
 1886                 }
 1887         }
 1888         return (ENOENT);
 1889 }
 1890 
 1891 static int
 1892 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 1893 {
 1894         struct vfsopt *opt;
 1895         int i;
 1896 
 1897         if (opts == NULL)
 1898                 return (-1);
 1899 
 1900         i = 0;
 1901         TAILQ_FOREACH(opt, opts, link) {
 1902                 if (strcmp(name, opt->name) == 0)
 1903                         return (i);
 1904                 ++i;
 1905         }
 1906         return (-1);
 1907 }
 1908 
 1909 char *
 1910 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1911 {
 1912         struct vfsopt *opt;
 1913 
 1914         *error = 0;
 1915         TAILQ_FOREACH(opt, opts, link) {
 1916                 if (strcmp(name, opt->name) != 0)
 1917                         continue;
 1918                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1919                         *error = EINVAL;
 1920                         return (NULL);
 1921                 }
 1922                 return (opt->value);
 1923         }
 1924         *error = ENOENT;
 1925         return (NULL);
 1926 }
 1927 
 1928 int
 1929 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1930 {
 1931         struct vfsopt *opt;
 1932 
 1933         TAILQ_FOREACH(opt, opts, link) {
 1934                 if (strcmp(name, opt->name) == 0) {
 1935                         if (w != NULL)
 1936                                 *w |= val;
 1937                         return (1);
 1938                 }
 1939         }
 1940         if (w != NULL)
 1941                 *w &= ~val;
 1942         return (0);
 1943 }
 1944 
 1945 int
 1946 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1947 {
 1948         va_list ap;
 1949         struct vfsopt *opt;
 1950         int ret;
 1951 
 1952         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1953 
 1954         TAILQ_FOREACH(opt, opts, link) {
 1955                 if (strcmp(name, opt->name) != 0)
 1956                         continue;
 1957                 if (opt->len == 0 || opt->value == NULL)
 1958                         return (0);
 1959                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1960                         return (0);
 1961                 va_start(ap, fmt);
 1962                 ret = vsscanf(opt->value, fmt, ap);
 1963                 va_end(ap);
 1964                 return (ret);
 1965         }
 1966         return (0);
 1967 }
 1968 
 1969 /*
 1970  * Find and copy a mount option.
 1971  *
 1972  * The size of the buffer has to be specified
 1973  * in len, if it is not the same length as the
 1974  * mount option, EINVAL is returned.
 1975  * Returns ENOENT if the option is not found.
 1976  */
 1977 int
 1978 vfs_copyopt(opts, name, dest, len)
 1979         struct vfsoptlist *opts;
 1980         const char *name;
 1981         void *dest;
 1982         int len;
 1983 {
 1984         struct vfsopt *opt;
 1985 
 1986         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1987 
 1988         TAILQ_FOREACH(opt, opts, link) {
 1989                 if (strcmp(name, opt->name) == 0) {
 1990                         if (len != opt->len)
 1991                                 return (EINVAL);
 1992                         bcopy(opt->value, dest, opt->len);
 1993                         return (0);
 1994                 }
 1995         }
 1996         return (ENOENT);
 1997 }
 1998 
 1999 /*
 2000  * This is a helper function for filesystems to traverse their
 2001  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 2002  */
 2003 
 2004 struct vnode *
 2005 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 2006 {
 2007         struct vnode *vp;
 2008 
 2009         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2010 
 2011         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2012         if ((*mvp)->v_yield++ == 500) {
 2013                 MNT_IUNLOCK(mp);
 2014                 (*mvp)->v_yield = 0;
 2015                 uio_yield();
 2016                 MNT_ILOCK(mp);
 2017         }
 2018         vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 2019         while (vp != NULL && vp->v_type == VMARKER)
 2020                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2021 
 2022         /* Check if we are done */
 2023         if (vp == NULL) {
 2024                 __mnt_vnode_markerfree(mvp, mp);
 2025                 return (NULL);
 2026         }
 2027         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2028         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2029         return (vp);
 2030 }
 2031 
 2032 struct vnode *
 2033 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 2034 {
 2035         struct vnode *vp;
 2036 
 2037         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2038 
 2039         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2040         while (vp != NULL && vp->v_type == VMARKER)
 2041                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2042 
 2043         /* Check if we are done */
 2044         if (vp == NULL) {
 2045                 *mvp = NULL;
 2046                 return (NULL);
 2047         }
 2048         mp->mnt_holdcnt++;
 2049         MNT_IUNLOCK(mp);
 2050         *mvp = (struct vnode *) malloc(sizeof(struct vnode),
 2051                                        M_VNODE_MARKER,
 2052                                        M_WAITOK | M_ZERO);
 2053         MNT_ILOCK(mp);
 2054         (*mvp)->v_type = VMARKER;
 2055 
 2056         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2057         while (vp != NULL && vp->v_type == VMARKER)
 2058                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2059 
 2060         /* Check if we are done */
 2061         if (vp == NULL) {
 2062                 MNT_IUNLOCK(mp);
 2063                 free(*mvp, M_VNODE_MARKER);
 2064                 MNT_ILOCK(mp);
 2065                 *mvp = NULL;
 2066                 mp->mnt_holdcnt--;
 2067                 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2068                         wakeup(&mp->mnt_holdcnt);
 2069                 return (NULL);
 2070         }
 2071         mp->mnt_markercnt++;
 2072         (*mvp)->v_mount = mp;
 2073         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2074         return (vp);
 2075 }
 2076 
 2077 
 2078 void
 2079 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 2080 {
 2081 
 2082         if (*mvp == NULL)
 2083                 return;
 2084 
 2085         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2086 
 2087         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2088         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2089         MNT_IUNLOCK(mp);
 2090         free(*mvp, M_VNODE_MARKER);
 2091         MNT_ILOCK(mp);
 2092         *mvp = NULL;
 2093 
 2094         mp->mnt_markercnt--;
 2095         mp->mnt_holdcnt--;
 2096         if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2097                 wakeup(&mp->mnt_holdcnt);
 2098 }
 2099 
 2100 
 2101 int
 2102 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 2103 {
 2104         int error;
 2105 
 2106         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 2107         if (sbp != &mp->mnt_stat)
 2108                 *sbp = mp->mnt_stat;
 2109         return (error);
 2110 }
 2111 
 2112 void
 2113 vfs_mountedfrom(struct mount *mp, const char *from)
 2114 {
 2115 
 2116         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2117         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2118             sizeof mp->mnt_stat.f_mntfromname);
 2119 }
 2120 
 2121 /*
 2122  * ---------------------------------------------------------------------
 2123  * This is the api for building mount args and mounting filesystems from
 2124  * inside the kernel.
 2125  *
 2126  * The API works by accumulation of individual args.  First error is
 2127  * latched.
 2128  *
 2129  * XXX: should be documented in new manpage kernel_mount(9)
 2130  */
 2131 
 2132 /* A memory allocation which must be freed when we are done */
 2133 struct mntaarg {
 2134         SLIST_ENTRY(mntaarg)    next;
 2135 };
 2136 
 2137 /* The header for the mount arguments */
 2138 struct mntarg {
 2139         struct iovec *v;
 2140         int len;
 2141         int error;
 2142         SLIST_HEAD(, mntaarg)   list;
 2143 };
 2144 
 2145 /*
 2146  * Add a boolean argument.
 2147  *
 2148  * flag is the boolean value.
 2149  * name must start with "no".
 2150  */
 2151 struct mntarg *
 2152 mount_argb(struct mntarg *ma, int flag, const char *name)
 2153 {
 2154 
 2155         KASSERT(name[0] == 'n' && name[1] == 'o',
 2156             ("mount_argb(...,%s): name must start with 'no'", name));
 2157 
 2158         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2159 }
 2160 
 2161 /*
 2162  * Add an argument printf style
 2163  */
 2164 struct mntarg *
 2165 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2166 {
 2167         va_list ap;
 2168         struct mntaarg *maa;
 2169         struct sbuf *sb;
 2170         int len;
 2171 
 2172         if (ma == NULL) {
 2173                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2174                 SLIST_INIT(&ma->list);
 2175         }
 2176         if (ma->error)
 2177                 return (ma);
 2178 
 2179         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2180             M_MOUNT, M_WAITOK);
 2181         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2182         ma->v[ma->len].iov_len = strlen(name) + 1;
 2183         ma->len++;
 2184 
 2185         sb = sbuf_new_auto();
 2186         va_start(ap, fmt);
 2187         sbuf_vprintf(sb, fmt, ap);
 2188         va_end(ap);
 2189         sbuf_finish(sb);
 2190         len = sbuf_len(sb) + 1;
 2191         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2192         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2193         bcopy(sbuf_data(sb), maa + 1, len);
 2194         sbuf_delete(sb);
 2195 
 2196         ma->v[ma->len].iov_base = maa + 1;
 2197         ma->v[ma->len].iov_len = len;
 2198         ma->len++;
 2199 
 2200         return (ma);
 2201 }
 2202 
 2203 /*
 2204  * Add an argument which is a userland string.
 2205  */
 2206 struct mntarg *
 2207 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2208 {
 2209         struct mntaarg *maa;
 2210         char *tbuf;
 2211 
 2212         if (val == NULL)
 2213                 return (ma);
 2214         if (ma == NULL) {
 2215                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2216                 SLIST_INIT(&ma->list);
 2217         }
 2218         if (ma->error)
 2219                 return (ma);
 2220         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2221         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2222         tbuf = (void *)(maa + 1);
 2223         ma->error = copyinstr(val, tbuf, len, NULL);
 2224         return (mount_arg(ma, name, tbuf, -1));
 2225 }
 2226 
 2227 /*
 2228  * Plain argument.
 2229  *
 2230  * If length is -1, use printf.
 2231  */
 2232 struct mntarg *
 2233 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2234 {
 2235 
 2236         if (ma == NULL) {
 2237                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2238                 SLIST_INIT(&ma->list);
 2239         }
 2240         if (ma->error)
 2241                 return (ma);
 2242 
 2243         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2244             M_MOUNT, M_WAITOK);
 2245         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2246         ma->v[ma->len].iov_len = strlen(name) + 1;
 2247         ma->len++;
 2248 
 2249         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2250         if (len < 0)
 2251                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2252         else
 2253                 ma->v[ma->len].iov_len = len;
 2254         ma->len++;
 2255         return (ma);
 2256 }
 2257 
 2258 /*
 2259  * Free a mntarg structure
 2260  */
 2261 static void
 2262 free_mntarg(struct mntarg *ma)
 2263 {
 2264         struct mntaarg *maa;
 2265 
 2266         while (!SLIST_EMPTY(&ma->list)) {
 2267                 maa = SLIST_FIRST(&ma->list);
 2268                 SLIST_REMOVE_HEAD(&ma->list, next);
 2269                 free(maa, M_MOUNT);
 2270         }
 2271         free(ma->v, M_MOUNT);
 2272         free(ma, M_MOUNT);
 2273 }
 2274 
 2275 /*
 2276  * Mount a filesystem
 2277  */
 2278 int
 2279 kernel_mount(struct mntarg *ma, int flags)
 2280 {
 2281         struct uio auio;
 2282         int error;
 2283 
 2284         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2285         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2286         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2287 
 2288         auio.uio_iov = ma->v;
 2289         auio.uio_iovcnt = ma->len;
 2290         auio.uio_segflg = UIO_SYSSPACE;
 2291 
 2292         error = ma->error;
 2293         if (!error)
 2294                 error = vfs_donmount(curthread, flags, &auio);
 2295         free_mntarg(ma);
 2296         return (error);
 2297 }
 2298 
 2299 /*
 2300  * A printflike function to mount a filesystem.
 2301  */
 2302 int
 2303 kernel_vmount(int flags, ...)
 2304 {
 2305         struct mntarg *ma = NULL;
 2306         va_list ap;
 2307         const char *cp;
 2308         const void *vp;
 2309         int error;
 2310 
 2311         va_start(ap, flags);
 2312         for (;;) {
 2313                 cp = va_arg(ap, const char *);
 2314                 if (cp == NULL)
 2315                         break;
 2316                 vp = va_arg(ap, const void *);
 2317                 ma = mount_arg(ma, cp, vp, -1);
 2318         }
 2319         va_end(ap);
 2320 
 2321         error = kernel_mount(ma, flags);
 2322         return (error);
 2323 }

Cache object: 4707be5c7fcbccb7d1d312bb8597b47b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.