The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mountroot.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010 Marcel Moolenaar
    3  * Copyright (c) 1999-2004 Poul-Henning Kamp
    4  * Copyright (c) 1999 Michael Smith
    5  * Copyright (c) 1989, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  * (c) UNIX System Laboratories, Inc.
    8  * All or some portions of this file are derived from material licensed
    9  * to the University of California by American Telephone and Telegraph
   10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   11  * the permission of UNIX System Laboratories, Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  */
   37 
   38 #include "opt_rootdevname.h"
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/11.2/sys/kern/vfs_mountroot.c 331722 2018-03-29 02:50:57Z eadler $");
   42 
   43 #include <sys/param.h>
   44 #include <sys/conf.h>
   45 #include <sys/cons.h>
   46 #include <sys/fcntl.h>
   47 #include <sys/jail.h>
   48 #include <sys/kernel.h>
   49 #include <sys/malloc.h>
   50 #include <sys/mdioctl.h>
   51 #include <sys/mount.h>
   52 #include <sys/mutex.h>
   53 #include <sys/namei.h>
   54 #include <sys/priv.h>
   55 #include <sys/proc.h>
   56 #include <sys/filedesc.h>
   57 #include <sys/reboot.h>
   58 #include <sys/sbuf.h>
   59 #include <sys/stat.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/sysproto.h>
   62 #include <sys/sx.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/sysent.h>
   65 #include <sys/systm.h>
   66 #include <sys/vnode.h>
   67 
   68 #include <geom/geom.h>
   69 
   70 /*
   71  * The root filesystem is detailed in the kernel environment variable
   72  * vfs.root.mountfrom, which is expected to be in the general format
   73  *
   74  * <vfsname>:[<path>][  <vfsname>:[<path>] ...]
   75  * vfsname   := the name of a VFS known to the kernel and capable
   76  *              of being mounted as root
   77  * path      := disk device name or other data used by the filesystem
   78  *              to locate its physical store
   79  *
   80  * If the environment variable vfs.root.mountfrom is a space separated list,
   81  * each list element is tried in turn and the root filesystem will be mounted
   82  * from the first one that succeeds.
   83  *
   84  * The environment variable vfs.root.mountfrom.options is a comma delimited
   85  * set of string mount options.  These mount options must be parseable
   86  * by nmount() in the kernel.
   87  */
   88 
static int parse_mount(char **);
static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS);
static void vfs_mountroot_wait(void);
static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev);

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;

/*
 * Mount of the system's /dev.
 */
struct mount *rootdevmp;

/*
 * Additional root device specifications.  Entry 0, when non-NULL, is
 * appended to the generated mount configuration; who fills these in is
 * not visible in this file section.
 */
char *rootdevnames[2] = {NULL, NULL};

/*
 * Protects the root_holds list below.
 */
struct mtx root_holds_mtx;
MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF);

/*
 * One outstanding root mount hold.  'who' is the identifier pointer handed
 * to root_mount_hold(); the string is referenced, not copied.
 */
struct root_hold_token {
        const char                      *who;
        LIST_ENTRY(root_hold_token)     list;
};

static LIST_HEAD(, root_hold_token)     root_holds =
    LIST_HEAD_INITIALIZER(root_holds);

/*
 * What to do when no root file system could be mounted.  Selected by the
 * ".onfail" configuration directive.
 */
enum action {
        A_CONTINUE,
        A_PANIC,
        A_REBOOT,
        A_RETRY
};

static enum action root_mount_onfail = A_CONTINUE;

/*
 * Unit number of the md device attached by the ".md" directive, or -1 when
 * none is attached.
 */
static int root_mount_mddev;
/* Value reported by root_mounted(); it is not written in this section. */
static int root_mount_complete;

/* By default wait up to 3 seconds for devices to appear. */
static int root_mount_timeout = 3;
TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);

static int root_mount_always_wait = 0;
SYSCTL_INT(_vfs, OID_AUTO, root_mount_always_wait, CTLFLAG_RDTUN,
    &root_mount_always_wait, 0,
    "Wait for root mount holds even if the root device already exists");

SYSCTL_PROC(_vfs, OID_AUTO, root_mount_hold,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vfs_root_mount_hold, "A",
    "List of root mount hold tokens");
  144 
  145 static int
  146 sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS)
  147 {
  148         struct sbuf sb;
  149         struct root_hold_token *h;
  150         int error;
  151 
  152         sbuf_new(&sb, NULL, 256, SBUF_AUTOEXTEND | SBUF_INCLUDENUL);
  153 
  154         mtx_lock(&root_holds_mtx);
  155         LIST_FOREACH(h, &root_holds, list) {
  156                 if (h != LIST_FIRST(&root_holds))
  157                         sbuf_putc(&sb, ' ');
  158                 sbuf_printf(&sb, "%s", h->who);
  159         }
  160         mtx_unlock(&root_holds_mtx);
  161 
  162         error = sbuf_finish(&sb);
  163         if (error == 0)
  164                 error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
  165         sbuf_delete(&sb);
  166         return (error);
  167 }
  168 
  169 struct root_hold_token *
  170 root_mount_hold(const char *identifier)
  171 {
  172         struct root_hold_token *h;
  173 
  174         h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
  175         h->who = identifier;
  176         mtx_lock(&root_holds_mtx);
  177         LIST_INSERT_HEAD(&root_holds, h, list);
  178         mtx_unlock(&root_holds_mtx);
  179         return (h);
  180 }
  181 
  182 void
  183 root_mount_rel(struct root_hold_token *h)
  184 {
  185 
  186         if (h == NULL)
  187                 return;
  188 
  189         mtx_lock(&root_holds_mtx);
  190         LIST_REMOVE(h, list);
  191         wakeup(&root_holds);
  192         mtx_unlock(&root_holds_mtx);
  193         free(h, M_DEVBUF);
  194 }
  195 
  196 int
  197 root_mounted(void)
  198 {
  199 
  200         /* No mutex is acquired here because int stores are atomic. */
  201         return (root_mount_complete);
  202 }
  203 
  204 static void
  205 set_rootvnode(void)
  206 {
  207         struct proc *p;
  208 
  209         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
  210                 panic("Cannot find root vnode");
  211 
  212         VOP_UNLOCK(rootvnode, 0);
  213 
  214         p = curthread->td_proc;
  215         FILEDESC_XLOCK(p->p_fd);
  216 
  217         if (p->p_fd->fd_cdir != NULL)
  218                 vrele(p->p_fd->fd_cdir);
  219         p->p_fd->fd_cdir = rootvnode;
  220         VREF(rootvnode);
  221 
  222         if (p->p_fd->fd_rdir != NULL)
  223                 vrele(p->p_fd->fd_rdir);
  224         p->p_fd->fd_rdir = rootvnode;
  225         VREF(rootvnode);
  226 
  227         FILEDESC_XUNLOCK(p->p_fd);
  228 }
  229 
  230 static int
  231 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
  232 {
  233         struct vfsoptlist *opts;
  234         struct vfsconf *vfsp;
  235         struct mount *mp;
  236         int error;
  237 
  238         *mpp = NULL;
  239 
  240         if (rootdevmp != NULL) {
  241                 /*
  242                  * Already have /dev; this happens during rerooting.
  243                  */
  244                 error = vfs_busy(rootdevmp, 0);
  245                 if (error != 0)
  246                         return (error);
  247                 *mpp = rootdevmp;
  248         } else {
  249                 vfsp = vfs_byname("devfs");
  250                 KASSERT(vfsp != NULL, ("Could not find devfs by name"));
  251                 if (vfsp == NULL)
  252                         return (ENOENT);
  253 
  254                 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
  255 
  256                 error = VFS_MOUNT(mp);
  257                 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
  258                 if (error)
  259                         return (error);
  260 
  261                 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  262                 TAILQ_INIT(opts);
  263                 mp->mnt_opt = opts;
  264 
  265                 mtx_lock(&mountlist_mtx);
  266                 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
  267                 mtx_unlock(&mountlist_mtx);
  268 
  269                 *mpp = mp;
  270                 rootdevmp = mp;
  271         }
  272 
  273         set_rootvnode();
  274 
  275         error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
  276         if (error)
  277                 printf("kern_symlink /dev -> / returns %d\n", error);
  278 
  279         return (error);
  280 }
  281 
  282 static void
  283 vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
  284 {
  285         struct nameidata nd;
  286         struct mount *mporoot, *mpnroot;
  287         struct vnode *vp, *vporoot, *vpdevfs;
  288         char *fspath;
  289         int error;
  290 
  291         mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
  292 
  293         /* Shuffle the mountlist. */
  294         mtx_lock(&mountlist_mtx);
  295         mporoot = TAILQ_FIRST(&mountlist);
  296         TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
  297         if (mporoot != mpdevfs) {
  298                 TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
  299                 TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
  300         }
  301         TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
  302         mtx_unlock(&mountlist_mtx);
  303 
  304         cache_purgevfs(mporoot, true);
  305         if (mporoot != mpdevfs)
  306                 cache_purgevfs(mpdevfs, true);
  307 
  308         VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);
  309 
  310         VI_LOCK(vporoot);
  311         vporoot->v_iflag &= ~VI_MOUNT;
  312         VI_UNLOCK(vporoot);
  313         vporoot->v_mountedhere = NULL;
  314         mporoot->mnt_flag &= ~MNT_ROOTFS;
  315         mporoot->mnt_vnodecovered = NULL;
  316         vput(vporoot);
  317 
  318         /* Set up the new rootvnode, and purge the cache */
  319         mpnroot->mnt_vnodecovered = NULL;
  320         set_rootvnode();
  321         cache_purgevfs(rootvnode->v_mount, true);
  322 
  323         if (mporoot != mpdevfs) {
  324                 /* Remount old root under /.mount or /mnt */
  325                 fspath = "/.mount";
  326                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  327                     fspath, td);
  328                 error = namei(&nd);
  329                 if (error) {
  330                         NDFREE(&nd, NDF_ONLY_PNBUF);
  331                         fspath = "/mnt";
  332                         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
  333                             fspath, td);
  334                         error = namei(&nd);
  335                 }
  336                 if (!error) {
  337                         vp = nd.ni_vp;
  338                         error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
  339                         if (!error)
  340                                 error = vinvalbuf(vp, V_SAVE, 0, 0);
  341                         if (!error) {
  342                                 cache_purge(vp);
  343                                 mporoot->mnt_vnodecovered = vp;
  344                                 vp->v_mountedhere = mporoot;
  345                                 strlcpy(mporoot->mnt_stat.f_mntonname,
  346                                     fspath, MNAMELEN);
  347                                 VOP_UNLOCK(vp, 0);
  348                         } else
  349                                 vput(vp);
  350                 }
  351                 NDFREE(&nd, NDF_ONLY_PNBUF);
  352 
  353                 if (error)
  354                         printf("mountroot: unable to remount previous root "
  355                             "under /.mount or /mnt (error %d)\n", error);
  356         }
  357 
  358         /* Remount devfs under /dev */
  359         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
  360         error = namei(&nd);
  361         if (!error) {
  362                 vp = nd.ni_vp;
  363                 error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
  364                 if (!error)
  365                         error = vinvalbuf(vp, V_SAVE, 0, 0);
  366                 if (!error) {
  367                         vpdevfs = mpdevfs->mnt_vnodecovered;
  368                         if (vpdevfs != NULL) {
  369                                 cache_purge(vpdevfs);
  370                                 vpdevfs->v_mountedhere = NULL;
  371                                 vrele(vpdevfs);
  372                         }
  373                         mpdevfs->mnt_vnodecovered = vp;
  374                         vp->v_mountedhere = mpdevfs;
  375                         VOP_UNLOCK(vp, 0);
  376                 } else
  377                         vput(vp);
  378         }
  379         if (error)
  380                 printf("mountroot: unable to remount devfs under /dev "
  381                     "(error %d)\n", error);
  382         NDFREE(&nd, NDF_ONLY_PNBUF);
  383 
  384         if (mporoot == mpdevfs) {
  385                 vfs_unbusy(mpdevfs);
  386                 /* Unlink the no longer needed /dev/dev -> / symlink */
  387                 error = kern_unlinkat(td, AT_FDCWD, "/dev/dev",
  388                     UIO_SYSSPACE, 0);
  389                 if (error)
  390                         printf("mountroot: unable to unlink /dev/dev "
  391                             "(error %d)\n", error);
  392         }
  393 }
  394 
  395 /*
  396  * Configuration parser.
  397  */
  398 
/*
 * Parser character classes.  These negative pseudo-characters may be passed
 * as the match argument of parse_skipto() in place of a literal character.
 */
#define CC_WHITESPACE           -1
#define CC_NONWHITESPACE        -2

/*
 * Parse errors.  Negative results of parse_skipto(): PE_EOF when the
 * terminating NUL is reached, PE_EOL when a newline is met while looking
 * for non-whitespace.
 */
#define PE_EOF                  -1
#define PE_EOL                  -2
  406 
  407 static __inline int
  408 parse_peek(char **conf)
  409 {
  410 
  411         return (**conf);
  412 }
  413 
  414 static __inline void
  415 parse_poke(char **conf, int c)
  416 {
  417 
  418         **conf = c;
  419 }
  420 
  421 static __inline void
  422 parse_advance(char **conf)
  423 {
  424 
  425         (*conf)++;
  426 }
  427 
  428 static int
  429 parse_skipto(char **conf, int mc)
  430 {
  431         int c, match;
  432 
  433         while (1) {
  434                 c = parse_peek(conf);
  435                 if (c == 0)
  436                         return (PE_EOF);
  437                 switch (mc) {
  438                 case CC_WHITESPACE:
  439                         match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
  440                         break;
  441                 case CC_NONWHITESPACE:
  442                         if (c == '\n')
  443                                 return (PE_EOL);
  444                         match = (c != ' ' && c != '\t') ? 1 : 0;
  445                         break;
  446                 default:
  447                         match = (c == mc) ? 1 : 0;
  448                         break;
  449                 }
  450                 if (match)
  451                         break;
  452                 parse_advance(conf);
  453         }
  454         return (0);
  455 }
  456 
  457 static int
  458 parse_token(char **conf, char **tok)
  459 {
  460         char *p;
  461         size_t len;
  462         int error;
  463 
  464         *tok = NULL;
  465         error = parse_skipto(conf, CC_NONWHITESPACE);
  466         if (error)
  467                 return (error);
  468         p = *conf;
  469         error = parse_skipto(conf, CC_WHITESPACE);
  470         len = *conf - p;
  471         *tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
  472         bcopy(p, *tok, len);
  473         return (0);
  474 }
  475 
  476 static void
  477 parse_dir_ask_printenv(const char *var)
  478 {
  479         char *val;
  480 
  481         val = kern_getenv(var);
  482         if (val != NULL) {
  483                 printf("  %s=%s\n", var, val);
  484                 freeenv(val);
  485         }
  486 }
  487 
  488 static int
  489 parse_dir_ask(char **conf)
  490 {
  491         char name[80];
  492         char *mnt;
  493         int error;
  494 
  495         vfs_mountroot_wait();
  496 
  497         printf("\nLoader variables:\n");
  498         parse_dir_ask_printenv("vfs.root.mountfrom");
  499         parse_dir_ask_printenv("vfs.root.mountfrom.options");
  500 
  501         printf("\nManual root filesystem specification:\n");
  502         printf("  <fstype>:<device> [options]\n");
  503         printf("      Mount <device> using filesystem <fstype>\n");
  504         printf("      and with the specified (optional) option list.\n");
  505         printf("\n");
  506         printf("    eg. ufs:/dev/da0s1a\n");
  507         printf("        zfs:tank\n");
  508         printf("        cd9660:/dev/cd0 ro\n");
  509         printf("          (which is equivalent to: ");
  510         printf("mount -t cd9660 -o ro /dev/cd0 /)\n");
  511         printf("\n");
  512         printf("  ?               List valid disk boot devices\n");
  513         printf("  .               Yield 1 second (for background tasks)\n");
  514         printf("  <empty line>    Abort manual input\n");
  515 
  516         do {
  517                 error = EINVAL;
  518                 printf("\nmountroot> ");
  519                 cngets(name, sizeof(name), GETS_ECHO);
  520                 if (name[0] == '\0')
  521                         break;
  522                 if (name[0] == '?' && name[1] == '\0') {
  523                         printf("\nList of GEOM managed disk devices:\n  ");
  524                         g_dev_print();
  525                         continue;
  526                 }
  527                 if (name[0] == '.' && name[1] == '\0') {
  528                         pause("rmask", hz);
  529                         continue;
  530                 }
  531                 mnt = name;
  532                 error = parse_mount(&mnt);
  533                 if (error == -1)
  534                         printf("Invalid file system specification.\n");
  535         } while (error != 0);
  536 
  537         return (error);
  538 }
  539 
  540 static int
  541 parse_dir_md(char **conf)
  542 {
  543         struct stat sb;
  544         struct thread *td;
  545         struct md_ioctl *mdio;
  546         char *path, *tok;
  547         int error, fd, len;
  548 
  549         td = curthread;
  550 
  551         error = parse_token(conf, &tok);
  552         if (error)
  553                 return (error);
  554 
  555         len = strlen(tok);
  556         mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
  557         path = (void *)(mdio + 1);
  558         bcopy(tok, path, len);
  559         free(tok, M_TEMP);
  560 
  561         /* Get file status. */
  562         error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL);
  563         if (error)
  564                 goto out;
  565 
  566         /* Open /dev/mdctl so that we can attach/detach. */
  567         error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE,
  568             O_RDWR, 0);
  569         if (error)
  570                 goto out;
  571 
  572         fd = td->td_retval[0];
  573         mdio->md_version = MDIOVERSION;
  574         mdio->md_type = MD_VNODE;
  575 
  576         if (root_mount_mddev != -1) {
  577                 mdio->md_unit = root_mount_mddev;
  578                 DROP_GIANT();
  579                 error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
  580                 PICKUP_GIANT();
  581                 /* Ignore errors. We don't care. */
  582                 root_mount_mddev = -1;
  583         }
  584 
  585         mdio->md_file = (void *)(mdio + 1);
  586         mdio->md_options = MD_AUTOUNIT | MD_READONLY;
  587         mdio->md_mediasize = sb.st_size;
  588         mdio->md_unit = 0;
  589         DROP_GIANT();
  590         error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
  591         PICKUP_GIANT();
  592         if (error)
  593                 goto out;
  594 
  595         if (mdio->md_unit > 9) {
  596                 printf("rootmount: too many md units\n");
  597                 mdio->md_file = NULL;
  598                 mdio->md_options = 0;
  599                 mdio->md_mediasize = 0;
  600                 DROP_GIANT();
  601                 error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
  602                 PICKUP_GIANT();
  603                 /* Ignore errors. We don't care. */
  604                 error = ERANGE;
  605                 goto out;
  606         }
  607 
  608         root_mount_mddev = mdio->md_unit;
  609         printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
  610 
  611         error = kern_close(td, fd);
  612 
  613  out:
  614         free(mdio, M_TEMP);
  615         return (error);
  616 }
  617 
  618 static int
  619 parse_dir_onfail(char **conf)
  620 {
  621         char *action;
  622         int error;
  623 
  624         error = parse_token(conf, &action);
  625         if (error)
  626                 return (error);
  627 
  628         if (!strcmp(action, "continue"))
  629                 root_mount_onfail = A_CONTINUE;
  630         else if (!strcmp(action, "panic"))
  631                 root_mount_onfail = A_PANIC;
  632         else if (!strcmp(action, "reboot"))
  633                 root_mount_onfail = A_REBOOT;
  634         else if (!strcmp(action, "retry"))
  635                 root_mount_onfail = A_RETRY;
  636         else {
  637                 printf("rootmount: %s: unknown action\n", action);
  638                 error = EINVAL;
  639         }
  640 
  641         free(action, M_TEMP);
  642         return (0);
  643 }
  644 
  645 static int
  646 parse_dir_timeout(char **conf)
  647 {
  648         char *tok, *endtok;
  649         long secs;
  650         int error;
  651 
  652         error = parse_token(conf, &tok);
  653         if (error)
  654                 return (error);
  655 
  656         secs = strtol(tok, &endtok, 0);
  657         error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
  658         if (!error)
  659                 root_mount_timeout = secs;
  660         free(tok, M_TEMP);
  661         return (error);
  662 }
  663 
  664 static int
  665 parse_directive(char **conf)
  666 {
  667         char *dir;
  668         int error;
  669 
  670         error = parse_token(conf, &dir);
  671         if (error)
  672                 return (error);
  673 
  674         if (strcmp(dir, ".ask") == 0)
  675                 error = parse_dir_ask(conf);
  676         else if (strcmp(dir, ".md") == 0)
  677                 error = parse_dir_md(conf);
  678         else if (strcmp(dir, ".onfail") == 0)
  679                 error = parse_dir_onfail(conf);
  680         else if (strcmp(dir, ".timeout") == 0)
  681                 error = parse_dir_timeout(conf);
  682         else {
  683                 printf("mountroot: invalid directive `%s'\n", dir);
  684                 /* Ignore the rest of the line. */
  685                 (void)parse_skipto(conf, '\n');
  686                 error = EINVAL;
  687         }
  688         free(dir, M_TEMP);
  689         return (error);
  690 }
  691 
  692 static int
  693 parse_mount_dev_present(const char *dev)
  694 {
  695         struct nameidata nd;
  696         int error;
  697 
  698         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
  699         error = namei(&nd);
  700         if (!error)
  701                 vput(nd.ni_vp);
  702         NDFREE(&nd, NDF_ONLY_PNBUF);
  703         return (error != 0) ? 0 : 1;
  704 }
  705 
  706 #define ERRMSGL 255
  707 static int
  708 parse_mount(char **conf)
  709 {
  710         char *errmsg;
  711         struct mntarg *ma;
  712         char *dev, *fs, *opts, *tok;
  713         int delay, error, timeout;
  714 
  715         error = parse_token(conf, &tok);
  716         if (error)
  717                 return (error);
  718         fs = tok;
  719         error = parse_skipto(&tok, ':');
  720         if (error) {
  721                 free(fs, M_TEMP);
  722                 return (error);
  723         }
  724         parse_poke(&tok, '\0');
  725         parse_advance(&tok);
  726         dev = tok;
  727 
  728         if (root_mount_mddev != -1) {
  729                 /* Handle substitution for the md unit number. */
  730                 tok = strstr(dev, "md#");
  731                 if (tok != NULL)
  732                         tok[2] = '' + root_mount_mddev;
  733         }
  734 
  735         /* Parse options. */
  736         error = parse_token(conf, &tok);
  737         opts = (error == 0) ? tok : NULL;
  738 
  739         printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
  740             (opts != NULL) ? opts : "");
  741 
  742         errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
  743 
  744         if (vfs_byname(fs) == NULL) {
  745                 strlcpy(errmsg, "unknown file system", ERRMSGL);
  746                 error = ENOENT;
  747                 goto out;
  748         }
  749 
  750         error = vfs_mountroot_wait_if_neccessary(fs, dev);
  751         if (error != 0)
  752                 goto out;
  753 
  754         delay = hz / 10;
  755         timeout = root_mount_timeout * hz;
  756 
  757         for (;;) {
  758                 ma = NULL;
  759                 ma = mount_arg(ma, "fstype", fs, -1);
  760                 ma = mount_arg(ma, "fspath", "/", -1);
  761                 ma = mount_arg(ma, "from", dev, -1);
  762                 ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
  763                 ma = mount_arg(ma, "ro", NULL, 0);
  764                 ma = parse_mountroot_options(ma, opts);
  765 
  766                 error = kernel_mount(ma, MNT_ROOTFS);
  767                 if (error == 0 || timeout <= 0)
  768                         break;
  769 
  770                 if (root_mount_timeout * hz == timeout ||
  771                     (bootverbose && timeout % hz == 0)) {
  772                         printf("Mounting from %s:%s failed with error %d; "
  773                             "retrying for %d more second%s\n", fs, dev, error,
  774                             timeout / hz, (timeout / hz > 1) ? "s" : "");
  775                 }
  776                 pause("rmretry", delay);
  777                 timeout -= delay;
  778         }
  779  out:
  780         if (error) {
  781                 printf("Mounting from %s:%s failed with error %d",
  782                     fs, dev, error);
  783                 if (errmsg[0] != '\0')
  784                         printf(": %s", errmsg);
  785                 printf(".\n");
  786         }
  787         free(fs, M_TEMP);
  788         free(errmsg, M_TEMP);
  789         if (opts != NULL)
  790                 free(opts, M_TEMP);
  791         /* kernel_mount can return -1 on error. */
  792         return ((error < 0) ? EDOOFUS : error);
  793 }
  794 #undef ERRMSGL
  795 
  796 static int
  797 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
  798 {
  799         struct mount *mp;
  800         char *conf;
  801         int error;
  802 
  803         root_mount_mddev = -1;
  804 
  805 retry:
  806         conf = sbuf_data(sb);
  807         mp = TAILQ_NEXT(mpdevfs, mnt_list);
  808         error = (mp == NULL) ? 0 : EDOOFUS;
  809         root_mount_onfail = A_CONTINUE;
  810         while (mp == NULL) {
  811                 error = parse_skipto(&conf, CC_NONWHITESPACE);
  812                 if (error == PE_EOL) {
  813                         parse_advance(&conf);
  814                         continue;
  815                 }
  816                 if (error < 0)
  817                         break;
  818                 switch (parse_peek(&conf)) {
  819                 case '#':
  820                         error = parse_skipto(&conf, '\n');
  821                         break;
  822                 case '.':
  823                         error = parse_directive(&conf);
  824                         break;
  825                 default:
  826                         error = parse_mount(&conf);
  827                         if (error == -1) {
  828                                 printf("mountroot: invalid file system "
  829                                     "specification.\n");
  830                                 error = 0;
  831                         }
  832                         break;
  833                 }
  834                 if (error < 0)
  835                         break;
  836                 /* Ignore any trailing garbage on the line. */
  837                 if (parse_peek(&conf) != '\n') {
  838                         printf("mountroot: advancing to next directive...\n");
  839                         (void)parse_skipto(&conf, '\n');
  840                 }
  841                 mp = TAILQ_NEXT(mpdevfs, mnt_list);
  842         }
  843         if (mp != NULL)
  844                 return (0);
  845 
  846         /*
  847          * We failed to mount (a new) root.
  848          */
  849         switch (root_mount_onfail) {
  850         case A_CONTINUE:
  851                 break;
  852         case A_PANIC:
  853                 panic("mountroot: unable to (re-)mount root.");
  854                 /* NOTREACHED */
  855         case A_RETRY:
  856                 goto retry;
  857         case A_REBOOT:
  858                 kern_reboot(RB_NOSYNC);
  859                 /* NOTREACHED */
  860         }
  861 
  862         return (error);
  863 }
  864 
  865 static void
  866 vfs_mountroot_conf0(struct sbuf *sb)
  867 {
  868         char *s, *tok, *mnt, *opt;
  869         int error;
  870 
  871         sbuf_printf(sb, ".onfail panic\n");
  872         sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
  873         if (boothowto & RB_ASKNAME)
  874                 sbuf_printf(sb, ".ask\n");
  875 #ifdef ROOTDEVNAME
  876         if (boothowto & RB_DFLTROOT)
  877                 sbuf_printf(sb, "%s\n", ROOTDEVNAME);
  878 #endif
  879         if (boothowto & RB_CDROM) {
  880                 sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
  881                 sbuf_printf(sb, ".timeout 0\n");
  882                 sbuf_printf(sb, "cd9660:/dev/cd1 ro\n");
  883                 sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
  884         }
  885         s = kern_getenv("vfs.root.mountfrom");
  886         if (s != NULL) {
  887                 opt = kern_getenv("vfs.root.mountfrom.options");
  888                 tok = s;
  889                 error = parse_token(&tok, &mnt);
  890                 while (!error) {
  891                         sbuf_printf(sb, "%s %s\n", mnt,
  892                             (opt != NULL) ? opt : "");
  893                         free(mnt, M_TEMP);
  894                         error = parse_token(&tok, &mnt);
  895                 }
  896                 if (opt != NULL)
  897                         freeenv(opt);
  898                 freeenv(s);
  899         }
  900         if (rootdevnames[0] != NULL)
  901                 sbuf_printf(sb, "%s\n", rootdevnames[0]);
  902         if (rootdevnames[1] != NULL)
  903                 sbuf_printf(sb, "%s\n", rootdevnames[1]);
  904 #ifdef ROOTDEVNAME
  905         if (!(boothowto & RB_DFLTROOT))
  906                 sbuf_printf(sb, "%s\n", ROOTDEVNAME);
  907 #endif
  908         if (!(boothowto & RB_ASKNAME))
  909                 sbuf_printf(sb, ".ask\n");
  910 }
  911 
  912 static int
  913 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
  914 {
  915         static char buf[128];
  916         struct nameidata nd;
  917         off_t ofs;
  918         ssize_t resid;
  919         int error, flags, len;
  920 
  921         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
  922         flags = FREAD;
  923         error = vn_open(&nd, &flags, 0, NULL);
  924         if (error)
  925                 return (error);
  926 
  927         NDFREE(&nd, NDF_ONLY_PNBUF);
  928         ofs = 0;
  929         len = sizeof(buf) - 1;
  930         while (1) {
  931                 error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
  932                     UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
  933                     NOCRED, &resid, td);
  934                 if (error)
  935                         break;
  936                 if (resid == len)
  937                         break;
  938                 buf[len - resid] = 0;
  939                 sbuf_printf(sb, "%s", buf);
  940                 ofs += len - resid;
  941         }
  942 
  943         VOP_UNLOCK(nd.ni_vp, 0);
  944         vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
  945         return (error);
  946 }
  947 
  948 static void
  949 vfs_mountroot_wait(void)
  950 {
  951         struct root_hold_token *h;
  952         struct timeval lastfail;
  953         int curfail;
  954 
  955         curfail = 0;
  956         while (1) {
  957                 DROP_GIANT();
  958                 g_waitidle();
  959                 PICKUP_GIANT();
  960                 mtx_lock(&root_holds_mtx);
  961                 if (LIST_EMPTY(&root_holds)) {
  962                         mtx_unlock(&root_holds_mtx);
  963                         break;
  964                 }
  965                 if (ppsratecheck(&lastfail, &curfail, 1)) {
  966                         printf("Root mount waiting for:");
  967                         LIST_FOREACH(h, &root_holds, list)
  968                                 printf(" %s", h->who);
  969                         printf("\n");
  970                 }
  971                 msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold",
  972                     hz);
  973         }
  974 }
  975 
  976 static int
  977 vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev)
  978 {
  979         int delay, timeout;
  980 
  981         /*
  982          * In case of ZFS and NFS we don't have a way to wait for
  983          * specific device.  Also do the wait if the user forced that
  984          * behaviour by setting vfs.root_mount_always_wait=1.
  985          */
  986         if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL ||
  987             dev[0] == '\0' || root_mount_always_wait != 0) {
  988                 vfs_mountroot_wait();
  989                 return (0);
  990         }
  991 
  992         /*
  993          * Otherwise, no point in waiting if the device is already there.
  994          * Note that we must wait for GEOM to finish reconfiguring itself,
  995          * eg for geom_part(4) to finish tasting.
  996          */
  997         DROP_GIANT();
  998         g_waitidle();
  999         PICKUP_GIANT();
 1000         if (parse_mount_dev_present(dev))
 1001                 return (0);
 1002 
 1003         /*
 1004          * No luck.  Let's wait.  This code looks weird, but it's that way
 1005          * to behave exactly as it used to work before.
 1006          */
 1007         vfs_mountroot_wait();
 1008         printf("mountroot: waiting for device %s...\n", dev);
 1009         delay = hz / 10;
 1010         timeout = root_mount_timeout * hz;
 1011         do {
 1012                 pause("rmdev", delay);
 1013                 timeout -= delay;
 1014         } while (timeout > 0 && !parse_mount_dev_present(dev));
 1015 
 1016         if (timeout <= 0)
 1017                 return (ENODEV);
 1018 
 1019         return (0);
 1020 }
 1021 
 1022 void
 1023 vfs_mountroot(void)
 1024 {
 1025         struct mount *mp;
 1026         struct sbuf *sb;
 1027         struct thread *td;
 1028         time_t timebase;
 1029         int error;
 1030 
 1031         td = curthread;
 1032 
 1033         sb = sbuf_new_auto();
 1034         vfs_mountroot_conf0(sb);
 1035         sbuf_finish(sb);
 1036 
 1037         error = vfs_mountroot_devfs(td, &mp);
 1038         while (!error) {
 1039                 error = vfs_mountroot_parse(sb, mp);
 1040                 if (!error) {
 1041                         vfs_mountroot_shuffle(td, mp);
 1042                         sbuf_clear(sb);
 1043                         error = vfs_mountroot_readconf(td, sb);
 1044                         sbuf_finish(sb);
 1045                 }
 1046         }
 1047 
 1048         sbuf_delete(sb);
 1049 
 1050         /*
 1051          * Iterate over all currently mounted file systems and use
 1052          * the time stamp found to check and/or initialize the RTC.
 1053          * Call inittodr() only once and pass it the largest of the
 1054          * timestamps we encounter.
 1055          */
 1056         timebase = 0;
 1057         mtx_lock(&mountlist_mtx);
 1058         mp = TAILQ_FIRST(&mountlist);
 1059         while (mp != NULL) {
 1060                 if (mp->mnt_time > timebase)
 1061                         timebase = mp->mnt_time;
 1062                 mp = TAILQ_NEXT(mp, mnt_list);
 1063         }
 1064         mtx_unlock(&mountlist_mtx);
 1065         inittodr(timebase);
 1066 
 1067         /* Keep prison0's root in sync with the global rootvnode. */
 1068         mtx_lock(&prison0.pr_mtx);
 1069         prison0.pr_root = rootvnode;
 1070         vref(prison0.pr_root);
 1071         mtx_unlock(&prison0.pr_mtx);
 1072 
 1073         mtx_lock(&root_holds_mtx);
 1074         atomic_store_rel_int(&root_mount_complete, 1);
 1075         wakeup(&root_mount_complete);
 1076         mtx_unlock(&root_holds_mtx);
 1077 
 1078         EVENTHANDLER_INVOKE(mountroot);
 1079 }
 1080 
 1081 static struct mntarg *
 1082 parse_mountroot_options(struct mntarg *ma, const char *options)
 1083 {
 1084         char *p;
 1085         char *name, *name_arg;
 1086         char *val, *val_arg;
 1087         char *opts;
 1088 
 1089         if (options == NULL || options[0] == '\0')
 1090                 return (ma);
 1091 
 1092         p = opts = strdup(options, M_MOUNT);
 1093         if (opts == NULL) {
 1094                 return (ma);
 1095         }
 1096 
 1097         while((name = strsep(&p, ",")) != NULL) {
 1098                 if (name[0] == '\0')
 1099                         break;
 1100 
 1101                 val = strchr(name, '=');
 1102                 if (val != NULL) {
 1103                         *val = '\0';
 1104                         ++val;
 1105                 }
 1106                 if( strcmp(name, "rw") == 0 ||
 1107                     strcmp(name, "noro") == 0) {
 1108                         /*
 1109                          * The first time we mount the root file system,
 1110                          * we need to mount 'ro', so We need to ignore
 1111                          * 'rw' and 'noro' mount options.
 1112                          */
 1113                         continue;
 1114                 }
 1115                 name_arg = strdup(name, M_MOUNT);
 1116                 val_arg = NULL;
 1117                 if (val != NULL)
 1118                         val_arg = strdup(val, M_MOUNT);
 1119 
 1120                 ma = mount_arg(ma, name_arg, val_arg,
 1121                     (val_arg != NULL ? -1 : 0));
 1122         }
 1123         free(opts, M_MOUNT);
 1124         return (ma);
 1125 }

Cache object: d65da242d200561c6f2be34ac38bfafc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.