The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/6.0/sys/kern/vfs_mount.c 150744 2005-09-30 06:10:59Z delphij $");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/jail.h>
   43 #include <sys/kernel.h>
   44 #include <sys/libkern.h>
   45 #include <sys/mac.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/proc.h>
   51 #include <sys/filedesc.h>
   52 #include <sys/reboot.h>
   53 #include <sys/syscallsubr.h>
   54 #include <sys/sysproto.h>
   55 #include <sys/sx.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/sysent.h>
   58 #include <sys/systm.h>
   59 #include <sys/vnode.h>
   60 
   61 #include <geom/geom.h>
   62 
   63 #include <machine/stdarg.h>
   64 
   65 #include "opt_rootdevname.h"
   66 #include "opt_ddb.h"
   67 #include "opt_mac.h"
   68 
   69 #ifdef DDB
   70 #include <ddb/ddb.h>
   71 #endif
   72 
   73 #define ROOTNAME                "root_device"
      /*
       * Byte cap enforced by vfs_buildopts(): applied to each option name
       * length, each option value length, and the running total for one
       * option list.
       */
   74 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   75 
      /* Forward declarations of file-local helpers. */
   76 static int      vfs_domount(struct thread *td, const char *fstype,
   77                     char *fspath, int fsflags, void *fsdata);
   78 static int      vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
   79                     const char *fspath, struct thread *td, struct mount **mpp);
   80 static int      vfs_mountroot_ask(void);
   81 static int      vfs_mountroot_try(const char *mountfrom);
   82 static int      vfs_donmount(struct thread *td, int fsflags,
   83                     struct uio *fsoptions);
   84 static void     free_mntarg(struct mntarg *ma);
   85 static void     vfs_mount_destroy(struct mount *, struct thread *);
   86 
      /*
       * When zero, vfs_domount() and unmount() return the suser() error for
       * callers that fail the suser() check.  Exported read/write through
       * the SYSCTL_INT below.
       */
   87 static int      usermount = 0;
   88 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   89     "Unprivileged users may mount and unmount file systems");
   90 
      /*
       * malloc type used in this file for struct mount and for mount option
       * lists, option records, names and values.
       */
   91 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   92 
   93 /* List of mounted filesystems. */
   94 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   95 
   96 /* For any iteration/modification of mountlist */
   97 struct mtx mountlist_mtx;
   98 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
   99 
      /*
       * A mount option list is a TAILQ of struct vfsopt records.  The list
       * head, each record, and the name/value buffers are all allocated
       * with M_MOUNT and released by vfs_freeopt()/vfs_freeopts().
       */
  100 TAILQ_HEAD(vfsoptlist, vfsopt);
  101 struct vfsopt {
  102         TAILQ_ENTRY(vfsopt) link;   /* Linkage within the option list. */
  103         char    *name;              /* Option name; vfs_buildopts() requires a trailing NUL. */
  104         void    *value;             /* Option payload, or NULL when len is 0. */
  105         int     len;                /* Size of value in bytes. */
  106 };
  107 
  108 /*
  109  * The vnode of the system's root (/ in the filesystem, without chroot
  110  * active.)
  111  */
  112 struct vnode    *rootvnode;
  113 
  114 /*
  115  * The root filesystem is detailed in the kernel environment variable
  116  * vfs.root.mountfrom, which is expected to be in the general format
  117  *
  118  * <vfsname>:[<path>]
  119  * vfsname   := the name of a VFS known to the kernel and capable
  120  *              of being mounted as root
  121  * path      := disk device name or other data used by the filesystem
  122  *              to locate its physical store
  123  */
  124 
  125 /*
  126  * Global opts, taken by all filesystems
  127  */
      /* NULL-terminated.  NOTE(review): no user is visible in this part of the file. */
  128 static const char *global_opts[] = {
  129         "fstype",
  130         "fspath",
  131         "ro",
  132         "suid",
  133         "exec",
  134         NULL
  135 };
  136 
  137 /*
  138  * The root specifiers we will try if RB_CDROM is specified.
  139  */
      /* NULL-terminated; entries use the <vfsname>:<path> format described above. */
  140 static char *cdrom_rootdevnames[] = {
  141         "cd9660:cd0",
  142         "cd9660:acd0",
  143         NULL
  144 };
  145 
  146 /* legacy find-root code */
  147 char            *rootdevnames[2] = {NULL, NULL};
      /*
       * ROOTDEVNAME defaults to NULL when nothing defined it earlier
       * (presumably opt_rootdevname.h supplies it -- confirm).
       */
  148 #ifndef ROOTDEVNAME
  149 #  define ROOTDEVNAME NULL
  150 #endif
  151 static const char       *ctrootdevname = ROOTDEVNAME;
  152 
  153 /*
  154  * ---------------------------------------------------------------------
  155  * Functions for building and sanitizing the mount options
  156  */
  157 
  158 /* Remove one mount option. */
  159 static void
  160 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  161 {
  162 
  163         TAILQ_REMOVE(opts, opt, link);
  164         free(opt->name, M_MOUNT);
  165         if (opt->value != NULL)
  166                 free(opt->value, M_MOUNT);
  167 #ifdef INVARIANTS
  168         else if (opt->len != 0)
  169                 panic("%s: mount option with NULL value but length != 0",
  170                     __func__);
  171 #endif
  172         free(opt, M_MOUNT);
  173 }
  174 
  175 /* Release all resources related to the mount options. */
  176 static void
  177 vfs_freeopts(struct vfsoptlist *opts)
  178 {
  179         struct vfsopt *opt;
  180 
  181         while (!TAILQ_EMPTY(opts)) {
  182                 opt = TAILQ_FIRST(opts);
  183                 vfs_freeopt(opts, opt);
  184         }
  185         free(opts, M_MOUNT);
  186 }
  187 
  188 /*
  189  * Check if options are equal (with or without the "no" prefix).
  190  */
  191 static int
  192 vfs_equalopts(const char *opt1, const char *opt2)
  193 {
  194 
  195         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  196         if (strcmp(opt1, opt2) == 0)
  197                 return (1);
  198         /* "noopt" vs. "opt" */
  199         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  200                 return (1);
  201         /* "opt" vs. "noopt" */
  202         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  203                 return (1);
  204         return (0);
  205 }
  206 
  207 /*
  208  * If a mount option is specified several times,
  209  * (with or without the "no" prefix) only keep
  210  * the last occurence of it.
  211  */
  212 static void
  213 vfs_sanitizeopts(struct vfsoptlist *opts)
  214 {
  215         struct vfsopt *opt, *opt2, *tmp;
  216 
  217         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  218                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  219                 while (opt2 != NULL) {
  220                         if (vfs_equalopts(opt->name, opt2->name)) {
  221                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  222                                 vfs_freeopt(opts, opt2);
  223                                 opt2 = tmp;
  224                         } else {
  225                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  226                         }
  227                 }
  228         }
  229 }
  230 
  231 /*
  232  * Build a linked list of mount options from a struct uio.
  233  */
  234 static int
  235 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  236 {
  237         struct vfsoptlist *opts;
  238         struct vfsopt *opt;
  239         size_t memused;
  240         unsigned int i, iovcnt;
  241         int error, namelen, optlen;
  242 
  243         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  244         TAILQ_INIT(opts);
  245         memused = 0;
  246         iovcnt = auio->uio_iovcnt;
  247         for (i = 0; i < iovcnt; i += 2) {
  248                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  249                 namelen = auio->uio_iov[i].iov_len;
  250                 optlen = auio->uio_iov[i + 1].iov_len;
  251                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  252                 opt->value = NULL;
  253                 opt->len = 0;
  254 
  255                 /*
  256                  * Do this early, so jumps to "bad" will free the current
  257                  * option.
  258                  */
  259                 TAILQ_INSERT_TAIL(opts, opt, link);
  260                 memused += sizeof(struct vfsopt) + optlen + namelen;
  261 
  262                 /*
  263                  * Avoid consuming too much memory, and attempts to overflow
  264                  * memused.
  265                  */
  266                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  267                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  268                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  269                         error = EINVAL;
  270                         goto bad;
  271                 }
  272 
  273                 if (auio->uio_segflg == UIO_SYSSPACE) {
  274                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  275                 } else {
  276                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  277                             namelen);
  278                         if (error)
  279                                 goto bad;
  280                 }
  281                 /* Ensure names are null-terminated strings. */
  282                 if (opt->name[namelen - 1] != '\0') {
  283                         error = EINVAL;
  284                         goto bad;
  285                 }
  286                 if (optlen != 0) {
  287                         opt->len = optlen;
  288                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  289                         if (auio->uio_segflg == UIO_SYSSPACE) {
  290                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  291                                     optlen);
  292                         } else {
  293                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  294                                     opt->value, optlen);
  295                                 if (error)
  296                                         goto bad;
  297                         }
  298                 }
  299         }
  300         vfs_sanitizeopts(opts);
  301         *options = opts;
  302         return (0);
  303 bad:
  304         vfs_freeopts(opts);
  305         return (error);
  306 }
  307 
  308 /*
  309  * Merge the old mount options with the new ones passed
  310  * in the MNT_UPDATE case.
  311  */
  312 static void
  313 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  314 {
  315         struct vfsopt *opt, *opt2, *new;
  316 
  317         TAILQ_FOREACH(opt, opts, link) {
  318                 /*
  319                  * Check that this option hasn't been redefined
  320                  * nor cancelled with a "no" mount option.
  321                  */
  322                 opt2 = TAILQ_FIRST(toopts);
  323                 while (opt2 != NULL) {
  324                         if (strcmp(opt2->name, opt->name) == 0)
  325                                 goto next;
  326                         if (strncmp(opt2->name, "no", 2) == 0 &&
  327                             strcmp(opt2->name + 2, opt->name) == 0) {
  328                                 vfs_freeopt(toopts, opt2);
  329                                 goto next;
  330                         }
  331                         opt2 = TAILQ_NEXT(opt2, link);
  332                 }
  333                 /* We want this option, duplicate it. */
  334                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  335                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  336                 strcpy(new->name, opt->name);
  337                 if (opt->len != 0) {
  338                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  339                         bcopy(opt->value, new->value, opt->len);
  340                 } else {
  341                         new->value = NULL;
  342                 }
  343                 new->len = opt->len;
  344                 TAILQ_INSERT_TAIL(toopts, new, link);
  345 next:
  346                 continue;
  347         }
  348 }
  349 
  350 /*
  351  * ---------------------------------------------------------------------
  352  * Mount a filesystem
  353  */
  354 int
  355 nmount(td, uap)
  356         struct thread *td;
  357         struct nmount_args /* {
  358                 struct iovec *iovp;
  359                 unsigned int iovcnt;
  360                 int flags;
  361         } */ *uap;
  362 {
  363         struct uio *auio;
  364         struct iovec *iov;
  365         unsigned int i;
  366         int error;
  367         u_int iovcnt;
  368 
  369         /* Kick out MNT_ROOTFS early as it is legal internally */
  370         if (uap->flags & MNT_ROOTFS)
  371                 return (EINVAL);
  372 
  373         iovcnt = uap->iovcnt;
  374         /*
  375          * Check that we have an even number of iovec's
  376          * and that we have at least two options.
  377          */
  378         if ((iovcnt & 1) || (iovcnt < 4))
  379                 return (EINVAL);
  380 
  381         error = copyinuio(uap->iovp, iovcnt, &auio);
  382         if (error)
  383                 return (error);
  384         iov = auio->uio_iov;
  385         for (i = 0; i < iovcnt; i++) {
  386                 if (iov->iov_len > MMAXOPTIONLEN) {
  387                         free(auio, M_IOV);
  388                         return (EINVAL);
  389                 }
  390                 iov++;
  391         }
  392         error = vfs_donmount(td, uap->flags, auio);
  393         free(auio, M_IOV);
  394         return (error);
  395 }
  396 
  397 /*
  398  * ---------------------------------------------------------------------
  399  * Various utility functions
  400  */
  401 
  402 /*
  403  * Allocate and initialize the mount point struct.
  404  */
  405 static int
  406 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  407     const char *fspath, struct thread *td, struct mount **mpp)
  408 {
  409         struct mount *mp;
  410 
  411         mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
  412         TAILQ_INIT(&mp->mnt_nvnodelist);
  413         mp->mnt_nvnodelistsize = 0;
  414         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  415         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  416         vfs_busy(mp, LK_NOWAIT, 0, td);
  417         mp->mnt_op = vfsp->vfc_vfsops;
  418         mp->mnt_vfc = vfsp;
  419         vfsp->vfc_refcount++;
  420         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  421         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  422         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  423         mp->mnt_vnodecovered = vp;
  424         mp->mnt_cred = crdup(td->td_ucred);
  425         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  426         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  427         mp->mnt_iosize_max = DFLTPHYS;
  428 #ifdef MAC
  429         mac_init_mount(mp);
  430         mac_create_mount(td->td_ucred, mp);
  431 #endif
  432         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  433         *mpp = mp;
  434         return (0);
  435 }
  436 
  437 /*
  438  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  439  */
  440 static void
  441 vfs_mount_destroy(struct mount *mp, struct thread *td)
  442 {
  443 
  444         mp->mnt_vfc->vfc_refcount--;
  445         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
  446                 panic("unmount: dangling vnode");
  447         vfs_unbusy(mp,td);
  448         lockdestroy(&mp->mnt_lock);
  449         MNT_ILOCK(mp);
  450         if (mp->mnt_kern_flag & MNTK_MWAIT)
  451                 wakeup(mp);
  452         MNT_IUNLOCK(mp);
  453         mtx_destroy(&mp->mnt_mtx);
  454 #ifdef MAC
  455         mac_destroy_mount(mp);
  456 #endif
  457         if (mp->mnt_opt != NULL)
  458                 vfs_freeopts(mp->mnt_opt);
  459         crfree(mp->mnt_cred);
  460         free(mp, M_MOUNT);
  461 }
  462 
  463 static int
  464 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  465 {
  466         struct vfsoptlist *optlist;
  467         char *fstype, *fspath;
  468         int error, fstypelen, fspathlen;
  469 
  470         error = vfs_buildopts(fsoptions, &optlist);
  471         if (error)
  472                 return (error);
  473 
  474         /*
  475          * We need these two options before the others,
  476          * and they are mandatory for any filesystem.
  477          * Ensure they are NUL terminated as well.
  478          */
  479         fstypelen = 0;
  480         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  481         if (error || fstype[fstypelen - 1] != '\0') {
  482                 error = EINVAL;
  483                 goto bail;
  484         }
  485         fspathlen = 0;
  486         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  487         if (error || fspath[fspathlen - 1] != '\0') {
  488                 error = EINVAL;
  489                 goto bail;
  490         }
  491 
  492         /*
  493          * Be ultra-paranoid about making sure the type and fspath
  494          * variables will fit in our mp buffers, including the
  495          * terminating NUL.
  496          */
  497         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  498                 error = ENAMETOOLONG;
  499                 goto bail;
  500         }
  501 
  502         mtx_lock(&Giant);
  503         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  504         mtx_unlock(&Giant);
  505 bail:
  506         if (error)
  507                 vfs_freeopts(optlist);
  508         return (error);
  509 }
  510 
  511 /*
  512  * ---------------------------------------------------------------------
  513  * Old mount API.
  514  */
  515 #ifndef _SYS_SYSPROTO_H_
  516 struct mount_args {
  517         char    *type;
  518         char    *path;
  519         int     flags;
  520         caddr_t data;
  521 };
  522 #endif
  523 /* ARGSUSED */
  524 int
  525 mount(td, uap)
  526         struct thread *td;
  527         struct mount_args /* {
  528                 char *type;
  529                 char *path;
  530                 int flags;
  531                 caddr_t data;
  532         } */ *uap;
  533 {
  534         char *fstype;
  535         struct vfsconf *vfsp = NULL;
  536         struct mntarg *ma = NULL;
  537         int error;
  538 
  539         /* Kick out MNT_ROOTFS early as it is legal internally */
  540         uap->flags &= ~MNT_ROOTFS;
  541 
  542         if (uap->data == NULL)
  543                 return (EINVAL);
  544 
  545         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  546         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  547         if (!error) {
  548                 mtx_lock(&Giant);       /* XXX ? */
  549                 vfsp = vfs_byname_kld(fstype, td, &error);
  550                 mtx_unlock(&Giant);
  551         }
  552         free(fstype, M_TEMP);
  553         if (error)
  554                 return (error);
  555         if (vfsp == NULL)
  556                 return (ENOENT);
  557         if (vfsp->vfc_vfsops->vfs_cmount == NULL)
  558                 return (EOPNOTSUPP);
  559 
  560         ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
  561         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  562         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
  563         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
  564         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
  565 
  566         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
  567         return (error);
  568 }
  569 
  570 
  571 /*
  572  * vfs_domount(): actually attempt a filesystem mount.
  573  */
  574 static int
  575 vfs_domount(
  576         struct thread *td,      /* Flags common to all filesystems. */
  577         const char *fstype,     /* Filesystem type. */
  578         char *fspath,           /* Mount path. */
  579         int fsflags,            /* Flags common to all filesystems. */
  580         void *fsdata            /* Options local to the filesystem. */
  581         )
  582 {
  583         struct vnode *vp;
  584         struct mount *mp;
  585         struct vfsconf *vfsp;
  586         int error, flag = 0, kern_flag = 0;
  587         struct vattr va;
  588         struct nameidata nd;
  589 
  590         mtx_assert(&Giant, MA_OWNED);
  591 
  592         /*
  593          * Be ultra-paranoid about making sure the type and fspath
  594          * variables will fit in our mp buffers, including the
  595          * terminating NUL.
  596          */
  597         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  598                 return (ENAMETOOLONG);
  599 
  600         if (jailed(td->td_ucred))
  601                 return (EPERM);
  602         if (usermount == 0) {
  603                 if ((error = suser(td)) != 0)
  604                         return (error);
  605         }
  606 
  607         /*
  608          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  609          */
  610         if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
  611                 if ((error = suser(td)) != 0)
  612                         return (error);
  613         }
  614         /*
  615          * Silently enforce MNT_NOSUID and MNT_USER for
  616          * unprivileged users.
  617          */
  618         if (suser(td) != 0)
  619                 fsflags |= MNT_NOSUID | MNT_USER;
  620         /*
  621          * Get vnode to be covered
  622          */
  623         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
  624         if ((error = namei(&nd)) != 0)
  625                 return (error);
  626         NDFREE(&nd, NDF_ONLY_PNBUF);
  627         vp = nd.ni_vp;
  628         if (fsflags & MNT_UPDATE) {
  629                 if ((vp->v_vflag & VV_ROOT) == 0) {
  630                         vput(vp);
  631                         return (EINVAL);
  632                 }
  633                 mp = vp->v_mount;
  634                 flag = mp->mnt_flag;
  635                 kern_flag = mp->mnt_kern_flag;
  636                 /*
  637                  * We only allow the filesystem to be reloaded if it
  638                  * is currently mounted read-only.
  639                  */
  640                 if ((fsflags & MNT_RELOAD) &&
  641                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  642                         vput(vp);
  643                         return (EOPNOTSUPP);    /* Needs translation */
  644                 }
  645                 /*
  646                  * Only privileged root, or (if MNT_USER is set) the user that
  647                  * did the original mount is permitted to update it.
  648                  */
  649                 error = vfs_suser(mp, td);
  650                 if (error) {
  651                         vput(vp);
  652                         return (error);
  653                 }
  654                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  655                         vput(vp);
  656                         return (EBUSY);
  657                 }
  658                 VI_LOCK(vp);
  659                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  660                     vp->v_mountedhere != NULL) {
  661                         VI_UNLOCK(vp);
  662                         vfs_unbusy(mp, td);
  663                         vput(vp);
  664                         return (EBUSY);
  665                 }
  666                 vp->v_iflag |= VI_MOUNT;
  667                 VI_UNLOCK(vp);
  668                 mp->mnt_flag |= fsflags &
  669                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
  670                 VOP_UNLOCK(vp, 0, td);
  671                 mp->mnt_optnew = fsdata;
  672                 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  673         } else {
  674                 /*
  675                  * If the user is not root, ensure that they own the directory
  676                  * onto which we are attempting to mount.
  677                  */
  678                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  679                 if (error) {
  680                         vput(vp);
  681                         return (error);
  682                 }
  683                 if (va.va_uid != td->td_ucred->cr_uid) {
  684                         if ((error = suser(td)) != 0) {
  685                                 vput(vp);
  686                                 return (error);
  687                         }
  688                 }
  689                 error = vinvalbuf(vp, V_SAVE, td, 0, 0);
  690                 if (error != 0) {
  691                         vput(vp);
  692                         return (error);
  693                 }
  694                 if (vp->v_type != VDIR) {
  695                         vput(vp);
  696                         return (ENOTDIR);
  697                 }
  698                 vfsp = vfs_byname_kld(fstype, td, &error);
  699                 if (vfsp == NULL) {
  700                         vput(vp);
  701                         return (error);
  702                 }
  703                 VI_LOCK(vp);
  704                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  705                     vp->v_mountedhere != NULL) {
  706                         VI_UNLOCK(vp);
  707                         vput(vp);
  708                         return (EBUSY);
  709                 }
  710                 vp->v_iflag |= VI_MOUNT;
  711                 VI_UNLOCK(vp);
  712 
  713                 /*
  714                  * Allocate and initialize the filesystem.
  715                  */
  716                 error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
  717                 if (error) {
  718                         vput(vp);
  719                         return (error);
  720                 }
  721                 VOP_UNLOCK(vp, 0, td);
  722 
  723                 /* XXXMAC: pass to vfs_mount_alloc? */
  724                 mp->mnt_optnew = fsdata;
  725         }
  726 
  727         /*
  728          * Set the mount level flags.
  729          */
  730         if (fsflags & MNT_RDONLY)
  731                 mp->mnt_flag |= MNT_RDONLY;
  732         mp->mnt_flag &=~ MNT_UPDATEMASK;
  733         mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
  734         /*
  735          * Mount the filesystem.
  736          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
  737          * get.  No freeing of cn_pnbuf.
  738          */
  739         error = VFS_MOUNT(mp, td);
  740         if (!error) {
  741                 if (mp->mnt_opt != NULL)
  742                         vfs_freeopts(mp->mnt_opt);
  743                 mp->mnt_opt = mp->mnt_optnew;
  744                 VFS_STATFS(mp, &mp->mnt_stat, td);
  745         }
  746         /*
  747          * Prevent external consumers of mount options from reading
  748          * mnt_optnew.
  749         */
  750         mp->mnt_optnew = NULL;
  751         if (mp->mnt_flag & MNT_UPDATE) {
  752                 mp->mnt_flag &=
  753                     ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
  754                 if (error) {
  755                         mp->mnt_flag = flag;
  756                         mp->mnt_kern_flag = kern_flag;
  757                 }
  758                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  759                         if (mp->mnt_syncer == NULL)
  760                                 error = vfs_allocate_syncvnode(mp);
  761                 } else {
  762                         if (mp->mnt_syncer != NULL)
  763                                 vrele(mp->mnt_syncer);
  764                         mp->mnt_syncer = NULL;
  765                 }
  766                 vfs_unbusy(mp, td);
  767                 VI_LOCK(vp);
  768                 vp->v_iflag &= ~VI_MOUNT;
  769                 VI_UNLOCK(vp);
  770                 vrele(vp);
  771                 return (error);
  772         }
  773         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  774         /*
  775          * Put the new filesystem on the mount list after root.
  776          */
  777         cache_purge(vp);
  778         if (!error) {
  779                 struct vnode *newdp;
  780 
  781                 VI_LOCK(vp);
  782                 vp->v_iflag &= ~VI_MOUNT;
  783                 VI_UNLOCK(vp);
  784                 vp->v_mountedhere = mp;
  785                 mtx_lock(&mountlist_mtx);
  786                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  787                 mtx_unlock(&mountlist_mtx);
  788                 vfs_event_signal(NULL, VQ_MOUNT, 0);
  789                 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
  790                         panic("mount: lost mount");
  791                 mountcheckdirs(vp, newdp);
  792                 vput(newdp);
  793                 VOP_UNLOCK(vp, 0, td);
  794                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
  795                         error = vfs_allocate_syncvnode(mp);
  796                 vfs_unbusy(mp, td);
  797                 if (error)
  798                         vrele(vp);
  799         } else {
  800                 VI_LOCK(vp);
  801                 vp->v_iflag &= ~VI_MOUNT;
  802                 VI_UNLOCK(vp);
  803                 vfs_mount_destroy(mp, td);
  804                 vput(vp);
  805         }
  806         return (error);
  807 }
  808 
  809 /*
  810  * ---------------------------------------------------------------------
  811  * Unmount a filesystem.
  812  *
  813  * Note: unmount takes a path to the vnode mounted on as argument,
  814  * not special file (as before).
  815  */
  816 #ifndef _SYS_SYSPROTO_H_
  817 struct unmount_args {
  818         char    *path;
  819         int     flags;
  820 };
  821 #endif
  822 /* ARGSUSED */
  823 int
  824 unmount(td, uap)
  825         struct thread *td;
  826         register struct unmount_args /* {
  827                 char *path;
  828                 int flags;
  829         } */ *uap;
  830 {
  831         struct mount *mp;
  832         char *pathbuf;
  833         int error, id0, id1;
  834 
  835         if (jailed(td->td_ucred))
  836                 return (EPERM);
  837         if (usermount == 0) {
  838                 if ((error = suser(td)) != 0)
  839                         return (error);
  840         }
  841 
  842         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
  843         error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
  844         if (error) {
  845                 free(pathbuf, M_TEMP);
  846                 return (error);
  847         }
  848         if (uap->flags & MNT_BYFSID) {
  849                 /* Decode the filesystem ID. */
  850                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
  851                         free(pathbuf, M_TEMP);
  852                         return (EINVAL);
  853                 }
  854 
  855                 mtx_lock(&mountlist_mtx);
  856                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
  857                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
  858                             mp->mnt_stat.f_fsid.val[1] == id1)
  859                                 break;
  860                 }
  861                 mtx_unlock(&mountlist_mtx);
  862         } else {
  863                 mtx_lock(&mountlist_mtx);
  864                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
  865                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
  866                                 break;
  867                 }
  868                 mtx_unlock(&mountlist_mtx);
  869         }
  870         free(pathbuf, M_TEMP);
  871         if (mp == NULL) {
  872                 /*
  873                  * Previously we returned ENOENT for a nonexistent path and
  874                  * EINVAL for a non-mountpoint.  We cannot tell these apart
  875                  * now, so in the !MNT_BYFSID case return the more likely
  876                  * EINVAL for compatibility.
  877                  */
  878                 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
  879         }
  880 
  881         /*
  882          * Only privileged root, or (if MNT_USER is set) the user that did the
  883          * original mount is permitted to unmount this filesystem.
  884          */
  885         error = vfs_suser(mp, td);
  886         if (error)
  887                 return (error);
  888 
  889         /*
  890          * Don't allow unmounting the root filesystem.
  891          */
  892         if (mp->mnt_flag & MNT_ROOTFS)
  893                 return (EINVAL);
  894         mtx_lock(&Giant);
  895         error = dounmount(mp, uap->flags, td);
  896         mtx_unlock(&Giant);
  897         return (error);
  898 }
  899 
  900 /*
  901  * Do the actual filesystem unmount.
  902  */
  903 int
  904 dounmount(mp, flags, td)
  905         struct mount *mp;
  906         int flags;
  907         struct thread *td;
  908 {
  909         struct vnode *coveredvp, *fsrootvp;
  910         int error;
  911         int async_flag;
  912 
  913         mtx_assert(&Giant, MA_OWNED);
  914 
  915         MNT_ILOCK(mp);
  916         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
  917                 MNT_IUNLOCK(mp);
  918                 return (EBUSY);
  919         }
  920         mp->mnt_kern_flag |= MNTK_UNMOUNT;
  921         /* Allow filesystems to detect that a forced unmount is in progress. */
  922         if (flags & MNT_FORCE)
  923                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
  924         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
  925             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
  926         if (error) {
  927                 MNT_ILOCK(mp);
  928                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
  929                 if (mp->mnt_kern_flag & MNTK_MWAIT)
  930                         wakeup(mp);
  931                 MNT_IUNLOCK(mp);
  932                 return (error);
  933         }
  934         vn_start_write(NULL, &mp, V_WAIT);
  935 
  936         if (mp->mnt_flag & MNT_EXPUBLIC)
  937                 vfs_setpublicfs(NULL, NULL, NULL);
  938 
  939         vfs_msync(mp, MNT_WAIT);
  940         async_flag = mp->mnt_flag & MNT_ASYNC;
  941         mp->mnt_flag &= ~MNT_ASYNC;
  942         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  943         if (mp->mnt_syncer != NULL)
  944                 vrele(mp->mnt_syncer);
  945         /*
  946          * For forced unmounts, move process cdir/rdir refs on the fs root
  947          * vnode to the covered vnode.  For non-forced unmounts we want
  948          * such references to cause an EBUSY error.
  949          */
  950         if ((flags & MNT_FORCE) &&
  951             VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
  952                 if (mp->mnt_vnodecovered != NULL)
  953                         mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
  954                 if (fsrootvp == rootvnode) {
  955                         vrele(rootvnode);
  956                         rootvnode = NULL;
  957                 }
  958                 vput(fsrootvp);
  959         }
  960         if (((mp->mnt_flag & MNT_RDONLY) ||
  961              (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
  962             (flags & MNT_FORCE)) {
  963                 error = VFS_UNMOUNT(mp, flags, td);
  964         }
  965         vn_finished_write(mp);
  966         if (error) {
  967                 /* Undo cdir/rdir and rootvnode changes made above. */
  968                 if ((flags & MNT_FORCE) &&
  969                     VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
  970                         if (mp->mnt_vnodecovered != NULL)
  971                                 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
  972                         if (rootvnode == NULL) {
  973                                 rootvnode = fsrootvp;
  974                                 vref(rootvnode);
  975                         }
  976                         vput(fsrootvp);
  977                 }
  978                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
  979                         (void) vfs_allocate_syncvnode(mp);
  980                 MNT_ILOCK(mp);
  981                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
  982                 mp->mnt_flag |= async_flag;
  983                 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
  984                 if (mp->mnt_kern_flag & MNTK_MWAIT)
  985                         wakeup(mp);
  986                 MNT_IUNLOCK(mp);
  987                 return (error);
  988         }
  989         mtx_lock(&mountlist_mtx);
  990         TAILQ_REMOVE(&mountlist, mp, mnt_list);
  991         if ((coveredvp = mp->mnt_vnodecovered) != NULL)
  992                 coveredvp->v_mountedhere = NULL;
  993         mtx_unlock(&mountlist_mtx);
  994         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
  995         vfs_mount_destroy(mp, td);
  996         if (coveredvp != NULL)
  997                 vrele(coveredvp);
  998         return (0);
  999 }
 1000 
 1001 /*
 1002  * ---------------------------------------------------------------------
 1003  * Mounting of root filesystem
 1004  *
 1005  */
 1006 
 1007 struct root_hold_token {
 1008         const char                      *who;
 1009         LIST_ENTRY(root_hold_token)     list;
 1010 };
 1011 
 1012 static LIST_HEAD(, root_hold_token)     root_holds =
 1013     LIST_HEAD_INITIALIZER(&root_holds);
 1014 
 1015 struct root_hold_token *
 1016 root_mount_hold(const char *identifier)
 1017 {
 1018         struct root_hold_token *h;
 1019 
 1020         h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
 1021         h->who = identifier;
 1022         mtx_lock(&mountlist_mtx);
 1023         LIST_INSERT_HEAD(&root_holds, h, list);
 1024         mtx_unlock(&mountlist_mtx);
 1025         return (h);
 1026 }
 1027 
 1028 void
 1029 root_mount_rel(struct root_hold_token *h)
 1030 {
 1031 
 1032         mtx_lock(&mountlist_mtx);
 1033         LIST_REMOVE(h, list);
 1034         wakeup(&root_holds);
 1035         mtx_unlock(&mountlist_mtx);
 1036         free(h, M_DEVBUF);
 1037 }
 1038 
 1039 static void
 1040 root_mount_wait(void)
 1041 {
 1042         struct root_hold_token *h;
 1043 
 1044         for (;;) {
 1045                 DROP_GIANT();
 1046                 g_waitidle();
 1047                 PICKUP_GIANT();
 1048                 mtx_lock(&mountlist_mtx);
 1049                 if (LIST_EMPTY(&root_holds)) {
 1050                         mtx_unlock(&mountlist_mtx);
 1051                         break;
 1052                 }
 1053                 printf("Root mount waiting for:");
 1054                 LIST_FOREACH(h, &root_holds, list)
 1055                         printf(" %s", h->who);
 1056                 printf("\n");
 1057                 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
 1058                     hz);
 1059         }
 1060 }
 1061 
 1062 static void
 1063 set_rootvnode(struct thread *td)
 1064 {
 1065         struct proc *p;
 1066 
 1067         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1068                 panic("Cannot find root vnode");
 1069 
 1070         p = td->td_proc;
 1071         FILEDESC_LOCK(p->p_fd);
 1072 
 1073         if (p->p_fd->fd_cdir != NULL)
 1074                 vrele(p->p_fd->fd_cdir);
 1075         p->p_fd->fd_cdir = rootvnode;
 1076         VREF(rootvnode);
 1077 
 1078         if (p->p_fd->fd_rdir != NULL)
 1079                 vrele(p->p_fd->fd_rdir);
 1080         p->p_fd->fd_rdir = rootvnode;
 1081         VREF(rootvnode);
 1082 
 1083         FILEDESC_UNLOCK(p->p_fd);
 1084 
 1085         VOP_UNLOCK(rootvnode, 0, td);
 1086 }
 1087 
 1088 /*
 1089  * Mount /devfs as our root filesystem, but do not put it on the mountlist
 1090  * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
 1091  */
 1092 
 1093 static void
 1094 devfs_first(void)
 1095 {
 1096         struct thread *td = curthread;
 1097         struct vfsconf *vfsp;
 1098         struct mount *mp = NULL;
 1099         int error;
 1100 
 1101         vfsp = vfs_byname("devfs");
 1102         KASSERT(vfsp != NULL, ("Could not find devfs by name"));
 1103         if (vfsp == NULL) 
 1104                 return;
 1105 
 1106         error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
 1107         KASSERT(error == 0, ("vfs_mount_alloc failed %d", error));
 1108         if (error)
 1109                 return;
 1110 
 1111         error = VFS_MOUNT(mp, curthread);
 1112         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 1113         if (error)
 1114                 return;
 1115 
 1116         mtx_lock(&mountlist_mtx);
 1117         TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1118         mtx_unlock(&mountlist_mtx);
 1119 
 1120         set_rootvnode(td);
 1121 
 1122         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1123         if (error)
 1124                 printf("kern_symlink /dev -> / returns %d\n", error);
 1125 }
 1126 
 1127 /*
 1128  * Surgically move our devfs to be mounted on /dev.
 1129  */
 1130 
 1131 static void
 1132 devfs_fixup(struct thread *td)
 1133 {
 1134         struct nameidata nd;
 1135         int error;
 1136         struct vnode *vp, *dvp;
 1137         struct mount *mp;
 1138 
 1139         /* Remove our devfs mount from the mountlist and purge the cache */
 1140         mtx_lock(&mountlist_mtx);
 1141         mp = TAILQ_FIRST(&mountlist);
 1142         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1143         mtx_unlock(&mountlist_mtx);
 1144         cache_purgevfs(mp);
 1145 
 1146         VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
 1147         VI_LOCK(dvp);
 1148         dvp->v_iflag &= ~VI_MOUNT;
 1149         dvp->v_mountedhere = NULL;
 1150         VI_UNLOCK(dvp);
 1151 
 1152         /* Set up the real rootvnode, and purge the cache */
 1153         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1154         set_rootvnode(td);
 1155         cache_purgevfs(rootvnode->v_mount);
 1156 
 1157         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1158         error = namei(&nd);
 1159         if (error) {
 1160                 printf("Lookup of /dev for devfs, error: %d\n", error);
 1161                 return;
 1162         }
 1163         NDFREE(&nd, NDF_ONLY_PNBUF);
 1164         vp = nd.ni_vp;
 1165         if (vp->v_type != VDIR) {
 1166                 vput(vp);
 1167         }
 1168         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1169         if (error) {
 1170                 vput(vp);
 1171         }
 1172         cache_purge(vp);
 1173         mp->mnt_vnodecovered = vp;
 1174         vp->v_mountedhere = mp;
 1175         mtx_lock(&mountlist_mtx);
 1176         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1177         mtx_unlock(&mountlist_mtx);
 1178         VOP_UNLOCK(vp, 0, td);
 1179         vfs_unbusy(mp, td);
 1180         vput(dvp);
 1181 
 1182         /* Unlink the no longer needed /dev/dev -> / symlink */
 1183         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
 1184 }
 1185 
 1186 /*
 1187  * Find and mount the root filesystem
 1188  */
 1189 void
 1190 vfs_mountroot(void)
 1191 {
 1192         char *cp;
 1193         int error, i, asked = 0;
 1194 
 1195         root_mount_wait();
 1196 
 1197         devfs_first();
 1198 
 1199         /*
 1200          * We are booted with instructions to prompt for the root filesystem.
 1201          */
 1202         if (boothowto & RB_ASKNAME) {
 1203                 if (!vfs_mountroot_ask())
 1204                         return;
 1205                 asked = 1;
 1206         }
 1207 
 1208         /*
 1209          * The root filesystem information is compiled in, and we are
 1210          * booted with instructions to use it.
 1211          */
 1212         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1213                 if (!vfs_mountroot_try(ctrootdevname))
 1214                         return;
 1215                 ctrootdevname = NULL;
 1216         }
 1217 
 1218         /*
 1219          * We've been given the generic "use CDROM as root" flag.  This is
 1220          * necessary because one media may be used in many different
 1221          * devices, so we need to search for them.
 1222          */
 1223         if (boothowto & RB_CDROM) {
 1224                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1225                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1226                                 return;
 1227                 }
 1228         }
 1229 
 1230         /*
 1231          * Try to use the value read by the loader from /etc/fstab, or
 1232          * supplied via some other means.  This is the preferred
 1233          * mechanism.
 1234          */
 1235         cp = getenv("vfs.root.mountfrom");
 1236         if (cp != NULL) {
 1237                 error = vfs_mountroot_try(cp);
 1238                 freeenv(cp);
 1239                 if (!error)
 1240                         return;
 1241         }
 1242 
 1243         /*
 1244          * Try values that may have been computed by code during boot
 1245          */
 1246         if (!vfs_mountroot_try(rootdevnames[0]))
 1247                 return;
 1248         if (!vfs_mountroot_try(rootdevnames[1]))
 1249                 return;
 1250 
 1251         /*
 1252          * If we (still) have a compiled-in default, try it.
 1253          */
 1254         if (ctrootdevname != NULL)
 1255                 if (!vfs_mountroot_try(ctrootdevname))
 1256                         return;
 1257         /*
 1258          * Everything so far has failed, prompt on the console if we haven't
 1259          * already tried that.
 1260          */
 1261         if (!asked)
 1262                 if (!vfs_mountroot_ask())
 1263                         return;
 1264 
 1265         panic("Root mount failed, startup aborted.");
 1266 }
 1267 
 1268 /*
 1269  * Mount (mountfrom) as the root filesystem.
 1270  */
 1271 static int
 1272 vfs_mountroot_try(const char *mountfrom)
 1273 {
 1274         struct mount    *mp;
 1275         char            *vfsname, *path;
 1276         int             error;
 1277         char            patt[32];
 1278 
 1279         vfsname = NULL;
 1280         path    = NULL;
 1281         mp      = NULL;
 1282         error   = EINVAL;
 1283 
 1284         if (mountfrom == NULL)
 1285                 return (error);         /* don't complain */
 1286         printf("Trying to mount root from %s\n", mountfrom);
 1287 
 1288         /* parse vfs name and path */
 1289         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1290         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1291         vfsname[0] = path[0] = 0;
 1292         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1293         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1294                 goto out;
 1295 
 1296         if (path[0] == '\0')
 1297                 strcpy(path, ROOTNAME);
 1298 
 1299         error = kernel_vmount(
 1300             MNT_RDONLY | MNT_ROOTFS,
 1301             "fstype", vfsname,
 1302             "fspath", "/",
 1303             "from", path,
 1304             NULL);
 1305         if (error == 0) {
 1306                 /*
 1307                  * We mount devfs prior to mounting the / FS, so the first
 1308                  * entry will typically be devfs.
 1309                  */
 1310                 mp = TAILQ_FIRST(&mountlist);
 1311                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1312 
 1313                 /*
 1314                  * Iterate over all currently mounted file systems and use
 1315                  * the time stamp found to check and/or initialize the RTC.
 1316                  * Typically devfs has no time stamp and the only other FS
 1317                  * is the actual / FS.
 1318                  */
 1319                 do {
 1320                         if (mp->mnt_time != 0)
 1321                                 inittodr(mp->mnt_time);
 1322                         mp = TAILQ_NEXT(mp, mnt_list);
 1323                 } while (mp != NULL);
 1324 
 1325                 devfs_fixup(curthread);
 1326         }
 1327 out:
 1328         free(path, M_MOUNT);
 1329         free(vfsname, M_MOUNT);
 1330         return (error);
 1331 }
 1332 
 1333 /*
 1334  * ---------------------------------------------------------------------
 1335  * Interactive root filesystem selection code.
 1336  */
 1337 
 1338 static int
 1339 vfs_mountroot_ask(void)
 1340 {
 1341         char name[128];
 1342 
 1343         for(;;) {
 1344                 printf("\nManual root filesystem specification:\n");
 1345                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1346 #if defined(__i386__) || defined(__ia64__)
 1347                 printf("                       eg. ufs:da0s1a\n");
 1348 #else
 1349                 printf("                       eg. ufs:/dev/da0a\n");
 1350 #endif
 1351                 printf("  ?                  List valid disk boot devices\n");
 1352                 printf("  <empty line>       Abort manual input\n");
 1353                 printf("\nmountroot> ");
 1354                 gets(name, sizeof(name), 1);
 1355                 if (name[0] == '\0')
 1356                         return (1);
 1357                 if (name[0] == '?') {
 1358                         printf("\nList of GEOM managed disk devices:\n  ");
 1359                         g_dev_print();
 1360                         continue;
 1361                 }
 1362                 if (!vfs_mountroot_try(name))
 1363                         return (0);
 1364         }
 1365 }
 1366 
 1367 /*
 1368  * ---------------------------------------------------------------------
 1369  * Functions for querying mount options/arguments from filesystems.
 1370  */
 1371 
 1372 /*
 1373  * Check that no unknown options are given
 1374  */
 1375 int
 1376 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1377 {
 1378         struct vfsopt *opt;
 1379         const char **t, *p;
 1380         
 1381 
 1382         TAILQ_FOREACH(opt, opts, link) {
 1383                 p = opt->name;
 1384                 if (p[0] == 'n' && p[1] == 'o')
 1385                         p += 2;
 1386                 for(t = global_opts; *t != NULL; t++)
 1387                         if (!strcmp(*t, p))
 1388                                 break;
 1389                 if (*t != NULL)
 1390                         continue;
 1391                 for(t = legal; *t != NULL; t++)
 1392                         if (!strcmp(*t, p))
 1393                                 break;
 1394                 if (*t != NULL)
 1395                         continue;
 1396                 printf("mount option <%s> is unknown\n", p);
 1397                 return (EINVAL);
 1398         }
 1399         return (0);
 1400 }
 1401 
 1402 /*
 1403  * Get a mount option by its name.
 1404  *
 1405  * Return 0 if the option was found, ENOENT otherwise.
 1406  * If len is non-NULL it will be filled with the length
 1407  * of the option. If buf is non-NULL, it will be filled
 1408  * with the address of the option.
 1409  */
 1410 int
 1411 vfs_getopt(opts, name, buf, len)
 1412         struct vfsoptlist *opts;
 1413         const char *name;
 1414         void **buf;
 1415         int *len;
 1416 {
 1417         struct vfsopt *opt;
 1418 
 1419         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1420 
 1421         TAILQ_FOREACH(opt, opts, link) {
 1422                 if (strcmp(name, opt->name) == 0) {
 1423                         if (len != NULL)
 1424                                 *len = opt->len;
 1425                         if (buf != NULL)
 1426                                 *buf = opt->value;
 1427                         return (0);
 1428                 }
 1429         }
 1430         return (ENOENT);
 1431 }
 1432 
 1433 char *
 1434 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1435 {
 1436         struct vfsopt *opt;
 1437 
 1438         *error = 0;
 1439         TAILQ_FOREACH(opt, opts, link) {
 1440                 if (strcmp(name, opt->name) != 0)
 1441                         continue;
 1442                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1443                         *error = EINVAL;
 1444                         return (NULL);
 1445                 }
 1446                 return (opt->value);
 1447         }
 1448         return (NULL);
 1449 }
 1450 
 1451 int
 1452 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1453 {
 1454         struct vfsopt *opt;
 1455 
 1456         TAILQ_FOREACH(opt, opts, link) {
 1457                 if (strcmp(name, opt->name) == 0) {
 1458                         if (w != NULL)
 1459                                 *w |= val;
 1460                         return (1);
 1461                 }
 1462         }
 1463         if (w != NULL)
 1464                 *w &= ~val;
 1465         return (0);
 1466 }
 1467 
 1468 int
 1469 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1470 {
 1471         va_list ap;
 1472         struct vfsopt *opt;
 1473         int ret;
 1474 
 1475         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1476 
 1477         TAILQ_FOREACH(opt, opts, link) {
 1478                 if (strcmp(name, opt->name) != 0)
 1479                         continue;
 1480                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1481                         return (0);
 1482                 va_start(ap, fmt);
 1483                 ret = vsscanf(opt->value, fmt, ap);
 1484                 va_end(ap);
 1485                 return (ret);
 1486         }
 1487         return (0);
 1488 }
 1489 
 1490 /*
 1491  * Find and copy a mount option.
 1492  *
 1493  * The size of the buffer has to be specified
 1494  * in len, if it is not the same length as the
 1495  * mount option, EINVAL is returned.
 1496  * Returns ENOENT if the option is not found.
 1497  */
 1498 int
 1499 vfs_copyopt(opts, name, dest, len)
 1500         struct vfsoptlist *opts;
 1501         const char *name;
 1502         void *dest;
 1503         int len;
 1504 {
 1505         struct vfsopt *opt;
 1506 
 1507         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1508 
 1509         TAILQ_FOREACH(opt, opts, link) {
 1510                 if (strcmp(name, opt->name) == 0) {
 1511                         if (len != opt->len)
 1512                                 return (EINVAL);
 1513                         bcopy(opt->value, dest, opt->len);
 1514                         return (0);
 1515                 }
 1516         }
 1517         return (ENOENT);
 1518 }
 1519 
 1520 /*
 1521  * This is a helper function for filesystems to traverse their
 1522  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 1523  */
 1524 
 1525 struct vnode *
 1526 __mnt_vnode_next(struct vnode **nvp, struct mount *mp)
 1527 {
 1528         struct vnode *vp;
 1529 
 1530         mtx_assert(&mp->mnt_mtx, MA_OWNED);
 1531 
 1532         vp = *nvp;
 1533         /* Check if we are done */
 1534         if (vp == NULL)
 1535                 return (NULL);
 1536         /* If our next vnode is no longer ours, start over */
 1537         if (vp->v_mount != mp) 
 1538                 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 1539         /* Save pointer to next vnode in list */
 1540         if (vp != NULL)
 1541                 *nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 1542         else
 1543                 *nvp = NULL;
 1544         return (vp);
 1545 }
 1546 
 1547 int
 1548 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 1549 {
 1550         int error;
 1551 
 1552         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 1553         if (sbp != &mp->mnt_stat)
 1554                 *sbp = mp->mnt_stat;
 1555         return (error);
 1556 }
 1557 
 1558 void
 1559 vfs_mountedfrom(struct mount *mp, const char *from)
 1560 {
 1561 
 1562         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 1563         strlcpy(mp->mnt_stat.f_mntfromname, from,
 1564             sizeof mp->mnt_stat.f_mntfromname);
 1565 }
 1566 
 1567 /*
 1568  * ---------------------------------------------------------------------
 1569  * This is the api for building mount args and mounting filesystems from
 1570  * inside the kernel.
 1571  *
 1572  * The API works by accumulation of individual args.  First error is
 1573  * latched.
 1574  *
 1575  * XXX: should be documented in new manpage kernel_mount(9)
 1576  */
 1577 
 1578 /* A memory allocation which must be freed when we are done */
 1579 struct mntaarg {
 1580         SLIST_ENTRY(mntaarg)    next;
 1581 };
 1582 
 1583 /* The header for the mount arguments */
 1584 struct mntarg {
 1585         struct iovec *v;
 1586         int len;
 1587         int error;
 1588         SLIST_HEAD(, mntaarg)   list;
 1589 };
 1590 
 1591 /*
 1592  * Add a boolean argument.
 1593  *
 1594  * flag is the boolean value.
 1595  * name must start with "no".
 1596  */
 1597 struct mntarg *
 1598 mount_argb(struct mntarg *ma, int flag, const char *name)
 1599 {
 1600 
 1601         KASSERT(name[0] == 'n' && name[1] == 'o',
 1602             ("mount_argb(...,%s): name must start with 'no'", name));
 1603 
 1604         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 1605 }
 1606 
 1607 /*
 1608  * Add an argument printf style
 1609  */
 1610 struct mntarg *
 1611 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 1612 {
 1613         va_list ap;
 1614         struct mntaarg *maa;
 1615         struct sbuf *sb;
 1616         int len;
 1617 
 1618         if (ma == NULL) {
 1619                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 1620                 SLIST_INIT(&ma->list);
 1621         }
 1622         if (ma->error)
 1623                 return (ma);
 1624 
 1625         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 1626             M_MOUNT, M_WAITOK);
 1627         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 1628         ma->v[ma->len].iov_len = strlen(name) + 1;
 1629         ma->len++;
 1630 
 1631         sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
 1632         va_start(ap, fmt);
 1633         sbuf_vprintf(sb, fmt, ap);
 1634         va_end(ap);
 1635         sbuf_finish(sb);
 1636         len = sbuf_len(sb) + 1;
 1637         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 1638         SLIST_INSERT_HEAD(&ma->list, maa, next);
 1639         bcopy(sbuf_data(sb), maa + 1, len);
 1640         sbuf_delete(sb);
 1641 
 1642         ma->v[ma->len].iov_base = maa + 1;
 1643         ma->v[ma->len].iov_len = len;
 1644         ma->len++;
 1645 
 1646         return (ma);
 1647 }
 1648 
 1649 /*
 1650  * Add an argument which is a userland string.
 1651  */
 1652 struct mntarg *
 1653 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 1654 {
 1655         struct mntaarg *maa;
 1656         char *tbuf;
 1657 
 1658         if (val == NULL)
 1659                 return (ma);
 1660         if (ma == NULL) {
 1661                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 1662                 SLIST_INIT(&ma->list);
 1663         }
 1664         if (ma->error)
 1665                 return (ma);
 1666         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 1667         SLIST_INSERT_HEAD(&ma->list, maa, next);
 1668         tbuf = (void *)(maa + 1);
 1669         ma->error = copyinstr(val, tbuf, len, NULL);
 1670         return (mount_arg(ma, name, tbuf, -1));
 1671 }
 1672 
 1673 /*
 1674  * Plain argument.
 1675  *
 1676  * If length is -1, use printf.
 1677  */
 1678 struct mntarg *
 1679 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 1680 {
 1681 
 1682         if (ma == NULL) {
 1683                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 1684                 SLIST_INIT(&ma->list);
 1685         }
 1686         if (ma->error)
 1687                 return (ma);
 1688 
 1689         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 1690             M_MOUNT, M_WAITOK);
 1691         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 1692         ma->v[ma->len].iov_len = strlen(name) + 1;
 1693         ma->len++;
 1694 
 1695         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 1696         if (len < 0)
 1697                 ma->v[ma->len].iov_len = strlen(val) + 1;
 1698         else
 1699                 ma->v[ma->len].iov_len = len;
 1700         ma->len++;
 1701         return (ma);
 1702 }
 1703 
 1704 /*
 1705  * Free a mntarg structure
 1706  */
 1707 static void
 1708 free_mntarg(struct mntarg *ma)
 1709 {
 1710         struct mntaarg *maa;
 1711 
 1712         while (!SLIST_EMPTY(&ma->list)) {
 1713                 maa = SLIST_FIRST(&ma->list);
 1714                 SLIST_REMOVE_HEAD(&ma->list, next);
 1715                 free(maa, M_MOUNT);
 1716         }
 1717         free(ma->v, M_MOUNT);
 1718         free(ma, M_MOUNT);
 1719 }
 1720 
 1721 /*
 1722  * Mount a filesystem
 1723  */
 1724 int
 1725 kernel_mount(struct mntarg *ma, int flags)
 1726 {
 1727         struct uio auio;
 1728         int error;
 1729 
 1730         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 1731         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 1732         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 1733 
 1734         auio.uio_iov = ma->v;
 1735         auio.uio_iovcnt = ma->len;
 1736         auio.uio_segflg = UIO_SYSSPACE;
 1737 
 1738         error = ma->error;
 1739         if (!error)
 1740                 error = vfs_donmount(curthread, flags, &auio);
 1741         free_mntarg(ma);
 1742         return (error);
 1743 }
 1744 
 1745 /*
 1746  * A printflike function to mount a filesystem.
 1747  */
 1748 int
 1749 kernel_vmount(int flags, ...)
 1750 {
 1751         struct mntarg *ma = NULL;
 1752         va_list ap;
 1753         const char *cp;
 1754         const void *vp;
 1755         int error;
 1756 
 1757         va_start(ap, flags);
 1758         for (;;) {
 1759                 cp = va_arg(ap, const char *);
 1760                 if (cp == NULL)
 1761                         break;
 1762                 vp = va_arg(ap, const void *);
 1763                 ma = mount_arg(ma, cp, vp, -1);
 1764         }
 1765         va_end(ap);
 1766 
 1767         error = kernel_mount(ma, flags);
 1768         return (error);
 1769 }

Cache object: b9dbadb1cf5537e06552bc7ca5f68b9b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.