vfs_mount.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1999-2004 Poul-Henning Kamp
    3  * Copyright (c) 1999 Michael Smith
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <sys/param.h>
   41 #include <sys/conf.h>
   42 #include <sys/clock.h>
   43 #include <sys/jail.h>
   44 #include <sys/kernel.h>
   45 #include <sys/libkern.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mount.h>
   48 #include <sys/mutex.h>
   49 #include <sys/namei.h>
   50 #include <sys/priv.h>
   51 #include <sys/proc.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/reboot.h>
   54 #include <sys/syscallsubr.h>
   55 #include <sys/sysproto.h>
   56 #include <sys/sx.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/vnode.h>
   61 #include <vm/uma.h>
   62 
   63 #include <geom/geom.h>
   64 
   65 #include <machine/stdarg.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/mac/mac_framework.h>
   69 
   70 #include "opt_rootdevname.h"
   71 #include "opt_ddb.h"
   72 #include "opt_mac.h"
   73 
   74 #ifdef DDB
   75 #include <ddb/ddb.h>
   76 #endif
   77 
   78 #define ROOTNAME                "root_device"
   /*
    * Upper bound, in bytes, that vfs_buildopts() applies to the memory
    * accounted for a whole option list and to each individual option
    * name and value length.
    */
   79 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   80 
   /* Prototypes for file-local (static) helpers. */
   81 static int      vfs_domount(struct thread *td, const char *fstype,
   82                     char *fspath, int fsflags, void *fsdata);
   83 static int      vfs_mountroot_ask(void);
   84 static int      vfs_mountroot_try(const char *mountfrom);
   85 static void     free_mntarg(struct mntarg *ma);
   86 static int      vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
   87 
   /*
    * Backing variable for the "usermount" sysctl declared under _vfs just
    * below (read/write, initial value 0).
    */
   88 static int      usermount = 0;
   89 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   90     "Unprivileged users may mount and unmount file systems");
   91 
   /*
    * Kernel malloc types.  M_MOUNT tags the mount option lists, options,
    * names and values allocated in this file.
    */
   92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   93 MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
   /*
    * UMA zone from which vfs_mount_alloc() takes struct mount and to which
    * vfs_mount_destroy() returns it.
    */
   94 static uma_zone_t mount_zone;
   95 
   96 /* List of mounted filesystems. */
   97 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   98 
   99 /* Mutex to hold for any iteration over or modification of mountlist. */
  100 struct mtx mountlist_mtx;
  101 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  102 
  103 /*
  104  * The vnode of the system's root (/ in the filesystem, without chroot
  105  * active.)
  106  */
  107 struct vnode    *rootvnode;
  108 
  109 /*
  110  * The root filesystem is detailed in the kernel environment variable
  111  * vfs.root.mountfrom, which is expected to be in the general format
  112  *
  113  * <vfsname>:[<path>]
  114  * vfsname   := the name of a VFS known to the kernel and capable
  115  *              of being mounted as root
  116  * path      := disk device name or other data used by the filesystem
  117  *              to locate its physical store
  118  */
  119 
  120 /*
  121  * Global opts, taken by all filesystems (NULL-terminated list).
  122  */
  123 static const char *global_opts[] = {
  124         "errmsg",
  125         "fstype",
  126         "fspath",
  127         "ro",
  128         "rw",
  129         "nosuid",
  130         "noexec",
  131         NULL
  132 };
  133 
  134 /*
  135  * The root specifiers we will try if RB_CDROM is specified.
  136  * Each entry uses the <vfsname>:<path> format; the list is NULL-terminated.
  137  */
  137 static char *cdrom_rootdevnames[] = {
  138         "cd9660:cd0",
  139         "cd9660:acd0",
  140         NULL
  141 };
  142 
  143 /* legacy find-root code */
  144 char            *rootdevnames[2] = {NULL, NULL};
      /*
       * Fall back to NULL when ROOTDEVNAME was not defined at build time
       * (presumably it comes from opt_rootdevname.h -- confirm).
       */
  145 #ifndef ROOTDEVNAME
  146 #  define ROOTDEVNAME NULL
  147 #endif
      /* Compile-time root device name, or NULL if none was configured. */
  148 static const char       *ctrootdevname = ROOTDEVNAME;
  149 
  150 /*
  151  * ---------------------------------------------------------------------
  152  * Functions for building and sanitizing the mount options
  153  */
  154 
  155 /* Remove one mount option. */
  156 static void
  157 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  158 {
  159 
  160         TAILQ_REMOVE(opts, opt, link);
  161         free(opt->name, M_MOUNT);
  162         if (opt->value != NULL)
  163                 free(opt->value, M_MOUNT);
  164 #ifdef INVARIANTS
  165         else if (opt->len != 0)
  166                 panic("%s: mount option with NULL value but length != 0",
  167                     __func__);
  168 #endif
  169         free(opt, M_MOUNT);
  170 }
  171 
  172 /* Release all resources related to the mount options. */
  173 void
  174 vfs_freeopts(struct vfsoptlist *opts)
  175 {
  176         struct vfsopt *opt;
  177 
  178         while (!TAILQ_EMPTY(opts)) {
  179                 opt = TAILQ_FIRST(opts);
  180                 vfs_freeopt(opts, opt);
  181         }
  182         free(opts, M_MOUNT);
  183 }
  184 
  185 void
  186 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  187 {
  188         struct vfsopt *opt, *temp;
  189 
  190         if (opts == NULL)
  191                 return;
  192         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  193                 if (strcmp(opt->name, name) == 0)
  194                         vfs_freeopt(opts, opt);
  195         }
  196 }
  197 
  /*
   * Return 1 if "opt" is one of the spellings that request a read-only
   * mount ("ro", "rdonly" or "norw"), 0 otherwise.
   */
  static int
  vfs_isopt_ro(const char *opt)
  {
          static const char *const ro_names[] = { "ro", "rdonly", "norw" };
          size_t i;

          for (i = 0; i < sizeof(ro_names) / sizeof(ro_names[0]); i++) {
                  if (strcmp(opt, ro_names[i]) == 0)
                          return (1);
          }
          return (0);
  }
  207 
  /*
   * Return 1 if "opt" is one of the spellings that request a read-write
   * mount ("rw" or "noro"), 0 otherwise.
   */
  static int
  vfs_isopt_rw(const char *opt)
  {

          return (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0);
  }
  216 
  /*
   * Decide whether two option names refer to the same setting.  Names
   * match when they are identical, when one is the other prefixed with
   * "no", or when both belong to the read-only/read-write family
   * ("ro", "rdonly", "norw", "rw", "noro").
   * Returns 1 on a match and 0 otherwise.
   */
  static int
  vfs_equalopts(const char *opt1, const char *opt2)
  {
          int neg1, neg2;

          /* "opt" vs. "opt" or "noopt" vs. "noopt" */
          if (strcmp(opt1, opt2) == 0)
                  return (1);

          neg1 = (strncmp(opt1, "no", 2) == 0);
          neg2 = (strncmp(opt2, "no", 2) == 0);

          /* "noopt" vs. "opt" */
          if (neg1 && strcmp(opt1 + 2, opt2) == 0)
                  return (1);
          /* "opt" vs. "noopt" */
          if (neg2 && strcmp(opt1, opt2 + 2) == 0)
                  return (1);

          /* "ro" / "rdonly" / "norw" / "rw" / "noro" */
          return ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
              (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)));
  }
  239 
  240 /*
  241  * If a mount option is specified several times,
  242  * (with or without the "no" prefix) only keep
  243  * the last occurence of it.
  244  */
  245 static void
  246 vfs_sanitizeopts(struct vfsoptlist *opts)
  247 {
  248         struct vfsopt *opt, *opt2, *tmp;
  249 
  250         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  251                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  252                 while (opt2 != NULL) {
  253                         if (vfs_equalopts(opt->name, opt2->name)) {
  254                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  255                                 vfs_freeopt(opts, opt2);
  256                                 opt2 = tmp;
  257                         } else {
  258                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  259                         }
  260                 }
  261         }
  262 }
  263 
  264 /*
  265  * Build a linked list of mount options from a struct uio.
  266  */
  267 static int
  268 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  269 {
  270         struct vfsoptlist *opts;
  271         struct vfsopt *opt;
  272         size_t memused;
  273         unsigned int i, iovcnt;
  274         int error, namelen, optlen;
  275 
  276         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  277         TAILQ_INIT(opts);
  278         memused = 0;
  279         iovcnt = auio->uio_iovcnt;
  280         for (i = 0; i < iovcnt; i += 2) {
  281                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  282                 namelen = auio->uio_iov[i].iov_len;
  283                 optlen = auio->uio_iov[i + 1].iov_len;
  284                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  285                 opt->value = NULL;
  286                 opt->len = 0;
  287 
  288                 /*
  289                  * Do this early, so jumps to "bad" will free the current
  290                  * option.
  291                  */
  292                 TAILQ_INSERT_TAIL(opts, opt, link);
  293                 memused += sizeof(struct vfsopt) + optlen + namelen;
  294 
  295                 /*
  296                  * Avoid consuming too much memory, and attempts to overflow
  297                  * memused.
  298                  */
  299                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  300                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  301                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  302                         error = EINVAL;
  303                         goto bad;
  304                 }
  305 
  306                 if (auio->uio_segflg == UIO_SYSSPACE) {
  307                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  308                 } else {
  309                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  310                             namelen);
  311                         if (error)
  312                                 goto bad;
  313                 }
  314                 /* Ensure names are null-terminated strings. */
  315                 if (opt->name[namelen - 1] != '\0') {
  316                         error = EINVAL;
  317                         goto bad;
  318                 }
  319                 if (optlen != 0) {
  320                         opt->len = optlen;
  321                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  322                         if (auio->uio_segflg == UIO_SYSSPACE) {
  323                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  324                                     optlen);
  325                         } else {
  326                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  327                                     opt->value, optlen);
  328                                 if (error)
  329                                         goto bad;
  330                         }
  331                 }
  332         }
  333         vfs_sanitizeopts(opts);
  334         *options = opts;
  335         return (0);
  336 bad:
  337         vfs_freeopts(opts);
  338         return (error);
  339 }
  340 
  341 /*
  342  * Merge the old mount options with the new ones passed
  343  * in the MNT_UPDATE case.
  344  */
  345 static void
  346 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  347 {
  348         struct vfsopt *opt, *opt2, *new;
  349 
  350         TAILQ_FOREACH(opt, opts, link) {
  351                 /*
  352                  * Check that this option hasn't been redefined
  353                  * nor cancelled with a "no" mount option.
  354                  */
  355                 opt2 = TAILQ_FIRST(toopts);
  356                 while (opt2 != NULL) {
  357                         if (strcmp(opt2->name, opt->name) == 0)
  358                                 goto next;
  359                         if (strncmp(opt2->name, "no", 2) == 0 &&
  360                             strcmp(opt2->name + 2, opt->name) == 0) {
  361                                 vfs_freeopt(toopts, opt2);
  362                                 goto next;
  363                         }
  364                         opt2 = TAILQ_NEXT(opt2, link);
  365                 }
  366                 /* We want this option, duplicate it. */
  367                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  368                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  369                 strcpy(new->name, opt->name);
  370                 if (opt->len != 0) {
  371                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  372                         bcopy(opt->value, new->value, opt->len);
  373                 } else {
  374                         new->value = NULL;
  375                 }
  376                 new->len = opt->len;
  377                 TAILQ_INSERT_TAIL(toopts, new, link);
  378 next:
  379                 continue;
  380         }
  381 }
  382 
  383 /*
  384  * Mount a filesystem.
  385  */
  386 int
  387 nmount(td, uap)
  388         struct thread *td;
  389         struct nmount_args /* {
  390                 struct iovec *iovp;
  391                 unsigned int iovcnt;
  392                 int flags;
  393         } */ *uap;
  394 {
  395         struct uio *auio;
  396         struct iovec *iov;
  397         unsigned int i;
  398         int error;
  399         u_int iovcnt;
  400 
  401         AUDIT_ARG(fflags, uap->flags);
  402 
  403         /*
  404          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  405          * userspace to set this flag, but we must filter it out if we want
  406          * MNT_UPDATE on the root file system to work.
  407          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  408          */
  409         uap->flags &= ~MNT_ROOTFS;
  410 
  411         iovcnt = uap->iovcnt;
  412         /*
  413          * Check that we have an even number of iovec's
  414          * and that we have at least two options.
  415          */
  416         if ((iovcnt & 1) || (iovcnt < 4))
  417                 return (EINVAL);
  418 
  419         error = copyinuio(uap->iovp, iovcnt, &auio);
  420         if (error)
  421                 return (error);
  422         iov = auio->uio_iov;
  423         for (i = 0; i < iovcnt; i++) {
  424                 if (iov->iov_len > MMAXOPTIONLEN) {
  425                         free(auio, M_IOV);
  426                         return (EINVAL);
  427                 }
  428                 iov++;
  429         }
  430         error = vfs_donmount(td, uap->flags, auio);
  431 
  432         free(auio, M_IOV);
  433         return (error);
  434 }
  435 
  436 /*
  437  * ---------------------------------------------------------------------
  438  * Various utility functions
  439  */
  440 
  /*
   * Take a reference on the mount point.  MNT_REF() is applied between
   * MNT_ILOCK() and MNT_IUNLOCK() on the same mount.
   */
  void
  vfs_ref(struct mount *mp)
  {

          MNT_ILOCK(mp);
          MNT_REF(mp);
          MNT_IUNLOCK(mp);
  }
  449 
  /*
   * Drop a reference on the mount point.  MNT_REL() is applied between
   * MNT_ILOCK() and MNT_IUNLOCK() on the same mount.
   */
  void
  vfs_rel(struct mount *mp)
  {

          MNT_ILOCK(mp);
          MNT_REL(mp);
          MNT_IUNLOCK(mp);
  }
  458 
  459 static int
  460 mount_init(void *mem, int size, int flags)
  461 {
  462         struct mount *mp;
  463 
  464         mp = (struct mount *)mem;
  465         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  466         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  467         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  468         return (0);
  469 }
  470 
  471 static void
  472 mount_fini(void *mem, int size)
  473 {
  474         struct mount *mp;
  475 
  476         mp = (struct mount *)mem;
  477         lockdestroy(&mp->mnt_explock);
  478         lockdestroy(&mp->mnt_lock);
  479         mtx_destroy(&mp->mnt_mtx);
  480 }
  481 
  482 /*
       * Allocate and initialize the mount point struct.
       *
       * vp      vnode stored as the covered vnode (mnt_vnodecovered)
       * vfsp    filesystem type; supplies the vfsops, type number and type
       *         name, and has its vfc_refcount incremented
       * fspath  mount-on path copied into mnt_stat.f_mntonname
       * td      calling thread; its credential is duplicated into mnt_cred
       *         and its uid is recorded as mnt_stat.f_owner
       *
       * Returns the new struct mount, on which vfs_busy() has already been
       * called.  Release it with vfs_mount_destroy().
       */
  485 struct mount *
  486 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  487     const char *fspath, struct thread *td)
  488 {
  489         struct mount *mp;
  490 
  491         mp = uma_zalloc(mount_zone, M_WAITOK);
              /* Clear only the mnt_startzero .. mnt_endzero span of the struct. */
  492         bzero(&mp->mnt_startzero,
  493             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  494         TAILQ_INIT(&mp->mnt_nvnodelist);
  495         mp->mnt_nvnodelistsize = 0;
  496         mp->mnt_ref = 0;
              /* Busy the new mount without waiting; the result is discarded. */
  497         (void) vfs_busy(mp, LK_NOWAIT, 0, td);
  498         mp->mnt_op = vfsp->vfc_vfsops;
  499         mp->mnt_vfc = vfsp;
  500         vfsp->vfc_refcount++;   /* XXX Unlocked */
  501         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  502         mp->mnt_gen++;
  503         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  504         mp->mnt_vnodecovered = vp;
  505         mp->mnt_cred = crdup(td->td_ucred);
  506         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  507         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  508         mp->mnt_iosize_max = DFLTPHYS;
  509 #ifdef MAC
  510         mac_init_mount(mp);
  511         mac_create_mount(td->td_ucred, mp);
  512 #endif
              /* Fill mnt_hashseed with random bytes from arc4rand(). */
  513         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  514         return (mp);
  515 }
  516 
  517 /*
       * Destroy the mount struct previously allocated by vfs_mount_alloc().
       *
       * With the mount's mutex held (MNT_ILOCK), the function first waits for
       * outstanding users to go away: a bounded wait for mnt_ref, then
       * unbounded waits for mnt_holdcnt, mnt_writeopcount and
       * mnt_secondary_writes to reach zero.  It then drops the vfsconf
       * reference, panics if vnodes are still attached or any counter is
       * non-zero, and finally frees the options, the credential and the
       * struct itself.
       */
  520 void
  521 vfs_mount_destroy(struct mount *mp)
  522 {
  523         int i;
  524 
  525         MNT_ILOCK(mp);
  526         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
              /* Wake up anyone recorded as waiting on this mount. */
  527         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  528                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  529                 wakeup(mp);
  530         }
              /* At most three sleeps, each with a timeout of hz ticks. */
  531         for (i = 0; mp->mnt_ref && i < 3; i++)
  532                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
  533         /*
  534          * This will always cause a 3 second delay in rebooting due to
  535          * refs on the root mountpoint that never go away.  Most of these
  536          * are held by init which never exits.
  537          */
  538         if (i == 3 && (!rebooting || bootverbose))
  539                 printf("Mount point %s had %d dangling refs\n",
  540                     mp->mnt_stat.f_mntonname, mp->mnt_ref);
              /*
               * Sleep without timeout until mnt_holdcnt drops to zero, counting
               * this thread in mnt_holdcntwaiters while it sleeps.
               */
  541         if (mp->mnt_holdcnt != 0) {
  542                 printf("Waiting for mount point to be unheld\n");
  543                 while (mp->mnt_holdcnt != 0) {
  544                         mp->mnt_holdcntwaiters++;
  545                         msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
  546                                PZERO, "mntdestroy", 0);
  547                         mp->mnt_holdcntwaiters--;
  548                 }
  549                 printf("mount point unheld\n");
  550         }
              /*
               * Wait for in-progress write operations; MNTK_SUSPEND is set
               * again before every sleep.
               */
  551         if (mp->mnt_writeopcount > 0) {
  552                 printf("Waiting for mount point write ops\n");
  553                 while (mp->mnt_writeopcount > 0) {
  554                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  555                         msleep(&mp->mnt_writeopcount,
  556                                MNT_MTX(mp),
  557                                PZERO, "mntdestroy2", 0);
  558                 }
  559                 printf("mount point write ops completed\n");
  560         }
              /* Same wait, this time for secondary writes. */
  561         if (mp->mnt_secondary_writes > 0) {
  562                 printf("Waiting for mount point secondary write ops\n");
  563                 while (mp->mnt_secondary_writes > 0) {
  564                         mp->mnt_kern_flag |= MNTK_SUSPEND;
  565                         msleep(&mp->mnt_secondary_writes,
  566                                MNT_MTX(mp),
  567                                PZERO, "mntdestroy3", 0);
  568                 }
  569                 printf("mount point secondary write ops completed\n");
  570         }
  571         MNT_IUNLOCK(mp);
              /* Note: the vfsconf refcount is decremented with no lock held here. */
  572         mp->mnt_vfc->vfc_refcount--;
              /* Any vnode still on the mount's list is fatal: print them all, then panic. */
  573         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  574                 struct vnode *vp;
  575 
  576                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  577                         vprint("", vp);
  578                 panic("unmount: dangling vnode");
  579         }
  580         MNT_ILOCK(mp);
  581         if (mp->mnt_kern_flag & MNTK_MWAIT)
  582                 wakeup(mp);
  583         if (mp->mnt_writeopcount != 0)
  584                 panic("vfs_mount_destroy: nonzero writeopcount");
  585         if (mp->mnt_secondary_writes != 0)
  586                 panic("vfs_mount_destroy: nonzero secondary_writes");
  587         if (mp->mnt_nvnodelistsize != 0)
  588                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
              /*
               * Overwrite the counters with an obviously invalid value
               * (presumably to make use of a destroyed mount easy to spot).
               */
  589         mp->mnt_writeopcount = -1000;
  590         mp->mnt_nvnodelistsize = -1000;
  591         mp->mnt_secondary_writes = -1000;
  592         MNT_IUNLOCK(mp);
  593 #ifdef MAC
  594         mac_destroy_mount(mp);
  595 #endif
  596         if (mp->mnt_opt != NULL)
  597                 vfs_freeopts(mp->mnt_opt);
  598         crfree(mp->mnt_cred);
  599         uma_zfree(mount_zone, mp);
  600 }
  601 
  602 int
  603 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  604 {
  605         struct vfsoptlist *optlist;
  606         struct vfsopt *opt, *noro_opt, *tmp_opt;
  607         char *fstype, *fspath, *errmsg;
  608         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  609         int has_rw, has_noro;
  610 
  611         errmsg = NULL;
  612         errmsg_len = 0;
  613         errmsg_pos = -1;
  614         has_rw = 0;
  615         has_noro = 0;
  616 
  617         error = vfs_buildopts(fsoptions, &optlist);
  618         if (error)
  619                 return (error);
  620 
  621         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  622                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  623 
  624         /*
  625          * We need these two options before the others,
  626          * and they are mandatory for any filesystem.
  627          * Ensure they are NUL terminated as well.
  628          */
  629         fstypelen = 0;
  630         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  631         if (error || fstype[fstypelen - 1] != '\0') {
  632                 error = EINVAL;
  633                 if (errmsg != NULL)
  634                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  635                 goto bail;
  636         }
  637         fspathlen = 0;
  638         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  639         if (error || fspath[fspathlen - 1] != '\0') {
  640                 error = EINVAL;
  641                 if (errmsg != NULL)
  642                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  643                 goto bail;
  644         }
  645 
  646         /*
  647          * We need to see if we have the "update" option
  648          * before we call vfs_domount(), since vfs_domount() has special
  649          * logic based on MNT_UPDATE.  This is very important
  650          * when we want to update the root filesystem.
  651          */
  652         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  653                 if (strcmp(opt->name, "update") == 0) {
  654                         fsflags |= MNT_UPDATE;
  655                         vfs_freeopt(optlist, opt);
  656                 }
  657                 else if (strcmp(opt->name, "async") == 0)
  658                         fsflags |= MNT_ASYNC;
  659                 else if (strcmp(opt->name, "force") == 0) {
  660                         fsflags |= MNT_FORCE;
  661                         vfs_freeopt(optlist, opt);
  662                 }
  663                 else if (strcmp(opt->name, "reload") == 0) {
  664                         fsflags |= MNT_RELOAD;
  665                         vfs_freeopt(optlist, opt);
  666                 }
  667                 else if (strcmp(opt->name, "multilabel") == 0)
  668                         fsflags |= MNT_MULTILABEL;
  669                 else if (strcmp(opt->name, "noasync") == 0)
  670                         fsflags &= ~MNT_ASYNC;
  671                 else if (strcmp(opt->name, "noatime") == 0)
  672                         fsflags |= MNT_NOATIME;
  673                 else if (strcmp(opt->name, "atime") == 0) {
  674                         free(opt->name, M_MOUNT);
  675                         opt->name = strdup("nonoatime", M_MOUNT);
  676                 }
  677                 else if (strcmp(opt->name, "noclusterr") == 0)
  678                         fsflags |= MNT_NOCLUSTERR;
  679                 else if (strcmp(opt->name, "clusterr") == 0) {
  680                         free(opt->name, M_MOUNT);
  681                         opt->name = strdup("nonoclusterr", M_MOUNT);
  682                 }
  683                 else if (strcmp(opt->name, "noclusterw") == 0)
  684                         fsflags |= MNT_NOCLUSTERW;
  685                 else if (strcmp(opt->name, "clusterw") == 0) {
  686                         free(opt->name, M_MOUNT);
  687                         opt->name = strdup("nonoclusterw", M_MOUNT);
  688                 }
  689                 else if (strcmp(opt->name, "noexec") == 0)
  690                         fsflags |= MNT_NOEXEC;
  691                 else if (strcmp(opt->name, "exec") == 0) {
  692                         free(opt->name, M_MOUNT);
  693                         opt->name = strdup("nonoexec", M_MOUNT);
  694                 }
  695                 else if (strcmp(opt->name, "nosuid") == 0)
  696                         fsflags |= MNT_NOSUID;
  697                 else if (strcmp(opt->name, "suid") == 0) {
  698                         free(opt->name, M_MOUNT);
  699                         opt->name = strdup("nonosuid", M_MOUNT);
  700                 }
  701                 else if (strcmp(opt->name, "nosymfollow") == 0)
  702                         fsflags |= MNT_NOSYMFOLLOW;
  703                 else if (strcmp(opt->name, "symfollow") == 0) {
  704                         free(opt->name, M_MOUNT);
  705                         opt->name = strdup("nonosymfollow", M_MOUNT);
  706                 }
  707                 else if (strcmp(opt->name, "noro") == 0) {
  708                         fsflags &= ~MNT_RDONLY;
  709                         has_noro = 1;
  710                 }
  711                 else if (strcmp(opt->name, "rw") == 0) {
  712                         fsflags &= ~MNT_RDONLY;
  713                         has_rw = 1;
  714                 }
  715                 else if (strcmp(opt->name, "ro") == 0)
  716                         fsflags |= MNT_RDONLY;
  717                 else if (strcmp(opt->name, "rdonly") == 0) {
  718                         free(opt->name, M_MOUNT);
  719                         opt->name = strdup("ro", M_MOUNT);
  720                         fsflags |= MNT_RDONLY;
  721                 }
  722                 else if (strcmp(opt->name, "suiddir") == 0)
  723                         fsflags |= MNT_SUIDDIR;
  724                 else if (strcmp(opt->name, "sync") == 0)
  725                         fsflags |= MNT_SYNCHRONOUS;
  726                 else if (strcmp(opt->name, "union") == 0)
  727                         fsflags |= MNT_UNION;
  728         }
  729 
  730         /*
  731          * If "rw" was specified as a mount option, and we
  732          * are trying to update a mount-point from "ro" to "rw",
  733          * we need a mount option "noro", since in vfs_mergeopts(),
  734          * "noro" will cancel "ro", but "rw" will not do anything.
  735          */
  736         if (has_rw && !has_noro) {
  737                 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  738                 noro_opt->name = strdup("noro", M_MOUNT);
  739                 noro_opt->value = NULL;
  740                 noro_opt->len = 0;
  741                 TAILQ_INSERT_TAIL(optlist, noro_opt, link);
  742         }
  743 
  744         /*
  745          * Be ultra-paranoid about making sure the type and fspath
  746          * variables will fit in our mp buffers, including the
  747          * terminating NUL.
  748          */
  749         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  750                 error = ENAMETOOLONG;
  751                 goto bail;
  752         }
  753 
  754         mtx_lock(&Giant);
  755         error = vfs_domount(td, fstype, fspath, fsflags, optlist);
  756         mtx_unlock(&Giant);
  757 bail:
  758         /* copyout the errmsg */
  759         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  760             && errmsg_len > 0 && errmsg != NULL) {
  761                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  762                         bcopy(errmsg,
  763                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  764                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  765                 } else {
  766                         copyout(errmsg,
  767                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  768                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  769                 }
  770         }
  771 
  772         if (error != 0)
  773                 vfs_freeopts(optlist);
  774         return (error);
  775 }
  776 
  777 /*
  778  * Old mount API.
  779  */
  780 #ifndef _SYS_SYSPROTO_H_
  781 struct mount_args {
  782         char    *type;
  783         char    *path;
  784         int     flags;
  785         caddr_t data;
  786 };
  787 #endif
  788 /* ARGSUSED */
  789 int
  790 mount(td, uap)
  791         struct thread *td;
  792         struct mount_args /* {
  793                 char *type;
  794                 char *path;
  795                 int flags;
  796                 caddr_t data;
  797         } */ *uap;
  798 {
  799         char *fstype;
  800         struct vfsconf *vfsp = NULL;
  801         struct mntarg *ma = NULL;
  802         int error;
  803 
  804         AUDIT_ARG(fflags, uap->flags);
  805 
  806         /*
  807          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  808          * userspace to set this flag, but we must filter it out if we want
  809          * MNT_UPDATE on the root file system to work.
  810          * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try().
  811          */
  812         uap->flags &= ~MNT_ROOTFS;
  813 
  814         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  815         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  816         if (error) {
  817                 free(fstype, M_TEMP);
  818                 return (error);
  819         }
  820 
  821         AUDIT_ARG(text, fstype);
  822         mtx_lock(&Giant);
  823         vfsp = vfs_byname_kld(fstype, td, &error);
  824         free(fstype, M_TEMP);
  825         if (vfsp == NULL) {
  826                 mtx_unlock(&Giant);
  827                 return (ENOENT);
  828         }
  829         if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
  830                 mtx_unlock(&Giant);
  831                 return (EOPNOTSUPP);
  832         }
  833 
  834         ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
  835         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  836         ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
  837         ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
  838         ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
  839 
  840         error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
  841         mtx_unlock(&Giant);
  842         return (error);
  843 }
  844 
  845 
/*
 * vfs_domount(): actually attempt a filesystem mount.
 *
 * The caller must hold Giant (asserted below).
 *
 * Two modes are handled, selected by MNT_UPDATE in fsflags:
 *
 *  - Update: fspath must resolve to the root vnode of an existing mount.
 *    That mount is busied, its flags are adjusted, fsdata is merged with
 *    the mount's current options and VFS_MOUNT() is called on it.  If
 *    the update fails the previous mnt_flag value (except MNT_QUOTA) is
 *    restored.
 *
 *  - New mount: fspath must resolve to a directory that is not already
 *    being mounted on.  A new struct mount is allocated, VFS_MOUNT() is
 *    called, and on success the mount is attached to the covered vnode
 *    and appended to the mountlist.  On failure the new mount is
 *    destroyed.
 *
 * fsdata is installed as mp->mnt_optnew for the duration of VFS_MOUNT()
 * and becomes mp->mnt_opt if VFS_MOUNT() (and, for updates, the export
 * processing) succeeds.
 *
 * Returns 0 on success or an errno value.
 *
 * NOTE(review): error can still be set by vfs_allocate_syncvnode() after
 * mnt_optnew has been installed as mnt_opt, and the caller earlier in
 * this file frees its option list whenever this function returns
 * non-zero.  Confirm that this cannot leave mnt_opt pointing at freed
 * memory.
 */
static int
vfs_domount(
        struct thread *td,      /* Calling thread. */
        const char *fstype,     /* Filesystem type. */
        char *fspath,           /* Mount path. */
        int fsflags,            /* Flags common to all filesystems. */
        void *fsdata            /* Options local to the filesystem. */
        )
{
        struct vnode *vp;
        struct mount *mp;
        struct vfsconf *vfsp;
        struct export_args export;
        int error, flag = 0;
        struct vattr va;
        struct nameidata nd;

        mtx_assert(&Giant, MA_OWNED);
        /*
         * Be ultra-paranoid about making sure the type and fspath
         * variables will fit in our mp buffers, including the
         * terminating NUL.
         */
        if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
                return (ENAMETOOLONG);

        /*
         * Jailed callers, and all callers when usermount is disabled,
         * need the PRIV_VFS_MOUNT privilege.
         */
        if (jailed(td->td_ucred) || usermount == 0) {
                if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
                        return (error);
        }

        /*
         * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
         */
        if (fsflags & MNT_EXPORTED) {
                error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
                if (error)
                        return (error);
        }
        if (fsflags & MNT_SUIDDIR) {
                error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
                if (error)
                        return (error);
        }
        /*
         * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
         */
        if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
                if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
                        fsflags |= MNT_NOSUID | MNT_USER;
        }

        /* Load KLDs before we lock the covered vnode to avoid reversals. */
        vfsp = NULL;
        if ((fsflags & MNT_UPDATE) == 0) {
                /* Don't try to load KLDs if we're mounting the root. */
                if (fsflags & MNT_ROOTFS)
                        vfsp = vfs_byname(fstype);
                else
                        vfsp = vfs_byname_kld(fstype, td, &error);
                if (vfsp == NULL)
                        return (ENODEV);
                /* Inside a jail only filesystems flagged VFCF_JAIL are allowed. */
                if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL))
                        return (EPERM);
        }
        /*
         * Get vnode to be covered
         */
        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
            fspath, td);
        if ((error = namei(&nd)) != 0)
                return (error);
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;
        if (fsflags & MNT_UPDATE) {
                /* An update must name the root vnode of a mounted filesystem. */
                if ((vp->v_vflag & VV_ROOT) == 0) {
                        vput(vp);
                        return (EINVAL);
                }
                mp = vp->v_mount;
                MNT_ILOCK(mp);
                /* Saved so the flags can be put back if the update fails. */
                flag = mp->mnt_flag;
                /*
                 * We only allow the filesystem to be reloaded if it
                 * is currently mounted read-only.
                 */
                if ((fsflags & MNT_RELOAD) &&
                    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
                        MNT_IUNLOCK(mp);
                        vput(vp);
                        return (EOPNOTSUPP);    /* Needs translation */
                }
                MNT_IUNLOCK(mp);
                /*
                 * Only privileged root, or (if MNT_USER is set) the user that
                 * did the original mount is permitted to update it.
                 */
                error = vfs_suser(mp, td);
                if (error) {
                        vput(vp);
                        return (error);
                }
                if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
                        vput(vp);
                        return (EBUSY);
                }
                /*
                 * Refuse if another mount operation is already in progress
                 * on this vnode or something is mounted on top of it;
                 * otherwise claim it with VI_MOUNT.
                 */
                VI_LOCK(vp);
                if ((vp->v_iflag & VI_MOUNT) != 0 ||
                    vp->v_mountedhere != NULL) {
                        VI_UNLOCK(vp);
                        vfs_unbusy(mp, td);
                        vput(vp);
                        return (EBUSY);
                }
                vp->v_iflag |= VI_MOUNT;
                VI_UNLOCK(vp);
                MNT_ILOCK(mp);
                mp->mnt_flag |= fsflags &
                    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
                MNT_IUNLOCK(mp);
                VOP_UNLOCK(vp, 0, td);
                /* Stage the new options, merged with the existing ones. */
                mp->mnt_optnew = fsdata;
                vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
        } else {
                /*
                 * If the user is not root, ensure that they own the directory
                 * onto which we are attempting to mount.
                 */
                error = VOP_GETATTR(vp, &va, td->td_ucred, td);
                if (error) {
                        vput(vp);
                        return (error);
                }
                if (va.va_uid != td->td_ucred->cr_uid) {
                        error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
                            0);
                        if (error) {
                                vput(vp);
                                return (error);
                        }
                }
                error = vinvalbuf(vp, V_SAVE, td, 0, 0);
                if (error != 0) {
                        vput(vp);
                        return (error);
                }
                if (vp->v_type != VDIR) {
                        vput(vp);
                        return (ENOTDIR);
                }
                /*
                 * Refuse if a mount is already in progress on, or present
                 * on, this directory; otherwise claim it with VI_MOUNT.
                 */
                VI_LOCK(vp);
                if ((vp->v_iflag & VI_MOUNT) != 0 ||
                    vp->v_mountedhere != NULL) {
                        VI_UNLOCK(vp);
                        vput(vp);
                        return (EBUSY);
                }
                vp->v_iflag |= VI_MOUNT;
                VI_UNLOCK(vp);

                /*
                 * Allocate and initialize the filesystem.
                 */
                mp = vfs_mount_alloc(vp, vfsp, fspath, td);
                VOP_UNLOCK(vp, 0, td);

                /* XXXMAC: pass to vfs_mount_alloc? */
                mp->mnt_optnew = fsdata;
        }

        /*
         * Set the mount level flags.
         */
        MNT_ILOCK(mp);
        mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) |
                (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS |
                            MNT_RDONLY));
        if ((mp->mnt_flag & MNT_ASYNC) == 0)
                mp->mnt_kern_flag &= ~MNTK_ASYNC;
        MNT_IUNLOCK(mp);
        /*
         * Mount the filesystem.
         * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
         * get.  No freeing of cn_pnbuf.
         */
        error = VFS_MOUNT(mp, td);

        /*
         * Process the export option only if we are
         * updating mount options.
         */
        if (!error && (fsflags & MNT_UPDATE)) {
                if (vfs_copyopt(mp->mnt_optnew, "export", &export,
                    sizeof(export)) == 0)
                        error = vfs_export(mp, &export);
        }

        /* On success the staged options replace the mount's old ones. */
        if (!error) {
                if (mp->mnt_opt != NULL)
                        vfs_freeopts(mp->mnt_opt);
                mp->mnt_opt = mp->mnt_optnew;
                (void)VFS_STATFS(mp, &mp->mnt_stat, td);
        }
        /*
         * Prevent external consumers of mount options from reading
         * mnt_optnew.
         */
        mp->mnt_optnew = NULL;
        if (mp->mnt_flag & MNT_UPDATE) {
                MNT_ILOCK(mp);
                /*
                 * A failed update puts back the flags saved above, keeping
                 * only the current MNT_QUOTA bit; a successful one just
                 * drops the transient request bits.
                 */
                if (error)
                        mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) |
                                (flag & ~MNT_QUOTA);
                else
                        mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD |
                                          MNT_FORCE | MNT_SNAPSHOT);
                if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
                        mp->mnt_kern_flag |= MNTK_ASYNC;
                else
                        mp->mnt_kern_flag &= ~MNTK_ASYNC;
                MNT_IUNLOCK(mp);
                /*
                 * A read-write mount gets a syncer vnode if it lacks one;
                 * a read-only mount gives its syncer vnode up.
                 */
                if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                        if (mp->mnt_syncer == NULL)
                                error = vfs_allocate_syncvnode(mp);
                } else {
                        if (mp->mnt_syncer != NULL)
                                vrele(mp->mnt_syncer);
                        mp->mnt_syncer = NULL;
                }
                vfs_unbusy(mp, td);
                /* The update is finished; release the VI_MOUNT claim. */
                VI_LOCK(vp);
                vp->v_iflag &= ~VI_MOUNT;
                VI_UNLOCK(vp);
                vrele(vp);
                return (error);
        }
        /* New mount: settle the async state, then relock the covered vnode. */
        MNT_ILOCK(mp);
        if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
                mp->mnt_kern_flag |= MNTK_ASYNC;
        else
                mp->mnt_kern_flag &= ~MNTK_ASYNC;
        MNT_IUNLOCK(mp);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        /*
         * Put the new filesystem on the mount list after root.
         */
        cache_purge(vp);
        if (!error) {
                struct vnode *newdp;

                VI_LOCK(vp);
                vp->v_iflag &= ~VI_MOUNT;
                VI_UNLOCK(vp);
                vp->v_mountedhere = mp;
                mtx_lock(&mountlist_mtx);
                TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
                mtx_unlock(&mountlist_mtx);
                vfs_event_signal(NULL, VQ_MOUNT, 0);
                if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
                        panic("mount: lost mount");
                VOP_UNLOCK(newdp, 0, td);
                VOP_UNLOCK(vp, 0, td);
                mountcheckdirs(vp, newdp);
                vrele(newdp);
                if ((mp->mnt_flag & MNT_RDONLY) == 0)
                        error = vfs_allocate_syncvnode(mp);
                vfs_unbusy(mp, td);
                if (error)
                        vrele(vp);
        } else {
                /* VFS_MOUNT() failed: undo the claim and discard the mount. */
                VI_LOCK(vp);
                vp->v_iflag &= ~VI_MOUNT;
                VI_UNLOCK(vp);
                vfs_unbusy(mp, td);
                vfs_mount_destroy(mp);
                vput(vp);
        }
        return (error);
}
 1128 
 1129 /*
 1130  * Unmount a filesystem.
 1131  *
 1132  * Note: unmount takes a path to the vnode mounted on as argument, not
 1133  * special file (as before).
 1134  */
 1135 #ifndef _SYS_SYSPROTO_H_
 1136 struct unmount_args {
 1137         char    *path;
 1138         int     flags;
 1139 };
 1140 #endif
 1141 /* ARGSUSED */
 1142 int
 1143 unmount(td, uap)
 1144         struct thread *td;
 1145         register struct unmount_args /* {
 1146                 char *path;
 1147                 int flags;
 1148         } */ *uap;
 1149 {
 1150         struct mount *mp;
 1151         char *pathbuf;
 1152         int error, id0, id1;
 1153 
 1154         if (jailed(td->td_ucred) || usermount == 0) {
 1155                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1156                 if (error)
 1157                         return (error);
 1158         }
 1159 
 1160         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1161         error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
 1162         if (error) {
 1163                 free(pathbuf, M_TEMP);
 1164                 return (error);
 1165         }
 1166         AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
 1167         mtx_lock(&Giant);
 1168         if (uap->flags & MNT_BYFSID) {
 1169                 /* Decode the filesystem ID. */
 1170                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1171                         mtx_unlock(&Giant);
 1172                         free(pathbuf, M_TEMP);
 1173                         return (EINVAL);
 1174                 }
 1175 
 1176                 mtx_lock(&mountlist_mtx);
 1177                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1178                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1179                             mp->mnt_stat.f_fsid.val[1] == id1)
 1180                                 break;
 1181                 }
 1182                 mtx_unlock(&mountlist_mtx);
 1183         } else {
 1184                 mtx_lock(&mountlist_mtx);
 1185                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1186                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
 1187                                 break;
 1188                 }
 1189                 mtx_unlock(&mountlist_mtx);
 1190         }
 1191         free(pathbuf, M_TEMP);
 1192         if (mp == NULL) {
 1193                 /*
 1194                  * Previously we returned ENOENT for a nonexistent path and
 1195                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1196                  * now, so in the !MNT_BYFSID case return the more likely
 1197                  * EINVAL for compatibility.
 1198                  */
 1199                 mtx_unlock(&Giant);
 1200                 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1201         }
 1202 
 1203         /*
 1204          * Don't allow unmounting the root filesystem.
 1205          */
 1206         if (mp->mnt_flag & MNT_ROOTFS) {
 1207                 mtx_unlock(&Giant);
 1208                 return (EINVAL);
 1209         }
 1210         error = dounmount(mp, uap->flags, td);
 1211         mtx_unlock(&Giant);
 1212         return (error);
 1213 }
 1214 
/*
 * Do the actual filesystem unmount.
 *
 * The caller must hold Giant (asserted below).  Of the bits in flags only
 * MNT_FORCE is examined here; the whole value is passed on to
 * VFS_UNMOUNT().
 *
 * On success the mount is removed from the mountlist, detached from the
 * vnode it covered, destroyed, and 0 is returned.  An ENXIO result from
 * the sync/unmount step is treated like success.
 *
 * Otherwise an errno value is returned and the mount is left in place:
 * EBUSY if the covered vnode no longer refers to this mount or another
 * unmount is already in progress, or the error from vfs_suser(),
 * lockmgr(), VFS_SYNC() or VFS_UNMOUNT().
 */
int
dounmount(mp, flags, td)
        struct mount *mp;
        int flags;
        struct thread *td;
{
        struct vnode *coveredvp, *fsrootvp;
        int error;
        int async_flag;
        int mnt_gen_r;

        mtx_assert(&Giant, MA_OWNED);

        if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
                /* Sample the generation so a recycled mp can be detected. */
                mnt_gen_r = mp->mnt_gen;
                VI_LOCK(coveredvp);
                vholdl(coveredvp);
                vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, td);
                vdrop(coveredvp);
                /*
                 * Check for mp being unmounted while waiting for the
                 * covered vnode lock.
                 */
                if (coveredvp->v_mountedhere != mp ||
                    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
                        VOP_UNLOCK(coveredvp, 0, td);
                        return (EBUSY);
                }
        }
        /*
         * Only privileged root, or (if MNT_USER is set) the user that did the
         * original mount is permitted to unmount this filesystem.
         */
        error = vfs_suser(mp, td);
        if (error) {
                if (coveredvp)
                        VOP_UNLOCK(coveredvp, 0, td);
                return (error);
        }

        MNT_ILOCK(mp);
        /* Only one unmount of a given mount may be in progress. */
        if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
                MNT_IUNLOCK(mp);
                if (coveredvp)
                        VOP_UNLOCK(coveredvp, 0, td);
                return (EBUSY);
        }
        mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ;
        /* Allow filesystems to detect that a forced unmount is in progress. */
        if (flags & MNT_FORCE)
                mp->mnt_kern_flag |= MNTK_UNMOUNTF;
        /*
         * Drain the mount lock.  A non-forced unmount passes LK_NOWAIT;
         * the mount interlock is handed to lockmgr() via LK_INTERLOCK.
         */
        error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
            ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
        if (error) {
                /* Could not drain: clear the unmount state and wake waiters. */
                MNT_ILOCK(mp);
                mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
                    MNTK_UNMOUNTF);
                if (mp->mnt_kern_flag & MNTK_MWAIT)
                        wakeup(mp);
                MNT_IUNLOCK(mp);
                if (coveredvp)
                        VOP_UNLOCK(coveredvp, 0, td);
                return (error);
        }
        vn_start_write(NULL, &mp, V_WAIT);

        if (mp->mnt_flag & MNT_EXPUBLIC)
                vfs_setpublicfs(NULL, NULL, NULL);

        vfs_msync(mp, MNT_WAIT);
        MNT_ILOCK(mp);
        /* Remember MNT_ASYNC so it can be restored if the unmount fails. */
        async_flag = mp->mnt_flag & MNT_ASYNC;
        mp->mnt_flag &= ~MNT_ASYNC;
        mp->mnt_kern_flag &= ~MNTK_ASYNC;
        MNT_IUNLOCK(mp);
        cache_purgevfs(mp);     /* remove cache entries for this file sys */
        if (mp->mnt_syncer != NULL)
                vrele(mp->mnt_syncer);
        /*
         * For forced unmounts, move process cdir/rdir refs on the fs root
         * vnode to the covered vnode.  For non-forced unmounts we want
         * such references to cause an EBUSY error.
         */
        if ((flags & MNT_FORCE) &&
            VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
                if (mp->mnt_vnodecovered != NULL)
                        mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
                if (fsrootvp == rootvnode) {
                        vrele(rootvnode);
                        rootvnode = NULL;
                }
                vput(fsrootvp);
        }
        /*
         * VFS_UNMOUNT() is attempted when the mount is read-only, when
         * VFS_SYNC() succeeds, or when the unmount is forced.
         */
        if (((mp->mnt_flag & MNT_RDONLY) ||
             (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
            (flags & MNT_FORCE)) {
                error = VFS_UNMOUNT(mp, flags, td);
        }
        vn_finished_write(mp);
        /*
         * If we failed to flush the dirty blocks for this mount point,
         * undo all the cdir/rdir and rootvnode changes we made above.
         * Unless we failed to do so because the device is reporting that
         * it doesn't exist anymore.
         */
        if (error && error != ENXIO) {
                if ((flags & MNT_FORCE) &&
                    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
                        if (mp->mnt_vnodecovered != NULL)
                                mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
                        if (rootvnode == NULL) {
                                rootvnode = fsrootvp;
                                vref(rootvnode);
                        }
                        vput(fsrootvp);
                }
                MNT_ILOCK(mp);
                mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ;
                /*
                 * A read-write mount left without a syncer vnode gets a new
                 * one; the interlock is dropped around the allocation.
                 */
                if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
                        MNT_IUNLOCK(mp);
                        (void) vfs_allocate_syncvnode(mp);
                        MNT_ILOCK(mp);
                }
                mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
                mp->mnt_flag |= async_flag;
                if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
                        mp->mnt_kern_flag |= MNTK_ASYNC;
                lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
                if (mp->mnt_kern_flag & MNTK_MWAIT)
                        wakeup(mp);
                MNT_IUNLOCK(mp);
                if (coveredvp)
                        VOP_UNLOCK(coveredvp, 0, td);
                return (error);
        }
        /* Unmounted: unlink from the mountlist and from the covered vnode. */
        mtx_lock(&mountlist_mtx);
        TAILQ_REMOVE(&mountlist, mp, mnt_list);
        mtx_unlock(&mountlist_mtx);
        if (coveredvp != NULL) {
                coveredvp->v_mountedhere = NULL;
                vput(coveredvp);
        }
        vfs_event_signal(NULL, VQ_UNMOUNT, 0);
        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
        vfs_mount_destroy(mp);
        return (0);
}
 1365 
 1366 /*
 1367  * ---------------------------------------------------------------------
 1368  * Mounting of root filesystem
 1369  *
 1370  */
 1371 
/*
 * One outstanding hold on the root mount.  `who' is the label that
 * root_mount_prepare() prints while it waits; `list' links the token
 * onto root_holds.
 */
struct root_hold_token {
        const char                      *who;
        LIST_ENTRY(root_hold_token)     list;
};

/* Holds that have not been released yet; modified under mountlist_mtx. */
static LIST_HEAD(, root_hold_token)     root_holds =
    LIST_HEAD_INITIALIZER(&root_holds);

/*
 * Set to 1 by root_mount_done(); read by root_mounted() and
 * root_mount_wait().
 */
static int root_mount_complete;
 1381 
 1382 /*
 1383  * Hold root mount.
 1384  */
 1385 struct root_hold_token *
 1386 root_mount_hold(const char *identifier)
 1387 {
 1388         struct root_hold_token *h;
 1389 
 1390         h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
 1391         h->who = identifier;
 1392         mtx_lock(&mountlist_mtx);
 1393         LIST_INSERT_HEAD(&root_holds, h, list);
 1394         mtx_unlock(&mountlist_mtx);
 1395         return (h);
 1396 }
 1397 
 1398 /*
 1399  * Release root mount.
 1400  */
 1401 void
 1402 root_mount_rel(struct root_hold_token *h)
 1403 {
 1404 
 1405         mtx_lock(&mountlist_mtx);
 1406         LIST_REMOVE(h, list);
 1407         wakeup(&root_holds);
 1408         mtx_unlock(&mountlist_mtx);
 1409         free(h, M_DEVBUF);
 1410 }
 1411 
 1412 /*
 1413  * Wait for all subsystems to release root mount.
 1414  */
 1415 static void
 1416 root_mount_prepare(void)
 1417 {
 1418         struct root_hold_token *h;
 1419 
 1420         for (;;) {
 1421                 DROP_GIANT();
 1422                 g_waitidle();
 1423                 PICKUP_GIANT();
 1424                 mtx_lock(&mountlist_mtx);
 1425                 if (LIST_EMPTY(&root_holds)) {
 1426                         mtx_unlock(&mountlist_mtx);
 1427                         break;
 1428                 }
 1429                 printf("Root mount waiting for:");
 1430                 LIST_FOREACH(h, &root_holds, list)
 1431                         printf(" %s", h->who);
 1432                 printf("\n");
 1433                 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
 1434                     hz);
 1435         }
 1436 }
 1437 
 1438 /*
 1439  * Root was mounted, share the good news.
 1440  */
 1441 static void
 1442 root_mount_done(void)
 1443 {
 1444 
 1445         /*
 1446          * Use a mutex to prevent the wakeup being missed and waiting for
 1447          * an extra 1 second sleep.
 1448          */
 1449         mtx_lock(&mountlist_mtx);
 1450         root_mount_complete = 1;
 1451         wakeup(&root_mount_complete);
 1452         mtx_unlock(&mountlist_mtx);
 1453 }
 1454 
 1455 /*
 1456  * Return true if root is already mounted.
 1457  */
 1458 int
 1459 root_mounted(void)
 1460 {
 1461 
 1462         /* No mutex is acquired here because int stores are atomic. */
 1463         return (root_mount_complete);
 1464 }
 1465 
 1466 /*
 1467  * Wait until root is mounted.
 1468  */
 1469 void
 1470 root_mount_wait(void)
 1471 {
 1472 
 1473         /*
 1474          * Panic on an obvious deadlock - the function can't be called from
 1475          * a thread which is doing the whole SYSINIT stuff.
 1476          */
 1477         KASSERT(curthread->td_proc->p_pid != 0,
 1478             ("root_mount_wait: cannot be called from the swapper thread"));
 1479         mtx_lock(&mountlist_mtx);
 1480         while (!root_mount_complete) {
 1481                 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
 1482                     hz);
 1483         }
 1484         mtx_unlock(&mountlist_mtx);
 1485 }
 1486 
 1487 static void
 1488 set_rootvnode(struct thread *td)
 1489 {
 1490         struct proc *p;
 1491 
 1492         if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
 1493                 panic("Cannot find root vnode");
 1494 
 1495         VOP_UNLOCK(rootvnode, 0, td);
 1496 
 1497         p = td->td_proc;
 1498         FILEDESC_XLOCK(p->p_fd);
 1499 
 1500         if (p->p_fd->fd_cdir != NULL)
 1501                 vrele(p->p_fd->fd_cdir);
 1502         p->p_fd->fd_cdir = rootvnode;
 1503         VREF(rootvnode);
 1504 
 1505         if (p->p_fd->fd_rdir != NULL)
 1506                 vrele(p->p_fd->fd_rdir);
 1507         p->p_fd->fd_rdir = rootvnode;
 1508         VREF(rootvnode);
 1509 
 1510         FILEDESC_XUNLOCK(p->p_fd);
 1511 
 1512         EVENTHANDLER_INVOKE(mountroot);
 1513 }
 1514 
 1515 /*
 1516  * Mount /devfs as our root filesystem, but do not put it on the mountlist
 1517  * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
 1518  */
 1519 
 1520 static void
 1521 devfs_first(void)
 1522 {
 1523         struct thread *td = curthread;
 1524         struct vfsoptlist *opts;
 1525         struct vfsconf *vfsp;
 1526         struct mount *mp = NULL;
 1527         int error;
 1528 
 1529         vfsp = vfs_byname("devfs");
 1530         KASSERT(vfsp != NULL, ("Could not find devfs by name"));
 1531         if (vfsp == NULL)
 1532                 return;
 1533 
 1534         mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
 1535 
 1536         error = VFS_MOUNT(mp, td);
 1537         KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 1538         if (error)
 1539                 return;
 1540 
 1541         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 1542         TAILQ_INIT(opts);
 1543         mp->mnt_opt = opts;
 1544 
 1545         mtx_lock(&mountlist_mtx);
 1546         TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1547         mtx_unlock(&mountlist_mtx);
 1548 
 1549         set_rootvnode(td);
 1550 
 1551         error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
 1552         if (error)
 1553                 printf("kern_symlink /dev -> / returns %d\n", error);
 1554 }
 1555 
 1556 /*
 1557  * Surgically move our devfs to be mounted on /dev.
 1558  */
 1559 
 1560 static void
 1561 devfs_fixup(struct thread *td)
 1562 {
 1563         struct nameidata nd;
 1564         int error;
 1565         struct vnode *vp, *dvp;
 1566         struct mount *mp;
 1567 
 1568         /* Remove our devfs mount from the mountlist and purge the cache */
 1569         mtx_lock(&mountlist_mtx);
 1570         mp = TAILQ_FIRST(&mountlist);
 1571         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1572         mtx_unlock(&mountlist_mtx);
 1573         cache_purgevfs(mp);
 1574 
 1575         VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
 1576         VI_LOCK(dvp);
 1577         dvp->v_iflag &= ~VI_MOUNT;
 1578         VI_UNLOCK(dvp);
 1579         dvp->v_mountedhere = NULL;
 1580 
 1581         /* Set up the real rootvnode, and purge the cache */
 1582         TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
 1583         set_rootvnode(td);
 1584         cache_purgevfs(rootvnode->v_mount);
 1585 
 1586         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 1587         error = namei(&nd);
 1588         if (error) {
 1589                 printf("Lookup of /dev for devfs, error: %d\n", error);
 1590                 return;
 1591         }
 1592         NDFREE(&nd, NDF_ONLY_PNBUF);
 1593         vp = nd.ni_vp;
 1594         if (vp->v_type != VDIR) {
 1595                 vput(vp);
 1596         }
 1597         error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 1598         if (error) {
 1599                 vput(vp);
 1600         }
 1601         cache_purge(vp);
 1602         mp->mnt_vnodecovered = vp;
 1603         vp->v_mountedhere = mp;
 1604         mtx_lock(&mountlist_mtx);
 1605         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1606         mtx_unlock(&mountlist_mtx);
 1607         VOP_UNLOCK(vp, 0, td);
 1608         vput(dvp);
 1609         vfs_unbusy(mp, td);
 1610 
 1611         /* Unlink the no longer needed /dev/dev -> / symlink */
 1612         kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
 1613 }
 1614 
 1615 /*
 1616  * Report errors during filesystem mounting.
 1617  */
 1618 void
 1619 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1620 {
 1621         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1622         va_list ap;
 1623         int error, len;
 1624         char *errmsg;
 1625 
 1626         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1627         if (error || errmsg == NULL || len <= 0)
 1628                 return;
 1629 
 1630         va_start(ap, fmt);
 1631         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1632         va_end(ap);
 1633 }
 1634 
 1635 /*
 1636  * Find and mount the root filesystem
 1637  */
 1638 void
 1639 vfs_mountroot(void)
 1640 {
 1641         char *cp;
 1642         int error, i, asked = 0;
 1643 
 1644         root_mount_prepare();
 1645 
 1646         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
 1647             NULL, NULL, mount_init, mount_fini,
 1648             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1649         devfs_first();
 1650 
 1651         /*
 1652          * We are booted with instructions to prompt for the root filesystem.
 1653          */
 1654         if (boothowto & RB_ASKNAME) {
 1655                 if (!vfs_mountroot_ask())
 1656                         goto mounted;
 1657                 asked = 1;
 1658         }
 1659 
 1660         /*
 1661          * The root filesystem information is compiled in, and we are
 1662          * booted with instructions to use it.
 1663          */
 1664         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1665                 if (!vfs_mountroot_try(ctrootdevname))
 1666                         goto mounted;
 1667                 ctrootdevname = NULL;
 1668         }
 1669 
 1670         /*
 1671          * We've been given the generic "use CDROM as root" flag.  This is
 1672          * necessary because one media may be used in many different
 1673          * devices, so we need to search for them.
 1674          */
 1675         if (boothowto & RB_CDROM) {
 1676                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1677                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1678                                 goto mounted;
 1679                 }
 1680         }
 1681 
 1682         /*
 1683          * Try to use the value read by the loader from /etc/fstab, or
 1684          * supplied via some other means.  This is the preferred
 1685          * mechanism.
 1686          */
 1687         cp = getenv("vfs.root.mountfrom");
 1688         if (cp != NULL) {
 1689                 error = vfs_mountroot_try(cp);
 1690                 freeenv(cp);
 1691                 if (!error)
 1692                         goto mounted;
 1693         }
 1694 
 1695         /*
 1696          * Try values that may have been computed by code during boot
 1697          */
 1698         if (!vfs_mountroot_try(rootdevnames[0]))
 1699                 goto mounted;
 1700         if (!vfs_mountroot_try(rootdevnames[1]))
 1701                 goto mounted;
 1702 
 1703         /*
 1704          * If we (still) have a compiled-in default, try it.
 1705          */
 1706         if (ctrootdevname != NULL)
 1707                 if (!vfs_mountroot_try(ctrootdevname))
 1708                         goto mounted;
 1709         /*
 1710          * Everything so far has failed, prompt on the console if we haven't
 1711          * already tried that.
 1712          */
 1713         if (!asked)
 1714                 if (!vfs_mountroot_ask())
 1715                         goto mounted;
 1716 
 1717         panic("Root mount failed, startup aborted.");
 1718 
 1719 mounted:
 1720         root_mount_done();
 1721 }
 1722 
 1723 /*
 1724  * Mount (mountfrom) as the root filesystem.
 1725  */
 1726 static int
 1727 vfs_mountroot_try(const char *mountfrom)
 1728 {
 1729         struct mount    *mp;
 1730         char            *vfsname, *path;
 1731         time_t          timebase;
 1732         int             error;
 1733         char            patt[32];
 1734 
 1735         vfsname = NULL;
 1736         path    = NULL;
 1737         mp      = NULL;
 1738         error   = EINVAL;
 1739 
 1740         if (mountfrom == NULL)
 1741                 return (error);         /* don't complain */
 1742         printf("Trying to mount root from %s\n", mountfrom);
 1743 
 1744         /* parse vfs name and path */
 1745         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1746         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1747         vfsname[0] = path[0] = 0;
 1748         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1749         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1750                 goto out;
 1751 
 1752         if (path[0] == '\0')
 1753                 strcpy(path, ROOTNAME);
 1754 
 1755         error = kernel_vmount(
 1756             MNT_RDONLY | MNT_ROOTFS,
 1757             "fstype", vfsname,
 1758             "fspath", "/",
 1759             "from", path,
 1760             NULL);
 1761         if (error == 0) {
 1762                 /*
 1763                  * We mount devfs prior to mounting the / FS, so the first
 1764                  * entry will typically be devfs.
 1765                  */
 1766                 mp = TAILQ_FIRST(&mountlist);
 1767                 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
 1768 
 1769                 /*
 1770                  * Iterate over all currently mounted file systems and use
 1771                  * the time stamp found to check and/or initialize the RTC.
 1772                  * Typically devfs has no time stamp and the only other FS
 1773                  * is the actual / FS.
 1774                  * Call inittodr() only once and pass it the largest of the
 1775                  * timestamps we encounter.
 1776                  */
 1777                 timebase = 0;
 1778                 do {
 1779                         if (mp->mnt_time > timebase)
 1780                                 timebase = mp->mnt_time;
 1781                         mp = TAILQ_NEXT(mp, mnt_list);
 1782                 } while (mp != NULL);
 1783                 inittodr(timebase);
 1784 
 1785                 devfs_fixup(curthread);
 1786         }
 1787 out:
 1788         free(path, M_MOUNT);
 1789         free(vfsname, M_MOUNT);
 1790         return (error);
 1791 }
 1792 
 1793 /*
 1794  * ---------------------------------------------------------------------
 1795  * Interactive root filesystem selection code.
 1796  */
 1797 
 1798 static int
 1799 vfs_mountroot_ask(void)
 1800 {
 1801         char name[128];
 1802 
 1803         for(;;) {
 1804                 printf("\nManual root filesystem specification:\n");
 1805                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1806 #if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
 1807                 printf("                       eg. ufs:da0s1a\n");
 1808 #else
 1809                 printf("                       eg. ufs:/dev/da0a\n");
 1810 #endif
 1811                 printf("  ?                  List valid disk boot devices\n");
 1812                 printf("  <empty line>       Abort manual input\n");
 1813                 printf("\nmountroot> ");
 1814                 gets(name, sizeof(name), 1);
 1815                 if (name[0] == '\0')
 1816                         return (1);
 1817                 if (name[0] == '?') {
 1818                         printf("\nList of GEOM managed disk devices:\n  ");
 1819                         g_dev_print();
 1820                         continue;
 1821                 }
 1822                 if (!vfs_mountroot_try(name))
 1823                         return (0);
 1824         }
 1825 }
 1826 
 1827 /*
 1828  * ---------------------------------------------------------------------
 1829  * Functions for querying mount options/arguments from filesystems.
 1830  */
 1831 
 1832 /*
 1833  * Check that no unknown options are given
 1834  */
 1835 int
 1836 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1837 {
 1838         struct vfsopt *opt;
 1839         char errmsg[255];
 1840         const char **t, *p, *q;
 1841         int ret = 0;
 1842 
 1843         TAILQ_FOREACH(opt, opts, link) {
 1844                 p = opt->name;
 1845                 q = NULL;
 1846                 if (p[0] == 'n' && p[1] == 'o')
 1847                         q = p + 2;
 1848                 for(t = global_opts; *t != NULL; t++) {
 1849                         if (strcmp(*t, p) == 0)
 1850                                 break;
 1851                         if (q != NULL) {
 1852                                 if (strcmp(*t, q) == 0)
 1853                                         break;
 1854                         }
 1855                 }
 1856                 if (*t != NULL)
 1857                         continue;
 1858                 for(t = legal; *t != NULL; t++) {
 1859                         if (strcmp(*t, p) == 0)
 1860                                 break;
 1861                         if (q != NULL) {
 1862                                 if (strcmp(*t, q) == 0)
 1863                                         break;
 1864                         }
 1865                 }
 1866                 if (*t != NULL)
 1867                         continue;
 1868                 snprintf(errmsg, sizeof(errmsg),
 1869                     "mount option <%s> is unknown", p);
 1870                 printf("%s\n", errmsg);
 1871                 ret = EINVAL;
 1872         }
 1873         if (ret != 0) {
 1874                 TAILQ_FOREACH(opt, opts, link) {
 1875                         if (strcmp(opt->name, "errmsg") == 0) {
 1876                                 strncpy((char *)opt->value, errmsg, opt->len);
 1877                         }
 1878                 }
 1879         }
 1880         return (ret);
 1881 }
 1882 
 1883 /*
 1884  * Get a mount option by its name.
 1885  *
 1886  * Return 0 if the option was found, ENOENT otherwise.
 1887  * If len is non-NULL it will be filled with the length
 1888  * of the option. If buf is non-NULL, it will be filled
 1889  * with the address of the option.
 1890  */
 1891 int
 1892 vfs_getopt(opts, name, buf, len)
 1893         struct vfsoptlist *opts;
 1894         const char *name;
 1895         void **buf;
 1896         int *len;
 1897 {
 1898         struct vfsopt *opt;
 1899 
 1900         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1901 
 1902         TAILQ_FOREACH(opt, opts, link) {
 1903                 if (strcmp(name, opt->name) == 0) {
 1904                         if (len != NULL)
 1905                                 *len = opt->len;
 1906                         if (buf != NULL)
 1907                                 *buf = opt->value;
 1908                         return (0);
 1909                 }
 1910         }
 1911         return (ENOENT);
 1912 }
 1913 
 1914 static int
 1915 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 1916 {
 1917         struct vfsopt *opt;
 1918         int i;
 1919 
 1920         if (opts == NULL)
 1921                 return (-1);
 1922 
 1923         i = 0;
 1924         TAILQ_FOREACH(opt, opts, link) {
 1925                 if (strcmp(name, opt->name) == 0)
 1926                         return (i);
 1927                 ++i;
 1928         }
 1929         return (-1);
 1930 }
 1931 
 1932 char *
 1933 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 1934 {
 1935         struct vfsopt *opt;
 1936 
 1937         *error = 0;
 1938         TAILQ_FOREACH(opt, opts, link) {
 1939                 if (strcmp(name, opt->name) != 0)
 1940                         continue;
 1941                 if (((char *)opt->value)[opt->len - 1] != '\0') {
 1942                         *error = EINVAL;
 1943                         return (NULL);
 1944                 }
 1945                 return (opt->value);
 1946         }
 1947         *error = ENOENT;
 1948         return (NULL);
 1949 }
 1950 
 1951 int
 1952 vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
 1953 {
 1954         struct vfsopt *opt;
 1955 
 1956         TAILQ_FOREACH(opt, opts, link) {
 1957                 if (strcmp(name, opt->name) == 0) {
 1958                         if (w != NULL)
 1959                                 *w |= val;
 1960                         return (1);
 1961                 }
 1962         }
 1963         if (w != NULL)
 1964                 *w &= ~val;
 1965         return (0);
 1966 }
 1967 
 1968 int
 1969 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 1970 {
 1971         va_list ap;
 1972         struct vfsopt *opt;
 1973         int ret;
 1974 
 1975         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1976 
 1977         TAILQ_FOREACH(opt, opts, link) {
 1978                 if (strcmp(name, opt->name) != 0)
 1979                         continue;
 1980                 if (opt->len == 0 || opt->value == NULL)
 1981                         return (0);
 1982                 if (((char *)opt->value)[opt->len - 1] != '\0')
 1983                         return (0);
 1984                 va_start(ap, fmt);
 1985                 ret = vsscanf(opt->value, fmt, ap);
 1986                 va_end(ap);
 1987                 return (ret);
 1988         }
 1989         return (0);
 1990 }
 1991 
 1992 /*
 1993  * Find and copy a mount option.
 1994  *
 1995  * The size of the buffer has to be specified
 1996  * in len, if it is not the same length as the
 1997  * mount option, EINVAL is returned.
 1998  * Returns ENOENT if the option is not found.
 1999  */
 2000 int
 2001 vfs_copyopt(opts, name, dest, len)
 2002         struct vfsoptlist *opts;
 2003         const char *name;
 2004         void *dest;
 2005         int len;
 2006 {
 2007         struct vfsopt *opt;
 2008 
 2009         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 2010 
 2011         TAILQ_FOREACH(opt, opts, link) {
 2012                 if (strcmp(name, opt->name) == 0) {
 2013                         if (len != opt->len)
 2014                                 return (EINVAL);
 2015                         bcopy(opt->value, dest, opt->len);
 2016                         return (0);
 2017                 }
 2018         }
 2019         return (ENOENT);
 2020 }
 2021 
 2022 /*
 2023  * This is a helper function for filesystems to traverse their
 2024  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 2025  */
 2026 
 2027 struct vnode *
 2028 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 2029 {
 2030         struct vnode *vp;
 2031 
 2032         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2033 
 2034         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2035         if ((*mvp)->v_yield++ == 500) {
 2036                 MNT_IUNLOCK(mp);
 2037                 (*mvp)->v_yield = 0;
 2038                 uio_yield();
 2039                 MNT_ILOCK(mp);
 2040         }
 2041         vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
 2042         while (vp != NULL && vp->v_type == VMARKER)
 2043                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2044 
 2045         /* Check if we are done */
 2046         if (vp == NULL) {
 2047                 __mnt_vnode_markerfree(mvp, mp);
 2048                 return (NULL);
 2049         }
 2050         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2051         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2052         return (vp);
 2053 }
 2054 
 2055 struct vnode *
 2056 __mnt_vnode_first(struct vnode **mvp, struct mount *mp)
 2057 {
 2058         struct vnode *vp;
 2059 
 2060         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2061 
 2062         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2063         while (vp != NULL && vp->v_type == VMARKER)
 2064                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2065 
 2066         /* Check if we are done */
 2067         if (vp == NULL) {
 2068                 *mvp = NULL;
 2069                 return (NULL);
 2070         }
 2071         mp->mnt_holdcnt++;
 2072         MNT_IUNLOCK(mp);
 2073         *mvp = (struct vnode *) malloc(sizeof(struct vnode),
 2074                                        M_VNODE_MARKER,
 2075                                        M_WAITOK | M_ZERO);
 2076         MNT_ILOCK(mp);
 2077         (*mvp)->v_type = VMARKER;
 2078 
 2079         vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2080         while (vp != NULL && vp->v_type == VMARKER)
 2081                 vp = TAILQ_NEXT(vp, v_nmntvnodes);
 2082 
 2083         /* Check if we are done */
 2084         if (vp == NULL) {
 2085                 MNT_IUNLOCK(mp);
 2086                 free(*mvp, M_VNODE_MARKER);
 2087                 MNT_ILOCK(mp);
 2088                 *mvp = NULL;
 2089                 mp->mnt_holdcnt--;
 2090                 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2091                         wakeup(&mp->mnt_holdcnt);
 2092                 return (NULL);
 2093         }
 2094         mp->mnt_markercnt++;
 2095         (*mvp)->v_mount = mp;
 2096         TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 2097         return (vp);
 2098 }
 2099 
 2100 
 2101 void
 2102 __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
 2103 {
 2104 
 2105         if (*mvp == NULL)
 2106                 return;
 2107 
 2108         mtx_assert(MNT_MTX(mp), MA_OWNED);
 2109 
 2110         KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 2111         TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 2112         MNT_IUNLOCK(mp);
 2113         free(*mvp, M_VNODE_MARKER);
 2114         MNT_ILOCK(mp);
 2115         *mvp = NULL;
 2116 
 2117         mp->mnt_markercnt--;
 2118         mp->mnt_holdcnt--;
 2119         if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 2120                 wakeup(&mp->mnt_holdcnt);
 2121 }
 2122 
 2123 
 2124 int
 2125 __vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 2126 {
 2127         int error;
 2128 
 2129         error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
 2130         if (sbp != &mp->mnt_stat)
 2131                 *sbp = mp->mnt_stat;
 2132         return (error);
 2133 }
 2134 
 2135 void
 2136 vfs_mountedfrom(struct mount *mp, const char *from)
 2137 {
 2138 
 2139         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2140         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2141             sizeof mp->mnt_stat.f_mntfromname);
 2142 }
 2143 
 2144 /*
 2145  * ---------------------------------------------------------------------
 2146  * This is the api for building mount args and mounting filesystems from
 2147  * inside the kernel.
 2148  *
 2149  * The API works by accumulation of individual args.  First error is
 2150  * latched.
 2151  *
 2152  * XXX: should be documented in new manpage kernel_mount(9)
 2153  */
 2154 
 2155 /* A memory allocation which must be freed when we are done */
 2156 struct mntaarg {
 2157         SLIST_ENTRY(mntaarg)    next;
 2158 };
 2159 
 2160 /* The header for the mount arguments */
 2161 struct mntarg {
 2162         struct iovec *v;
 2163         int len;
 2164         int error;
 2165         SLIST_HEAD(, mntaarg)   list;
 2166 };
 2167 
 2168 /*
 2169  * Add a boolean argument.
 2170  *
 2171  * flag is the boolean value.
 2172  * name must start with "no".
 2173  */
 2174 struct mntarg *
 2175 mount_argb(struct mntarg *ma, int flag, const char *name)
 2176 {
 2177 
 2178         KASSERT(name[0] == 'n' && name[1] == 'o',
 2179             ("mount_argb(...,%s): name must start with 'no'", name));
 2180 
 2181         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2182 }
 2183 
 2184 /*
 2185  * Add an argument printf style
 2186  */
 2187 struct mntarg *
 2188 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2189 {
 2190         va_list ap;
 2191         struct mntaarg *maa;
 2192         struct sbuf *sb;
 2193         int len;
 2194 
 2195         if (ma == NULL) {
 2196                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2197                 SLIST_INIT(&ma->list);
 2198         }
 2199         if (ma->error)
 2200                 return (ma);
 2201 
 2202         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2203             M_MOUNT, M_WAITOK);
 2204         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2205         ma->v[ma->len].iov_len = strlen(name) + 1;
 2206         ma->len++;
 2207 
 2208         sb = sbuf_new_auto();
 2209         va_start(ap, fmt);
 2210         sbuf_vprintf(sb, fmt, ap);
 2211         va_end(ap);
 2212         sbuf_finish(sb);
 2213         len = sbuf_len(sb) + 1;
 2214         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2215         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2216         bcopy(sbuf_data(sb), maa + 1, len);
 2217         sbuf_delete(sb);
 2218 
 2219         ma->v[ma->len].iov_base = maa + 1;
 2220         ma->v[ma->len].iov_len = len;
 2221         ma->len++;
 2222 
 2223         return (ma);
 2224 }
 2225 
 2226 /*
 2227  * Add an argument which is a userland string.
 2228  */
 2229 struct mntarg *
 2230 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2231 {
 2232         struct mntaarg *maa;
 2233         char *tbuf;
 2234 
 2235         if (val == NULL)
 2236                 return (ma);
 2237         if (ma == NULL) {
 2238                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2239                 SLIST_INIT(&ma->list);
 2240         }
 2241         if (ma->error)
 2242                 return (ma);
 2243         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2244         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2245         tbuf = (void *)(maa + 1);
 2246         ma->error = copyinstr(val, tbuf, len, NULL);
 2247         return (mount_arg(ma, name, tbuf, -1));
 2248 }
 2249 
 2250 /*
 2251  * Plain argument.
 2252  *
 2253  * If length is -1, use printf.
 2254  */
 2255 struct mntarg *
 2256 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2257 {
 2258 
 2259         if (ma == NULL) {
 2260                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2261                 SLIST_INIT(&ma->list);
 2262         }
 2263         if (ma->error)
 2264                 return (ma);
 2265 
 2266         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2267             M_MOUNT, M_WAITOK);
 2268         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2269         ma->v[ma->len].iov_len = strlen(name) + 1;
 2270         ma->len++;
 2271 
 2272         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2273         if (len < 0)
 2274                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2275         else
 2276                 ma->v[ma->len].iov_len = len;
 2277         ma->len++;
 2278         return (ma);
 2279 }
 2280 
 2281 /*
 2282  * Free a mntarg structure
 2283  */
 2284 static void
 2285 free_mntarg(struct mntarg *ma)
 2286 {
 2287         struct mntaarg *maa;
 2288 
 2289         while (!SLIST_EMPTY(&ma->list)) {
 2290                 maa = SLIST_FIRST(&ma->list);
 2291                 SLIST_REMOVE_HEAD(&ma->list, next);
 2292                 free(maa, M_MOUNT);
 2293         }
 2294         free(ma->v, M_MOUNT);
 2295         free(ma, M_MOUNT);
 2296 }
 2297 
 2298 /*
 2299  * Mount a filesystem
 2300  */
 2301 int
 2302 kernel_mount(struct mntarg *ma, int flags)
 2303 {
 2304         struct uio auio;
 2305         int error;
 2306 
 2307         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2308         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2309         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2310 
 2311         auio.uio_iov = ma->v;
 2312         auio.uio_iovcnt = ma->len;
 2313         auio.uio_segflg = UIO_SYSSPACE;
 2314 
 2315         error = ma->error;
 2316         if (!error)
 2317                 error = vfs_donmount(curthread, flags, &auio);
 2318         free_mntarg(ma);
 2319         return (error);
 2320 }
 2321 
 2322 /*
 2323  * A printflike function to mount a filesystem.
 2324  */
 2325 int
 2326 kernel_vmount(int flags, ...)
 2327 {
 2328         struct mntarg *ma = NULL;
 2329         va_list ap;
 2330         const char *cp;
 2331         const void *vp;
 2332         int error;
 2333 
 2334         va_start(ap, flags);
 2335         for (;;) {
 2336                 cp = va_arg(ap, const char *);
 2337                 if (cp == NULL)
 2338                         break;
 2339                 vp = va_arg(ap, const void *);
 2340                 ma = mount_arg(ma, cp, vp, -1);
 2341         }
 2342         va_end(ap);
 2343 
 2344         error = kernel_mount(ma, flags);
 2345         return (error);
 2346 }
Cache object: 9ae70ff5dd77eb97c6977c9bdb645707
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_mount.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c