vfs_mount.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  * Copyright (c) 1999 Michael Smith
   35  * All rights reserved.
   36  * Copyright (c) 1999 Poul-Henning Kamp
   37  * All rights reserved.
   38  *
   39  * Redistribution and use in source and binary forms, with or without
   40  * modification, are permitted provided that the following conditions
   41  * are met:
   42  * 1. Redistributions of source code must retain the above copyright
   43  *    notice, this list of conditions and the following disclaimer.
   44  * 2. Redistributions in binary form must reproduce the above copyright
   45  *    notice, this list of conditions and the following disclaimer in the
   46  *    documentation and/or other materials provided with the distribution.
   47  *
   48  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   58  * SUCH DAMAGE.
   59  */
   60 
   61 #include <sys/cdefs.h>
   62 __FBSDID("$FreeBSD: releng/5.3/sys/kern/vfs_mount.c 136588 2004-10-16 08:43:07Z cvs2svn $");
   63 
   64 #include <sys/param.h>
   65 #include <sys/conf.h>
   66 #include <sys/cons.h>
   67 #include <sys/jail.h>
   68 #include <sys/kernel.h>
   69 #include <sys/linker.h>
   70 #include <sys/mac.h>
   71 #include <sys/malloc.h>
   72 #include <sys/mount.h>
   73 #include <sys/mutex.h>
   74 #include <sys/namei.h>
   75 #include <sys/proc.h>
   76 #include <sys/filedesc.h>
   77 #include <sys/reboot.h>
   78 #include <sys/sysproto.h>
   79 #include <sys/sx.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/sysent.h>
   82 #include <sys/systm.h>
   83 #include <sys/vnode.h>
   84 
   85 #include <geom/geom.h>
   86 
   87 #include <machine/stdarg.h>
   88 
   89 #include "opt_rootdevname.h"
   90 #include "opt_ddb.h"
   91 #include "opt_mac.h"
   92 
   93 #ifdef DDB
   94 #include <ddb/ddb.h>
   95 #endif
   96 
   97 #define ROOTNAME                "root_device"
   98 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   99 
  100 static void     checkdirs(struct vnode *olddp, struct vnode *newdp);
  101 static void     gets(char *cp);
  102 static int      vfs_domount(struct thread *td, const char *fstype,
  103                     char *fspath, int fsflags, void *fsdata, int compat);
  104 static int      vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
  105                     const char *fspath, struct thread *td, struct mount **mpp);
  106 static int      vfs_mountroot_ask(void);
  107 static int      vfs_mountroot_try(const char *mountfrom);
  108 static int      vfs_donmount(struct thread *td, int fsflags,
  109                     struct uio *fsoptions);
  110 
  111 static int      usermount = 0;
  112 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
  113     "Unprivileged users may mount and unmount file systems");
  114 
  115 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
  116 
  117 /* List of mounted filesystems. */
  118 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
  119 
  120 /* For any iteration/modification of mountlist */
  121 struct mtx mountlist_mtx;
  122 
  123 /*
  124  * The vnode of the system's root (/ in the filesystem, without chroot
  125  * active.)
  126  */
  127 struct vnode    *rootvnode;
  128 
  129 /*
  130  * The root filesystem is detailed in the kernel environment variable
  131  * vfs.root.mountfrom, which is expected to be in the general format
  132  *
  133  * <vfsname>:[<path>]
  134  * vfsname   := the name of a VFS known to the kernel and capable
  135  *              of being mounted as root
  136  * path      := disk device name or other data used by the filesystem
  137  *              to locate its physical store
  138  */
  139 
  140 /*
  141  * The root specifiers we will try if RB_CDROM is specified.
  142  */
  143 static char *cdrom_rootdevnames[] = {
  144         "cd9660:cd0",
  145         "cd9660:acd0",
  146         NULL
  147 };
  148 
  149 /* legacy find-root code */
  150 char            *rootdevnames[2] = {NULL, NULL};
  151 struct cdev *rootdev = NULL;
  152 #ifdef ROOTDEVNAME
  153 const char      *ctrootdevname = ROOTDEVNAME;
  154 #else
  155 const char      *ctrootdevname = NULL;
  156 #endif
  157 
  158 /*
  159  * Has to be dynamic as the value of rootdev can change; however, it can't
  160  * change after the root is mounted, so a user process can't access this
  161  * sysctl until after the value is unchangeable.
  162  */
  163 static int
  164 sysctl_rootdev(SYSCTL_HANDLER_ARGS)
  165 {
  166         int error;
  167 
  168         /* _RD prevents this from happening. */
  169         KASSERT(req->newptr == NULL, ("Attempt to change root device name"));
  170 
  171         if (rootdev != NULL)
  172                 error = sysctl_handle_string(oidp, rootdev->si_name, 0, req);
  173         else
  174                 error = sysctl_handle_string(oidp, "", 0, req);
  175 
  176         return (error);
  177 }
  178 
  179 SYSCTL_PROC(_kern, OID_AUTO, rootdev, CTLTYPE_STRING | CTLFLAG_RD,
  180     0, 0, sysctl_rootdev, "A", "Root file system device");
  181 
  182 /* Remove one mount option. */
  183 static void
  184 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  185 {
  186 
  187         TAILQ_REMOVE(opts, opt, link);
  188         free(opt->name, M_MOUNT);
  189         if (opt->value != NULL)
  190                 free(opt->value, M_MOUNT);
  191 #ifdef INVARIANTS
  192         else if (opt->len != 0)
  193                 panic("%s: mount option with NULL value but length != 0",
  194                     __func__);
  195 #endif
  196         free(opt, M_MOUNT);
  197 }
  198 
  199 /* Release all resources related to the mount options. */
  200 static void
  201 vfs_freeopts(struct vfsoptlist *opts)
  202 {
  203         struct vfsopt *opt;
  204 
  205         while (!TAILQ_EMPTY(opts)) {
  206                 opt = TAILQ_FIRST(opts);
  207                 vfs_freeopt(opts, opt);
  208         }
  209         free(opts, M_MOUNT);
  210 }
  211 
  212 /*
  213  * Check if options are equal (with or without the "no" prefix).
  214  */
  215 static int
  216 vfs_equalopts(const char *opt1, const char *opt2)
  217 {
  218 
  219         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  220         if (strcmp(opt1, opt2) == 0)
  221                 return (1);
  222         /* "noopt" vs. "opt" */
  223         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  224                 return (1);
  225         /* "opt" vs. "noopt" */
  226         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  227                 return (1);
  228         return (0);
  229 }
  230 
  231 /*
  232  * If a mount option is specified several times,
  233  * (with or without the "no" prefix) only keep
  234  * the last occurence of it.
  235  */
  236 static void
  237 vfs_sanitizeopts(struct vfsoptlist *opts)
  238 {
  239         struct vfsopt *opt, *opt2, *tmp;
  240 
  241         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  242                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  243                 while (opt2 != NULL) {
  244                         if (vfs_equalopts(opt->name, opt2->name)) {
  245                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  246                                 vfs_freeopt(opts, opt2);
  247                                 opt2 = tmp;
  248                         } else {
  249                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  250                         }
  251                 }
  252         }
  253 }
  254 
  255 /*
  256  * Build a linked list of mount options from a struct uio.
  257  */
  258 static int
  259 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  260 {
  261         struct vfsoptlist *opts;
  262         struct vfsopt *opt;
  263         size_t memused;
  264         unsigned int i, iovcnt;
  265         int error, namelen, optlen;
  266 
  267         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  268         TAILQ_INIT(opts);
  269         memused = 0;
  270         iovcnt = auio->uio_iovcnt;
  271         for (i = 0; i < iovcnt; i += 2) {
  272                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  273                 namelen = auio->uio_iov[i].iov_len;
  274                 optlen = auio->uio_iov[i + 1].iov_len;
  275                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  276                 opt->value = NULL;
  277                 opt->len = optlen;
  278 
  279                 /*
  280                  * Do this early, so jumps to "bad" will free the current
  281                  * option.
  282                  */
  283                 TAILQ_INSERT_TAIL(opts, opt, link);
  284                 memused += sizeof(struct vfsopt) + optlen + namelen;
  285 
  286                 /*
  287                  * Avoid consuming too much memory, and attempts to overflow
  288                  * memused.
  289                  */
  290                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  291                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  292                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  293                         error = EINVAL;
  294                         goto bad;
  295                 }
  296 
  297                 if (auio->uio_segflg == UIO_SYSSPACE) {
  298                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  299                 } else {
  300                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  301                             namelen);
  302                         if (error)
  303                                 goto bad;
  304                 }
  305                 /* Ensure names are null-terminated strings. */
  306                 if (opt->name[namelen - 1] != '\0') {
  307                         error = EINVAL;
  308                         goto bad;
  309                 }
  310                 if (optlen != 0) {
  311                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  312                         if (auio->uio_segflg == UIO_SYSSPACE) {
  313                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  314                                     optlen);
  315                         } else {
  316                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  317                                     opt->value, optlen);
  318                                 if (error)
  319                                         goto bad;
  320                         }
  321                 }
  322         }
  323         vfs_sanitizeopts(opts);
  324         *options = opts;
  325         return (0);
  326 bad:
  327         vfs_freeopts(opts);
  328         return (error);
  329 }
  330 
  331 /*
  332  * Merge the old mount options with the new ones passed
  333  * in the MNT_UPDATE case.
  334  */
  335 static void
  336 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
  337 {
  338         struct vfsopt *opt, *opt2, *new;
  339 
  340         TAILQ_FOREACH(opt, opts, link) {
  341                 /*
  342                  * Check that this option hasn't been redefined
  343                  * nor cancelled with a "no" mount option.
  344                  */
  345                 opt2 = TAILQ_FIRST(toopts);
  346                 while (opt2 != NULL) {
  347                         if (strcmp(opt2->name, opt->name) == 0)
  348                                 goto next;
  349                         if (strncmp(opt2->name, "no", 2) == 0 &&
  350                             strcmp(opt2->name + 2, opt->name) == 0) {
  351                                 vfs_freeopt(toopts, opt2);
  352                                 goto next;
  353                         }
  354                         opt2 = TAILQ_NEXT(opt2, link);
  355                 }
  356                 /* We want this option, duplicate it. */
  357                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  358                 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
  359                 strcpy(new->name, opt->name);
  360                 if (opt->len != 0) {
  361                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  362                         bcopy(opt->value, new->value, opt->len);
  363                 } else {
  364                         new->value = NULL;
  365                 }
  366                 new->len = opt->len;
  367                 TAILQ_INSERT_TAIL(toopts, new, link);
  368 next:
  369                 continue;
  370         }
  371 }
  372 
  373 /*
  374  * New mount API.
  375  */
  376 int
  377 nmount(td, uap)
  378         struct thread *td;
  379         struct nmount_args /* {
  380                 struct iovec *iovp;
  381                 unsigned int iovcnt;
  382                 int flags;
  383         } */ *uap;
  384 {
  385         struct uio *auio;
  386         struct iovec *iov;
  387         unsigned int i;
  388         int error;
  389         u_int iovcnt;
  390 
  391         iovcnt = uap->iovcnt;
  392         /*
  393          * Check that we have an even number of iovec's
  394          * and that we have at least two options.
  395          */
  396         if ((iovcnt & 1) || (iovcnt < 4))
  397                 return (EINVAL);
  398         error = copyinuio(uap->iovp, iovcnt, &auio);
  399         if (error)
  400                 return (error);
  401         iov = auio->uio_iov;
  402         for (i = 0; i < iovcnt; i++) {
  403                 if (iov->iov_len > MMAXOPTIONLEN) {
  404                         free(auio, M_IOV);
  405                         return (EINVAL);
  406                 }
  407                 iov++;
  408         }
  409         error = vfs_donmount(td, uap->flags, auio);
  410         free(auio, M_IOV);
  411         return (error);
  412 }
  413 
  414 int
  415 kernel_mount(struct iovec *iovp, u_int iovcnt, int flags)
  416 {
  417         struct uio auio;
  418         int error;
  419 
  420         /*
  421          * Check that we have an even number of iovec's
  422          * and that we have at least two options.
  423          */
  424         if ((iovcnt & 1) || (iovcnt < 4))
  425                 return (EINVAL);
  426 
  427         auio.uio_iov = iovp;
  428         auio.uio_iovcnt = iovcnt;
  429         auio.uio_segflg = UIO_SYSSPACE;
  430 
  431         error = vfs_donmount(curthread, flags, &auio);
  432         return (error);
  433 }
  434 
  435 int
  436 kernel_vmount(int flags, ...)
  437 {
  438         struct iovec *iovp;
  439         struct uio auio;
  440         va_list ap;
  441         u_int iovcnt, iovlen, len;
  442         const char *cp;
  443         char *buf, *pos;
  444         size_t n;
  445         int error, i;
  446 
  447         len = 0;
  448         va_start(ap, flags);
  449         for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
  450                 len += strlen(cp) + 1;
  451         va_end(ap);
  452 
  453         if (iovcnt < 4 || iovcnt & 1)
  454                 return (EINVAL);
  455 
  456         iovlen = iovcnt * sizeof (struct iovec);
  457         MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
  458         MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
  459         pos = buf;
  460         va_start(ap, flags);
  461         for (i = 0; i < iovcnt; i++) {
  462                 cp = va_arg(ap, const char *);
  463                 copystr(cp, pos, len - (pos - buf), &n);
  464                 iovp[i].iov_base = pos;
  465                 iovp[i].iov_len = n;
  466                 pos += n;
  467         }
  468         va_end(ap);
  469 
  470         auio.uio_iov = iovp;
  471         auio.uio_iovcnt = iovcnt;
  472         auio.uio_segflg = UIO_SYSSPACE;
  473 
  474         error = vfs_donmount(curthread, flags, &auio);
  475         FREE(iovp, M_MOUNT);
  476         FREE(buf, M_MOUNT);
  477         return (error);
  478 }
  479 
  480 /*
  481  * Allocate and initialize the mount point struct.
  482  */
  483 static int
  484 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
  485     const char *fspath, struct thread *td, struct mount **mpp)
  486 {
  487         struct mount *mp;
  488 
  489         mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
  490         TAILQ_INIT(&mp->mnt_nvnodelist);
  491         mp->mnt_nvnodelistsize = 0;
  492         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  493         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
  494         vfs_busy(mp, LK_NOWAIT, 0, td);
  495         mp->mnt_op = vfsp->vfc_vfsops;
  496         mp->mnt_vfc = vfsp;
  497         vfsp->vfc_refcount++;
  498         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  499         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  500         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  501         mp->mnt_vnodecovered = vp;
  502         mp->mnt_cred = crdup(td->td_ucred);
  503         mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
  504         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  505         mp->mnt_iosize_max = DFLTPHYS;
  506 #ifdef MAC
  507         mac_init_mount(mp);
  508         mac_create_mount(td->td_ucred, mp);
  509 #endif
  510         *mpp = mp;
  511         return (0);
  512 }
  513 
  514 /*
  515  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  516  */
  517 void
  518 vfs_mount_destroy(struct mount *mp, struct thread *td)
  519 {
  520 
  521         mp->mnt_vfc->vfc_refcount--;
  522         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
  523                 panic("unmount: dangling vnode");
  524         vfs_unbusy(mp,td);
  525         lockdestroy(&mp->mnt_lock);
  526         mtx_destroy(&mp->mnt_mtx);
  527         if (mp->mnt_kern_flag & MNTK_MWAIT)
  528                 wakeup(mp);
  529 #ifdef MAC
  530         mac_destroy_mount(mp);
  531 #endif
  532         if (mp->mnt_opt != NULL)
  533                 vfs_freeopts(mp->mnt_opt);
  534         crfree(mp->mnt_cred);
  535         free(mp, M_MOUNT);
  536 }
  537 
  538 static int
  539 vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
  540 {
  541         struct vfsoptlist *optlist;
  542         char *fstype, *fspath;
  543         int error, fstypelen, fspathlen;
  544 
  545         error = vfs_buildopts(fsoptions, &optlist);
  546         if (error)
  547                 return (error);
  548 
  549         /*
  550          * We need these two options before the others,
  551          * and they are mandatory for any filesystem.
  552          * Ensure they are NUL terminated as well.
  553          */
  554         fstypelen = 0;
  555         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  556         if (error || fstype[fstypelen - 1] != '\0') {
  557                 error = EINVAL;
  558                 goto bail;
  559         }
  560         fspathlen = 0;
  561         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  562         if (error || fspath[fspathlen - 1] != '\0') {
  563                 error = EINVAL;
  564                 goto bail;
  565         }
  566 
  567         /*
  568          * Be ultra-paranoid about making sure the type and fspath
  569          * variables will fit in our mp buffers, including the
  570          * terminating NUL.
  571          */
  572         if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
  573                 error = ENAMETOOLONG;
  574                 goto bail;
  575         }
  576 
  577         error = vfs_domount(td, fstype, fspath, fsflags, optlist, 0);
  578 bail:
  579         if (error)
  580                 vfs_freeopts(optlist);
  581         return (error);
  582 }
  583 
  584 /*
  585  * Old mount API.
  586  */
  587 #ifndef _SYS_SYSPROTO_H_
  588 struct mount_args {
  589         char    *type;
  590         char    *path;
  591         int     flags;
  592         caddr_t data;
  593 };
  594 #endif
  595 /* ARGSUSED */
  596 int
  597 mount(td, uap)
  598         struct thread *td;
  599         struct mount_args /* {
  600                 char *type;
  601                 char *path;
  602                 int flags;
  603                 caddr_t data;
  604         } */ *uap;
  605 {
  606         char *fstype;
  607         char *fspath;
  608         int error;
  609 
  610         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  611         fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
  612 
  613         /*
  614          * vfs_mount() actually takes a kernel string for `type' and
  615          * `path' now, so extract them.
  616          */
  617         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  618         if (error == 0)
  619                 error = copyinstr(uap->path, fspath, MNAMELEN, NULL);
  620         if (error == 0)
  621                 error = vfs_domount(td, fstype, fspath, uap->flags,
  622                     uap->data, 1);
  623         free(fstype, M_TEMP);
  624         free(fspath, M_TEMP);
  625         return (error);
  626 }
  627 
  628 /*
  629  * vfs_mount(): actually attempt a filesystem mount.
  630  *
  631  * This routine is designed to be a "generic" entry point for routines
  632  * that wish to mount a filesystem. All parameters except `fsdata' are
  633  * pointers into kernel space. `fsdata' is currently still a pointer
  634  * into userspace.
  635  */
  636 int
  637 vfs_mount(td, fstype, fspath, fsflags, fsdata)
  638         struct thread *td;
  639         const char *fstype;
  640         char *fspath;
  641         int fsflags;
  642         void *fsdata;
  643 {
  644 
  645         return (vfs_domount(td, fstype, fspath, fsflags, fsdata, 1));
  646 }
  647 
  648 /*
  649  * vfs_domount(): actually attempt a filesystem mount.
  650  */
  651 static int
  652 vfs_domount(
  653         struct thread *td,      /* Flags common to all filesystems. */
  654         const char *fstype,     /* Filesystem type. */
  655         char *fspath,           /* Mount path. */
  656         int fsflags,            /* Flags common to all filesystems. */
  657         void *fsdata,           /* Options local to the filesystem. */
  658         int compat              /* Invocation from compat syscall. */
  659         )
  660 {
  661         linker_file_t lf;
  662         struct vnode *vp;
  663         struct mount *mp;
  664         struct vfsconf *vfsp;
  665         int error, flag = 0, kern_flag = 0;
  666         struct vattr va;
  667         struct nameidata nd;
  668 
  669         /*
  670          * Be ultra-paranoid about making sure the type and fspath
  671          * variables will fit in our mp buffers, including the
  672          * terminating NUL.
  673          */
  674         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
  675                 return (ENAMETOOLONG);
  676 
  677         if (jailed(td->td_ucred))
  678                 return (EPERM);
  679         if (usermount == 0) {
  680                 if ((error = suser(td)) != 0)
  681                         return (error);
  682         }
  683 
  684         /*
  685          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
  686          */
  687         if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
  688                 if ((error = suser(td)) != 0)
  689                         return (error);
  690         }
  691         /*
  692          * Silently enforce MNT_NODEV, MNT_NOSUID and MNT_USER for
  693          * unprivileged users.
  694          */
  695         if (suser(td) != 0)
  696                 fsflags |= MNT_NODEV | MNT_NOSUID | MNT_USER;
  697         /*
  698          * Get vnode to be covered
  699          */
  700         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
  701         if ((error = namei(&nd)) != 0)
  702                 return (error);
  703         NDFREE(&nd, NDF_ONLY_PNBUF);
  704         vp = nd.ni_vp;
  705         if (fsflags & MNT_UPDATE) {
  706                 if ((vp->v_vflag & VV_ROOT) == 0) {
  707                         vput(vp);
  708                         return (EINVAL);
  709                 }
  710                 mp = vp->v_mount;
  711                 flag = mp->mnt_flag;
  712                 kern_flag = mp->mnt_kern_flag;
  713                 /*
  714                  * We only allow the filesystem to be reloaded if it
  715                  * is currently mounted read-only.
  716                  */
  717                 if ((fsflags & MNT_RELOAD) &&
  718                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
  719                         vput(vp);
  720                         return (EOPNOTSUPP);    /* Needs translation */
  721                 }
  722                 /*
  723                  * Only privileged root, or (if MNT_USER is set) the user that
  724                  * did the original mount is permitted to update it.
  725                  */
  726                 error = vfs_suser(mp, td);
  727                 if (error) {
  728                         vput(vp);
  729                         return (error);
  730                 }
  731                 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
  732                         vput(vp);
  733                         return (EBUSY);
  734                 }
  735                 VI_LOCK(vp);
  736                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  737                     vp->v_mountedhere != NULL) {
  738                         VI_UNLOCK(vp);
  739                         vfs_unbusy(mp, td);
  740                         vput(vp);
  741                         return (EBUSY);
  742                 }
  743                 vp->v_iflag |= VI_MOUNT;
  744                 VI_UNLOCK(vp);
  745                 mp->mnt_flag |= fsflags &
  746                     (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
  747                 VOP_UNLOCK(vp, 0, td);
  748                 if (compat == 0) {
  749                         mp->mnt_optnew = fsdata;
  750                         vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
  751                 }
  752         } else {
  753                 /*
  754                  * If the user is not root, ensure that they own the directory
  755                  * onto which we are attempting to mount.
  756                  */
  757                 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
  758                 if (error) {
  759                         vput(vp);
  760                         return (error);
  761                 }
  762                 if (va.va_uid != td->td_ucred->cr_uid) {
  763                         if ((error = suser(td)) != 0) {
  764                                 vput(vp);
  765                                 return (error);
  766                         }
  767                 }
  768                 if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
  769                         vput(vp);
  770                         return (error);
  771                 }
  772                 if (vp->v_type != VDIR) {
  773                         vput(vp);
  774                         return (ENOTDIR);
  775                 }
  776                 vfsp = vfs_byname(fstype);
  777                 if (vfsp == NULL) {
  778                         /* Only load modules for root (very important!). */
  779                         if ((error = suser(td)) != 0) {
  780                                 vput(vp);
  781                                 return (error);
  782                         }
  783                         error = securelevel_gt(td->td_ucred, 0);
  784                         if (error) {
  785                                 vput(vp);
  786                                 return (error);
  787                         }
  788                         error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
  789                         if (error || lf == NULL) {
  790                                 vput(vp);
  791                                 if (lf == NULL)
  792                                         error = ENODEV;
  793                                 return (error);
  794                         }
  795                         lf->userrefs++;
  796                         /* Look up again to see if the VFS was loaded. */
  797                         vfsp = vfs_byname(fstype);
  798                         if (vfsp == NULL) {
  799                                 lf->userrefs--;
  800                                 linker_file_unload(lf, LINKER_UNLOAD_FORCE);
  801                                 vput(vp);
  802                                 return (ENODEV);
  803                         }
  804                 }
  805                 VI_LOCK(vp);
  806                 if ((vp->v_iflag & VI_MOUNT) != 0 ||
  807                     vp->v_mountedhere != NULL) {
  808                         VI_UNLOCK(vp);
  809                         vput(vp);
  810                         return (EBUSY);
  811                 }
  812                 vp->v_iflag |= VI_MOUNT;
  813                 VI_UNLOCK(vp);
  814 
  815                 /*
  816                  * Allocate and initialize the filesystem.
  817                  */
  818                 error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
  819                 if (error) {
  820                         vput(vp);
  821                         return (error);
  822                 }
  823                 VOP_UNLOCK(vp, 0, td);
  824 
  825                 /* XXXMAC: pass to vfs_mount_alloc? */
  826                 if (compat == 0)
  827                         mp->mnt_optnew = fsdata;
  828         }
  829         /*
  830          * Check if the fs implements the type VFS_[O]MOUNT()
  831          * function we are looking for.
  832          */
  833         if ((compat == 0) == (mp->mnt_op->vfs_omount != NULL)) {
  834                 printf("%s doesn't support the %s mount syscall\n",
  835                     mp->mnt_vfc->vfc_name, compat ? "old" : "new");
  836                 VI_LOCK(vp);
  837                 vp->v_iflag &= ~VI_MOUNT;
  838                 VI_UNLOCK(vp);
  839                 if (mp->mnt_flag & MNT_UPDATE)
  840                         vfs_unbusy(mp, td);
  841                 else
  842                         vfs_mount_destroy(mp, td);
  843                 vrele(vp);
  844                 return (EOPNOTSUPP);
  845         }
  846 
  847         /*
  848          * Set the mount level flags.
  849          */
  850         if (fsflags & MNT_RDONLY)
  851                 mp->mnt_flag |= MNT_RDONLY;
  852         else if (mp->mnt_flag & MNT_RDONLY)
  853                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
  854         mp->mnt_flag &=~ MNT_UPDATEMASK;
  855         mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
  856         /*
  857          * Mount the filesystem.
  858          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
  859          * get.  No freeing of cn_pnbuf.
  860          */
  861         if (compat)
  862             error = VFS_OMOUNT(mp, fspath, fsdata, td);
  863         else
  864             error = VFS_MOUNT(mp, td);
  865         if (!error) {
  866                 if (mp->mnt_opt != NULL)
  867                         vfs_freeopts(mp->mnt_opt);
  868                 mp->mnt_opt = mp->mnt_optnew;
  869         }
  870         /*
  871          * Prevent external consumers of mount options from reading
  872          * mnt_optnew.
  873         */
  874         mp->mnt_optnew = NULL;
  875         if (mp->mnt_flag & MNT_UPDATE) {
  876                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
  877                         mp->mnt_flag &= ~MNT_RDONLY;
  878                 mp->mnt_flag &=
  879                     ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
  880                 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
  881                 if (error) {
  882                         mp->mnt_flag = flag;
  883                         mp->mnt_kern_flag = kern_flag;
  884                 }
  885                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
  886                         if (mp->mnt_syncer == NULL)
  887                                 error = vfs_allocate_syncvnode(mp);
  888                 } else {
  889                         if (mp->mnt_syncer != NULL)
  890                                 vrele(mp->mnt_syncer);
  891                         mp->mnt_syncer = NULL;
  892                 }
  893                 vfs_unbusy(mp, td);
  894                 VI_LOCK(vp);
  895                 vp->v_iflag &= ~VI_MOUNT;
  896                 VI_UNLOCK(vp);
  897                 vrele(vp);
  898                 return (error);
  899         }
  900         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  901         /*
  902          * Put the new filesystem on the mount list after root.
  903          */
  904         cache_purge(vp);
  905         if (!error) {
  906                 struct vnode *newdp;
  907 
  908                 VI_LOCK(vp);
  909                 vp->v_iflag &= ~VI_MOUNT;
  910                 VI_UNLOCK(vp);
  911                 vp->v_mountedhere = mp;
  912                 mtx_lock(&mountlist_mtx);
  913                 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
  914                 mtx_unlock(&mountlist_mtx);
  915                 vfs_event_signal(NULL, VQ_MOUNT, 0);
  916                 if (VFS_ROOT(mp, &newdp, td))
  917                         panic("mount: lost mount");
  918                 checkdirs(vp, newdp);
  919                 vput(newdp);
  920                 VOP_UNLOCK(vp, 0, td);
  921                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
  922                         error = vfs_allocate_syncvnode(mp);
  923                 vfs_unbusy(mp, td);
  924                 if (error || (error = VFS_START(mp, 0, td)) != 0)
  925                         vrele(vp);
  926         } else {
  927                 VI_LOCK(vp);
  928                 vp->v_iflag &= ~VI_MOUNT;
  929                 VI_UNLOCK(vp);
  930                 vfs_mount_destroy(mp, td);
  931                 vput(vp);
  932         }
  933         return (error);
  934 }
  935 
  936 /*
  937  * Scan all active processes to see if any of them have a current
  938  * or root directory of `olddp'. If so, replace them with the new
  939  * mount point.
  940  */
  941 static void
  942 checkdirs(olddp, newdp)
  943         struct vnode *olddp, *newdp;
  944 {
  945         struct filedesc *fdp;
  946         struct proc *p;
  947         int nrele;
  948 
  949         if (vrefcnt(olddp) == 1)
  950                 return;
  951         sx_slock(&allproc_lock);
  952         LIST_FOREACH(p, &allproc, p_list) {
  953                 mtx_lock(&fdesc_mtx);
  954                 fdp = p->p_fd;
  955                 if (fdp == NULL) {
  956                         mtx_unlock(&fdesc_mtx);
  957                         continue;
  958                 }
  959                 nrele = 0;
  960                 FILEDESC_LOCK(fdp);
  961                 if (fdp->fd_cdir == olddp) {
  962                         VREF(newdp);
  963                         fdp->fd_cdir = newdp;
  964                         nrele++;
  965                 }
  966                 if (fdp->fd_rdir == olddp) {
  967                         VREF(newdp);
  968                         fdp->fd_rdir = newdp;
  969                         nrele++;
  970                 }
  971                 FILEDESC_UNLOCK(fdp);
  972                 mtx_unlock(&fdesc_mtx);
  973                 while (nrele--)
  974                         vrele(olddp);
  975         }
  976         sx_sunlock(&allproc_lock);
  977         if (rootvnode == olddp) {
  978                 vrele(rootvnode);
  979                 VREF(newdp);
  980                 rootvnode = newdp;
  981         }
  982 }
  983 
  984 /*
  985  * Unmount a filesystem.
  986  *
  987  * Note: unmount takes a path to the vnode mounted on as argument,
  988  * not special file (as before).
  989  */
  990 #ifndef _SYS_SYSPROTO_H_
  991 struct unmount_args {
  992         char    *path;
  993         int     flags;
  994 };
  995 #endif
  996 /* ARGSUSED */
  997 int
  998 unmount(td, uap)
  999         struct thread *td;
 1000         register struct unmount_args /* {
 1001                 char *path;
 1002                 int flags;
 1003         } */ *uap;
 1004 {
 1005         struct mount *mp;
 1006         char *pathbuf;
 1007         int error, id0, id1;
 1008 
 1009         if (jailed(td->td_ucred))
 1010                 return (EPERM);
 1011         if (usermount == 0) {
 1012                 if ((error = suser(td)) != 0)
 1013                         return (error);
 1014         }
 1015 
 1016         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1017         error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
 1018         if (error) {
 1019                 free(pathbuf, M_TEMP);
 1020                 return (error);
 1021         }
 1022         if (uap->flags & MNT_BYFSID) {
 1023                 /* Decode the filesystem ID. */
 1024                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1025                         free(pathbuf, M_TEMP);
 1026                         return (EINVAL);
 1027                 }
 1028 
 1029                 mtx_lock(&mountlist_mtx);
 1030                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1031                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1032                             mp->mnt_stat.f_fsid.val[1] == id1)
 1033                                 break;
 1034                 }
 1035                 mtx_unlock(&mountlist_mtx);
 1036         } else {
 1037                 mtx_lock(&mountlist_mtx);
 1038                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1039                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
 1040                                 break;
 1041                 }
 1042                 mtx_unlock(&mountlist_mtx);
 1043         }
 1044         free(pathbuf, M_TEMP);
 1045         if (mp == NULL) {
 1046                 /*
 1047                  * Previously we returned ENOENT for a nonexistent path and
 1048                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1049                  * now, so in the !MNT_BYFSID case return the more likely
 1050                  * EINVAL for compatibility.
 1051                  */
 1052                 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1053         }
 1054 
 1055         /*
 1056          * Only privileged root, or (if MNT_USER is set) the user that did the
 1057          * original mount is permitted to unmount this filesystem.
 1058          */
 1059         error = vfs_suser(mp, td);
 1060         if (error)
 1061                 return (error);
 1062 
 1063         /*
 1064          * Don't allow unmounting the root filesystem.
 1065          */
 1066         if (mp->mnt_flag & MNT_ROOTFS)
 1067                 return (EINVAL);
 1068         return (dounmount(mp, uap->flags, td));
 1069 }
 1070 
 1071 /*
 1072  * Do the actual filesystem unmount.
 1073  */
 1074 int
 1075 dounmount(mp, flags, td)
 1076         struct mount *mp;
 1077         int flags;
 1078         struct thread *td;
 1079 {
 1080         struct vnode *coveredvp, *fsrootvp;
 1081         int error;
 1082         int async_flag;
 1083 
 1084         mtx_lock(&mountlist_mtx);
 1085         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 1086                 mtx_unlock(&mountlist_mtx);
 1087                 return (EBUSY);
 1088         }
 1089         mp->mnt_kern_flag |= MNTK_UNMOUNT;
 1090         /* Allow filesystems to detect that a forced unmount is in progress. */
 1091         if (flags & MNT_FORCE)
 1092                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 1093         error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
 1094             ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
 1095         if (error) {
 1096                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1097                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1098                         wakeup(mp);
 1099                 return (error);
 1100         }
 1101         vn_start_write(NULL, &mp, V_WAIT);
 1102 
 1103         if (mp->mnt_flag & MNT_EXPUBLIC)
 1104                 vfs_setpublicfs(NULL, NULL, NULL);
 1105 
 1106         vfs_msync(mp, MNT_WAIT);
 1107         async_flag = mp->mnt_flag & MNT_ASYNC;
 1108         mp->mnt_flag &= ~MNT_ASYNC;
 1109         cache_purgevfs(mp);     /* remove cache entries for this file sys */
 1110         if (mp->mnt_syncer != NULL)
 1111                 vrele(mp->mnt_syncer);
 1112         /*
 1113          * For forced unmounts, move process cdir/rdir refs on the fs root
 1114          * vnode to the covered vnode.  For non-forced unmounts we want
 1115          * such references to cause an EBUSY error.
 1116          */
 1117         if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
 1118                 if (mp->mnt_vnodecovered != NULL)
 1119                         checkdirs(fsrootvp, mp->mnt_vnodecovered);
 1120                 if (fsrootvp == rootvnode) {
 1121                         vrele(rootvnode);
 1122                         rootvnode = NULL;
 1123                 }
 1124                 vput(fsrootvp);
 1125         }
 1126         if (((mp->mnt_flag & MNT_RDONLY) ||
 1127              (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
 1128             (flags & MNT_FORCE)) {
 1129                 error = VFS_UNMOUNT(mp, flags, td);
 1130         }
 1131         vn_finished_write(mp);
 1132         if (error) {
 1133                 /* Undo cdir/rdir and rootvnode changes made above. */
 1134                 if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
 1135                         if (mp->mnt_vnodecovered != NULL)
 1136                                 checkdirs(mp->mnt_vnodecovered, fsrootvp);
 1137                         if (rootvnode == NULL) {
 1138                                 rootvnode = fsrootvp;
 1139                                 vref(rootvnode);
 1140                         }
 1141                         vput(fsrootvp);
 1142                 }
 1143                 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
 1144                         (void) vfs_allocate_syncvnode(mp);
 1145                 mtx_lock(&mountlist_mtx);
 1146                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1147                 mp->mnt_flag |= async_flag;
 1148                 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
 1149                     &mountlist_mtx, td);
 1150                 if (mp->mnt_kern_flag & MNTK_MWAIT)
 1151                         wakeup(mp);
 1152                 return (error);
 1153         }
 1154         mtx_lock(&mountlist_mtx);
 1155         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1156         if ((coveredvp = mp->mnt_vnodecovered) != NULL)
 1157                 coveredvp->v_mountedhere = NULL;
 1158         mtx_unlock(&mountlist_mtx);
 1159         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1160         vfs_mount_destroy(mp, td);
 1161         if (coveredvp != NULL)
 1162                 vrele(coveredvp);
 1163         return (0);
 1164 }
 1165 
 1166 /*
 1167  * Lookup a filesystem type, and if found allocate and initialize
 1168  * a mount structure for it.
 1169  *
 1170  * Devname is usually updated by mount(8) after booting.
 1171  */
 1172 int
 1173 vfs_rootmountalloc(fstypename, devname, mpp)
 1174         char *fstypename;
 1175         char *devname;
 1176         struct mount **mpp;
 1177 {
 1178         struct thread *td = curthread;  /* XXX */
 1179         struct vfsconf *vfsp;
 1180         struct mount *mp;
 1181         int error;
 1182 
 1183         if (fstypename == NULL)
 1184                 return (ENODEV);
 1185         vfsp = vfs_byname(fstypename);
 1186         if (vfsp == NULL)
 1187                 return (ENODEV);
 1188         error = vfs_mount_alloc(NULLVP, vfsp, "/", td, &mp);
 1189         if (error)
 1190                 return (error);
 1191         mp->mnt_flag |= MNT_RDONLY | MNT_ROOTFS;
 1192         strlcpy(mp->mnt_stat.f_mntfromname, devname, MNAMELEN);
 1193         *mpp = mp;
 1194         return (0);
 1195 }
 1196 
 1197 /*
 1198  * Find and mount the root filesystem
 1199  */
 1200 void
 1201 vfs_mountroot(void)
 1202 {
 1203         char *cp;
 1204         int error, i, asked = 0;
 1205 
 1206 
 1207         /*
 1208          * Wait for GEOM to settle down
 1209          */
 1210         g_waitidle();
 1211 
 1212         /*
 1213          * We are booted with instructions to prompt for the root filesystem.
 1214          */
 1215         if (boothowto & RB_ASKNAME) {
 1216                 if (!vfs_mountroot_ask())
 1217                         return;
 1218                 asked = 1;
 1219         }
 1220 
 1221         /*
 1222          * The root filesystem information is compiled in, and we are
 1223          * booted with instructions to use it.
 1224          */
 1225         if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
 1226                 if (!vfs_mountroot_try(ctrootdevname))
 1227                         return;
 1228                 ctrootdevname = NULL;
 1229         }
 1230 
 1231         /*
 1232          * We've been given the generic "use CDROM as root" flag.  This is
 1233          * necessary because one media may be used in many different
 1234          * devices, so we need to search for them.
 1235          */
 1236         if (boothowto & RB_CDROM) {
 1237                 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
 1238                         if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
 1239                                 return;
 1240                 }
 1241         }
 1242 
 1243         /*
 1244          * Try to use the value read by the loader from /etc/fstab, or
 1245          * supplied via some other means.  This is the preferred
 1246          * mechanism.
 1247          */
 1248         cp = getenv("vfs.root.mountfrom");
 1249         if (cp != NULL) {
 1250                 error = vfs_mountroot_try(cp);
 1251                 freeenv(cp);
 1252                 if (!error)
 1253                         return;
 1254         }
 1255 
 1256         /*
 1257          * Try values that may have been computed by code during boot
 1258          */
 1259         if (!vfs_mountroot_try(rootdevnames[0]))
 1260                 return;
 1261         if (!vfs_mountroot_try(rootdevnames[1]))
 1262                 return;
 1263 
 1264         /*
 1265          * If we (still) have a compiled-in default, try it.
 1266          */
 1267         if (ctrootdevname != NULL)
 1268                 if (!vfs_mountroot_try(ctrootdevname))
 1269                         return;
 1270 
 1271         /*
 1272          * Everything so far has failed, prompt on the console if we haven't
 1273          * already tried that.
 1274          */
 1275         if (!asked)
 1276                 if (!vfs_mountroot_ask())
 1277                         return;
 1278         panic("Root mount failed, startup aborted.");
 1279 }
 1280 
 1281 /*
 1282  * Mount (mountfrom) as the root filesystem.
 1283  */
 1284 static int
 1285 vfs_mountroot_try(const char *mountfrom)
 1286 {
 1287         struct mount    *mp;
 1288         char            *vfsname, *path;
 1289         const char      *devname;
 1290         int             error;
 1291         char            patt[32];
 1292         int             s;
 1293 
 1294         vfsname = NULL;
 1295         path    = NULL;
 1296         mp      = NULL;
 1297         error   = EINVAL;
 1298 
 1299         if (mountfrom == NULL)
 1300                 return (error);         /* don't complain */
 1301 
 1302         s = splcam();                   /* Overkill, but annoying without it */
 1303         printf("Mounting root from %s\n", mountfrom);
 1304         splx(s);
 1305 
 1306         /* parse vfs name and path */
 1307         vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
 1308         path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
 1309         vfsname[0] = path[0] = 0;
 1310         sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
 1311         if (sscanf(mountfrom, patt, vfsname, path) < 1)
 1312                 goto done;
 1313 
 1314         /* allocate a root mount */
 1315         error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
 1316             &mp);
 1317         if (error != 0) {
 1318                 printf("Can't allocate root mount for filesystem '%s': %d\n",
 1319                        vfsname, error);
 1320                 goto done;
 1321         }
 1322 
 1323         /*
 1324          * do our best to set rootdev
 1325          * XXX: This does not belong here!
 1326          */
 1327         if (path[0] != '\0') {
 1328                 struct cdev *diskdev;
 1329                 diskdev = getdiskbyname(path);
 1330                 if (diskdev != NULL)
 1331                         rootdev = diskdev;
 1332                 else
 1333                         printf("setrootbyname failed\n");
 1334         }
 1335 
 1336         /* If the root device is a type "memory disk", mount RW */
 1337         if (rootdev != NULL && devsw(rootdev) != NULL) {
 1338                 devname = devtoname(rootdev);
 1339                 if (devname[0] == 'm' && devname[1] == 'd')
 1340                         mp->mnt_flag &= ~MNT_RDONLY;
 1341         }
 1342 
 1343         error = VFS_OMOUNT(mp, NULL, NULL, curthread);
 1344 
 1345 done:
 1346         if (vfsname != NULL)
 1347                 free(vfsname, M_MOUNT);
 1348         if (path != NULL)
 1349                 free(path, M_MOUNT);
 1350         if (error != 0) {
 1351                 if (mp != NULL)
 1352                         vfs_mount_destroy(mp, curthread);
 1353                 printf("Root mount failed: %d\n", error);
 1354         } else {
 1355 
 1356                 /* register with list of mounted filesystems */
 1357                 mtx_lock(&mountlist_mtx);
 1358                 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 1359                 mtx_unlock(&mountlist_mtx);
 1360 
 1361                 /* sanity check system clock against root fs timestamp */
 1362                 inittodr(mp->mnt_time);
 1363                 vfs_unbusy(mp, curthread);
 1364                 error = VFS_START(mp, 0, curthread);
 1365         }
 1366         return (error);
 1367 }
 1368 
 1369 /*
 1370  * Spin prompting on the console for a suitable root filesystem
 1371  */
 1372 static int
 1373 vfs_mountroot_ask(void)
 1374 {
 1375         char name[128];
 1376 
 1377         for(;;) {
 1378                 printf("\nManual root filesystem specification:\n");
 1379                 printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
 1380 #if defined(__i386__) || defined(__ia64__)
 1381                 printf("                       eg. ufs:da0s1a\n");
 1382 #else
 1383                 printf("                       eg. ufs:/dev/da0a\n");
 1384 #endif
 1385                 printf("  ?                  List valid disk boot devices\n");
 1386                 printf("  <empty line>       Abort manual input\n");
 1387                 printf("\nmountroot> ");
 1388                 gets(name);
 1389                 if (name[0] == '\0')
 1390                         return (1);
 1391                 if (name[0] == '?') {
 1392                         printf("\nList of GEOM managed disk devices:\n  ");
 1393                         g_dev_print();
 1394                         continue;
 1395                 }
 1396                 if (!vfs_mountroot_try(name))
 1397                         return (0);
 1398         }
 1399 }
 1400 
 1401 /*
 1402  * Local helper function for vfs_mountroot_ask.
 1403  */
 1404 static void
 1405 gets(char *cp)
 1406 {
 1407         char *lp;
 1408         int c;
 1409 
 1410         lp = cp;
 1411         for (;;) {
 1412                 printf("%c", c = cngetc() & 0177);
 1413                 switch (c) {
 1414                 case -1:
 1415                 case '\n':
 1416                 case '\r':
 1417                         *lp++ = '\0';
 1418                         return;
 1419                 case '\b':
 1420                 case '\177':
 1421                         if (lp > cp) {
 1422                                 printf(" \b");
 1423                                 lp--;
 1424                         }
 1425                         continue;
 1426                 case '#':
 1427                         lp--;
 1428                         if (lp < cp)
 1429                                 lp = cp;
 1430                         continue;
 1431                 case '@':
 1432                 case 'u' & 037:
 1433                         lp = cp;
 1434                         printf("%c", '\n');
 1435                         continue;
 1436                 default:
 1437                         *lp++ = c;
 1438                 }
 1439         }
 1440 }
 1441 
 1442 /*
 1443  * Convert a given name to the cdev pointer of the device, which is probably
 1444  * but not by definition, a disk.  Mount a DEVFS (on nothing), look the name
 1445  * up, extract the cdev from the vnode and unmount it again.  Unfortunately
 1446  * we cannot use the vnode directly (because we unmount the DEVFS again)
 1447  * so the filesystems still have to do the bdevvp() stunt.
 1448  */
 1449 struct cdev *
 1450 getdiskbyname(char *name)
 1451 {
 1452         char *cp = name;
 1453         struct cdev *dev = NULL;
 1454         struct thread *td = curthread;
 1455         struct vfsconf *vfsp;
 1456         struct mount *mp = NULL;
 1457         struct vnode *vroot = NULL;
 1458         struct nameidata nid;
 1459         int error;
 1460 
 1461         if (!bcmp(cp, "/dev/", 5))
 1462                 cp += 5;
 1463 
 1464         do {
 1465                 vfsp = vfs_byname("devfs");
 1466                 if (vfsp == NULL)
 1467                         break;
 1468                 error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
 1469                 if (error)
 1470                         break;
 1471                 mp->mnt_flag |= MNT_RDONLY;
 1472 
 1473                 error = VFS_MOUNT(mp, curthread);
 1474                 if (error)
 1475                         break;
 1476                 VFS_START(mp, 0, td);
 1477                 VFS_ROOT(mp, &vroot, td);
 1478                 VOP_UNLOCK(vroot, 0, td);
 1479 
 1480                 NDINIT(&nid, LOOKUP, NOCACHE|FOLLOW,
 1481                     UIO_SYSSPACE, cp, curthread);
 1482                 nid.ni_startdir = vroot;
 1483                 nid.ni_pathlen = strlen(cp);
 1484                 nid.ni_cnd.cn_cred = curthread->td_ucred;
 1485                 nid.ni_cnd.cn_nameptr = cp;
 1486 
 1487                 error = lookup(&nid);
 1488                 if (error)
 1489                         break;
 1490                 dev = vn_todev (nid.ni_vp);
 1491                 NDFREE(&nid, 0);
 1492         } while (0);
 1493 
 1494         if (vroot != NULL)
 1495                 VFS_UNMOUNT(mp, 0, td);
 1496         if (mp != NULL)
 1497                 vfs_mount_destroy(mp, td);
 1498         return (dev);
 1499 }
 1500 
 1501 /* Show the struct cdev *for a disk specified by name */
 1502 #ifdef DDB
 1503 DB_SHOW_COMMAND(disk, db_getdiskbyname)
 1504 {
 1505         struct cdev *dev;
 1506 
 1507         if (modif[0] == '\0') {
 1508                 db_error("usage: show disk/devicename");
 1509                 return;
 1510         }
 1511         dev = getdiskbyname(modif);
 1512         if (dev != NULL)
 1513                 db_printf("struct cdev *= %p\n", dev);
 1514         else
 1515                 db_printf("No disk device matched.\n");
 1516 }
 1517 #endif
 1518 
 1519 /*
 1520  * Get a mount option by its name.
 1521  *
 1522  * Return 0 if the option was found, ENOENT otherwise.
 1523  * If len is non-NULL it will be filled with the length
 1524  * of the option. If buf is non-NULL, it will be filled
 1525  * with the address of the option.
 1526  */
 1527 int
 1528 vfs_getopt(opts, name, buf, len)
 1529         struct vfsoptlist *opts;
 1530         const char *name;
 1531         void **buf;
 1532         int *len;
 1533 {
 1534         struct vfsopt *opt;
 1535 
 1536         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 1537 
 1538         TAILQ_FOREACH(opt, opts, link) {
 1539                 if (strcmp(name, opt->name) == 0) {
 1540                         if (len != NULL)
 1541                                 *len = opt->len;
 1542                         if (buf != NULL)
 1543                                 *buf = opt->value;
 1544                         return (0);
 1545                 }
 1546         }
 1547         return (ENOENT);
 1548 }
 1549 
 1550 /*
 1551  * Find and copy a mount option.
 1552  *
 1553  * The size of the buffer has to be specified
 1554  * in len, if it is not the same length as the
 1555  * mount option, EINVAL is returned.
 1556  * Returns ENOENT if the option is not found.
 1557  */
 1558 int
 1559 vfs_copyopt(opts, name, dest, len)
 1560         struct vfsoptlist *opts;
 1561         const char *name;
 1562         void *dest;
 1563         int len;
 1564 {
 1565         struct vfsopt *opt;
 1566 
 1567         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 1568 
 1569         TAILQ_FOREACH(opt, opts, link) {
 1570                 if (strcmp(name, opt->name) == 0) {
 1571                         if (len != opt->len)
 1572                                 return (EINVAL);
 1573                         bcopy(opt->value, dest, opt->len);
 1574                         return (0);
 1575                 }
 1576         }
 1577         return (ENOENT);
 1578 }
 1579 
 1580 
 1581 /*
 1582  * This is a helper function for filesystems to traverse their
 1583  * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
 1584  */
 1585 
 1586 struct vnode *
 1587 __mnt_vnode_next(struct vnode **nvp, struct mount *mp)
 1588 {
 1589         struct vnode *vp;
 1590 
 1591         mtx_assert(&mp->mnt_mtx, MA_OWNED);
 1592         vp = *nvp;
 1593         /* Check if we are done */
 1594         if (vp == NULL)
 1595                 return (NULL);
 1596         /* If our next vnode is no longer ours, start over */
 1597         if (vp->v_mount != mp) 
 1598                 vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 1599         /* Save pointer to next vnode in list */
 1600         if (vp != NULL)
 1601                 *nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 1602         else
 1603                 *nvp = NULL;
 1604         return (vp);
 1605 }
Cache object: b30fa79e91862591f32addd34f71dd36
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_mount.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c