vfs_mount.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: vfs_mount.c,v 1.101 2022/12/09 10:33:18 hannken Exp $  */
    2 
    3 /*-
    4  * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1989, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  * (c) UNIX System Laboratories, Inc.
   37  * All or some portions of this file are derived from material licensed
   38  * to the University of California by American Telephone and Telegraph
   39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   40  * the permission of UNIX System Laboratories, Inc.
   41  *
   42  * Redistribution and use in source and binary forms, with or without
   43  * modification, are permitted provided that the following conditions
   44  * are met:
   45  * 1. Redistributions of source code must retain the above copyright
   46  *    notice, this list of conditions and the following disclaimer.
   47  * 2. Redistributions in binary form must reproduce the above copyright
   48  *    notice, this list of conditions and the following disclaimer in the
   49  *    documentation and/or other materials provided with the distribution.
   50  * 3. Neither the name of the University nor the names of its contributors
   51  *    may be used to endorse or promote products derived from this software
   52  *    without specific prior written permission.
   53  *
   54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   64  * SUCH DAMAGE.
   65  *
   66  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   67  */
   68 
   69 #include <sys/cdefs.h>
   70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.101 2022/12/09 10:33:18 hannken Exp $");
   71 
   72 #include <sys/param.h>
   73 #include <sys/kernel.h>
   74 
   75 #include <sys/atomic.h>
   76 #include <sys/buf.h>
   77 #include <sys/conf.h>
   78 #include <sys/fcntl.h>
   79 #include <sys/filedesc.h>
   80 #include <sys/device.h>
   81 #include <sys/kauth.h>
   82 #include <sys/kmem.h>
   83 #include <sys/module.h>
   84 #include <sys/mount.h>
   85 #include <sys/fstrans.h>
   86 #include <sys/namei.h>
   87 #include <sys/extattr.h>
   88 #include <sys/syscallargs.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/systm.h>
   91 #include <sys/vfs_syscalls.h>
   92 #include <sys/vnode_impl.h>
   93 
   94 #include <miscfs/deadfs/deadfs.h>
   95 #include <miscfs/genfs/genfs.h>
   96 #include <miscfs/specfs/specdev.h>
   97 
   98 #include <uvm/uvm_swap.h>
   99 
  100 enum mountlist_type {              /* Kind of a struct mountlist_entry. */
  101         ME_MOUNT,                  /* me_mount is the actual mount. */
  102         ME_MARKER                  /* Marker entry, not a mount of its own. */
  103 };
  104 struct mountlist_entry {          /* One element of the mounted filesystem list. */
  105         TAILQ_ENTRY(mountlist_entry) me_list;   /* Linkage on the mount list. */
  106         struct mount *me_mount;                 /* Actual mount if ME_MOUNT,
  107                                                    current mount else. */
  108         enum mountlist_type me_type;            /* ME_MOUNT or ME_MARKER. */
  109 };
  110 struct mount_iterator {           /* Iterator state is one mount list entry. */
  111         struct mountlist_entry mi_entry;   /* NOTE(review): presumably linked
                                                   as an ME_MARKER entry - confirm
                                                   against the iterator code. */
  112 };
  113 
  114 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
  115     bool (*)(void *, struct vnode *), void *, bool);
      /* Arguments: iterator, optional selector, selector cookie, wait flag. */
  116
  117 /* Root filesystem.  mount_checkdirs() replaces it when it gets covered. */
  118 vnode_t *                       rootvnode;
  119
  120 /* Mounted filesystem list; vfs_getnewfsid() walks it under mountlist_lock. */
  121 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
  122 static kmutex_t                 mountlist_lock __cacheline_aligned;
  123 int vnode_offset_next_by_lru    /* XXX: ugly hack for pstat.c */
  124     = offsetof(vnode_impl_t, vi_lrulist.tqe_next);
  125
      /* Held by vfs_rootmountalloc() while scanning vfs_list and bumping vfs_refcount. */
  126 kmutex_t                        vfs_list_lock __cacheline_aligned;
  127
      /* Domain created in vfs_mount_sysinit(), torn down per mount in vfs_rele(). */
  128 static specificdata_domain_t    mount_specificdata_domain;
      /* Held across the whole of vfs_getnewfsid(). */
  129 static kmutex_t                 mntid_lock;
  130
      /* mountgen is handed out as mnt_gen by vfs_mountalloc() under mountgen_lock. */
  131 static kmutex_t                 mountgen_lock __cacheline_aligned;
  132 static uint64_t                 mountgen;
  133 
  134 void
  135 vfs_mount_sysinit(void)
  136 {
  137 
  138         TAILQ_INIT(&mountlist);
  139         mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
  140         mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
  141 
  142         mount_specificdata_domain = specificdata_domain_create();
  143         mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
  144         mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
  145         mountgen = 0;
  146 }
  147 
  148 struct mount *
  149 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
  150 {
  151         struct mount *mp;
  152         int error __diagused;
  153 
  154         mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
  155         mp->mnt_op = vfsops;
  156         mp->mnt_refcnt = 1;
  157         TAILQ_INIT(&mp->mnt_vnodelist);
  158         mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  159         mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  160         mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  161         mp->mnt_vnodecovered = vp;
  162         mount_initspecific(mp);
  163 
  164         error = fstrans_mount(mp);
  165         KASSERT(error == 0);
  166 
  167         mutex_enter(&mountgen_lock);
  168         mp->mnt_gen = mountgen++;
  169         mutex_exit(&mountgen_lock);
  170 
  171         return mp;
  172 }
  173 
  174 /*
  175  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
  176  * initialize a mount structure for it.
  177  *
  178  * Devname is usually updated by mount(8) after booting.
  179  */
  180 int
  181 vfs_rootmountalloc(const char *fstypename, const char *devname,
  182     struct mount **mpp)
  183 {
  184         struct vfsops *vfsp = NULL;
  185         struct mount *mp;
  186         int error __diagused;
  187 
  188         mutex_enter(&vfs_list_lock);
  189         LIST_FOREACH(vfsp, &vfs_list, vfs_list)
  190                 if (!strncmp(vfsp->vfs_name, fstypename, 
  191                     sizeof(mp->mnt_stat.f_fstypename)))
  192                         break;
  193         if (vfsp == NULL) {
  194                 mutex_exit(&vfs_list_lock);
  195                 return (ENODEV);
  196         }
  197         vfsp->vfs_refcount++;
  198         mutex_exit(&vfs_list_lock);
  199 
  200         if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
  201                 return ENOMEM;
  202         error = vfs_busy(mp);
  203         KASSERT(error == 0);
  204         mp->mnt_flag = MNT_RDONLY;
  205         (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
  206             sizeof(mp->mnt_stat.f_fstypename));
  207         mp->mnt_stat.f_mntonname[0] = '/';
  208         mp->mnt_stat.f_mntonname[1] = '\0';
  209         mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
  210             '\0';
  211         (void)copystr(devname, mp->mnt_stat.f_mntfromname,
  212             sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
  213         *mpp = mp;
  214         return 0;
  215 }
  216 
  217 /*
  218  * vfs_getnewfsid: get a new unique fsid.
  219  */
  220 void
  221 vfs_getnewfsid(struct mount *mp)
  222 {
  223         static u_short xxxfs_mntid;
  224         struct mountlist_entry *me;
  225         fsid_t tfsid;
  226         int mtype;
  227 
  228         mutex_enter(&mntid_lock);
  229         if (xxxfs_mntid == 0)
  230                 ++xxxfs_mntid;
  231         mtype = makefstype(mp->mnt_op->vfs_name);
  232         tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
  233         tfsid.__fsid_val[1] = mtype;
  234         /* Always increment to not return the same fsid to parallel mounts. */
  235         xxxfs_mntid++;
  236 
  237         /*
  238          * Directly walk mountlist to prevent deadlock through
  239          * mountlist_iterator_next() -> vfs_busy().
  240          */
  241         mutex_enter(&mountlist_lock);
  242         for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) {
  243                 if (me->me_type == ME_MOUNT &&
  244                     me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] ==
  245                     tfsid.__fsid_val[0] &&
  246                     me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] ==
  247                     tfsid.__fsid_val[1]) {
  248                         tfsid.__fsid_val[0]++;
  249                         xxxfs_mntid++;
  250                         me = TAILQ_FIRST(&mountlist);
  251                 } else {
  252                         me = TAILQ_NEXT(me, me_list);
  253                 }
  254         }
  255         mutex_exit(&mountlist_lock);
  256 
  257         mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
  258         mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1];
  259         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
  260         mutex_exit(&mntid_lock);
  261 }
  262 
  263 /*
  264  * Lookup a mount point by filesystem identifier.
  265  *
  266  * XXX Needs to add a reference to the mount point.
  267  */
  268 struct mount *
  269 vfs_getvfs(fsid_t *fsid)
  270 {
  271         mount_iterator_t *iter;
  272         struct mount *mp;
  273 
  274         mountlist_iterator_init(&iter);
  275         while ((mp = mountlist_iterator_next(iter)) != NULL) {
  276                 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
  277                     mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
  278                         mountlist_iterator_destroy(iter);
  279                         return mp;
  280                 }
  281         }
  282         mountlist_iterator_destroy(iter);
  283         return NULL;
  284 }
  285 
  286 /*
  287  * Take a reference to a mount structure.
  288  */
  289 void
  290 vfs_ref(struct mount *mp)
  291 {
  292 
  293         KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));
  294 
  295         atomic_inc_uint(&mp->mnt_refcnt);
  296 }
  297 
  298 /*
  299  * Drop a reference to a mount structure, freeing if the last reference.
       *
       * The reference count is decremented atomically.  Only the caller that
       * takes it to zero goes on to tear the mount down; everyone else
       * returns right after the decrement.
  300  */
  301 void
  302 vfs_rele(struct mount *mp)
  303 {
  304
              /* Explicit barriers are only compiled in when atomics do not imply them. */
  305 #ifndef __HAVE_ATOMIC_AS_MEMBAR
  306         membar_release();
  307 #endif
              /* Common case: references remain after our decrement. */
  308         if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
  309                 return;
  310         }
  311 #ifndef __HAVE_ATOMIC_AS_MEMBAR
  312         membar_acquire();
  313 #endif
  314
  315         /*
  316          * Nothing else has visibility of the mount: we can now
  317          * free the data structures.
  318          */
  319         KASSERT(mp->mnt_refcnt == 0);
  320         specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
              /* Release the three mutex objects allocated by vfs_mountalloc(). */
  321         mutex_obj_free(mp->mnt_updating);
  322         mutex_obj_free(mp->mnt_renamelock);
  323         mutex_obj_free(mp->mnt_vnodelock);
              /* Drop the mount's reference on its vfsops, if it has one. */
  324         if (mp->mnt_op != NULL) {
  325                 vfs_delref(mp->mnt_op);
  326         }
  327         fstrans_unmount(mp);
  328         /*
  329          * Final free of mp gets done from fstrans_mount_dtor().
  330          *
  331          * Prevents this memory to be reused as a mount before
  332          * fstrans releases all references to it.
  333          */
  334 }
  335 
  336 /*
  337  * Mark a mount point as busy, and gain a new reference to it.  Used to
  338  * prevent the file system from being unmounted during critical sections.
  339  *
  340  * vfs_busy can be called multiple times and by multiple threads
  341  * and must be accompanied by the same number of vfs_unbusy calls.
  342  *
  343  * => The caller must hold a pre-existing reference to the mount.
  344  * => Will fail if the file system is being unmounted, or is unmounted.
  345  */
  346 static inline int
  347 _vfs_busy(struct mount *mp, bool wait)
  348 {
  349 
  350         KASSERT(mp->mnt_refcnt > 0);
  351 
  352         if (wait) {
  353                 fstrans_start(mp);
  354         } else {
  355                 if (fstrans_start_nowait(mp))
  356                         return EBUSY;
  357         }
  358         if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
  359                 fstrans_done(mp);
  360                 return ENOENT;
  361         }
  362         vfs_ref(mp);
  363         return 0;
  364 }
  365 
  366 int
  367 vfs_busy(struct mount *mp)
  368 {
  369 
  370         return _vfs_busy(mp, true);
  371 }
  372 
  373 int
  374 vfs_trybusy(struct mount *mp)
  375 {
  376 
  377         return _vfs_busy(mp, false);
  378 }
  379 
  380 /*
  381  * Unbusy a busy filesystem.
  382  *
  383  * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
  384  */
  385 void
  386 vfs_unbusy(struct mount *mp)
  387 {
  388 
  389         KASSERT(mp->mnt_refcnt > 0);
  390 
  391         fstrans_done(mp);
  392         vfs_rele(mp);
  393 }
  394 
  395 /*
  396  * Change a file systems lower mount.
  397  * Both the current and the new lower mount may be NULL.  The caller
  398  * guarantees exclusive access to the mount and holds a pre-existing
  399  * reference to the new lower mount.
  400  */
  401 int
  402 vfs_set_lowermount(struct mount *mp, struct mount *lowermp)
  403 {
  404         struct mount *oldlowermp;
  405         int error;
  406 
  407 #ifdef DEBUG
  408         /*
  409          * Limit the depth of file system stack so kernel sanitizers
  410          * may stress mount/unmount without exhausting the kernel stack.
  411          */
  412         int depth;
  413         struct mount *mp2;
  414 
  415         for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) {
  416                 if (depth == 23)
  417                         return EINVAL;
  418         }
  419 #endif
  420 
  421         if (lowermp) {
  422                 if (lowermp == dead_rootmount)
  423                         return ENOENT;
  424                 error = vfs_busy(lowermp);
  425                 if (error)
  426                         return error;
  427                 vfs_ref(lowermp);
  428         }
  429 
  430         oldlowermp = mp->mnt_lower;
  431         mp->mnt_lower = lowermp;
  432 
  433         if (lowermp)
  434                 vfs_unbusy(lowermp);
  435 
  436         if (oldlowermp)
  437                 vfs_rele(oldlowermp);
  438 
  439         return 0;
  440 }
  441 
  442 struct vnode_iterator {           /* Cursor into a mount's vnode list. */
  443         vnode_impl_t vi_vnode;    /* Marker vnode; vfs_vnode_iterator_init()
                                          casts the vnalloc_marker() result
                                          to this type. */
  444 };
  445 
  446 void
  447 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
  448 {
  449         vnode_t *vp;
  450         vnode_impl_t *vip;
  451 
  452         vp = vnalloc_marker(mp);
  453         vip = VNODE_TO_VIMPL(vp);
  454 
  455         mutex_enter(mp->mnt_vnodelock);
  456         TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
  457         vp->v_usecount = 1;
  458         mutex_exit(mp->mnt_vnodelock);
  459 
  460         *vnip = (struct vnode_iterator *)vip;
  461 }
  462 
  463 void
  464 vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
  465 {
  466         vnode_impl_t *mvip = &vni->vi_vnode;
  467         vnode_t *mvp = VIMPL_TO_VNODE(mvip);
  468         kmutex_t *lock;
  469 
  470         KASSERT(vnis_marker(mvp));
  471         if (vrefcnt(mvp) != 0) {
  472                 lock = mvp->v_mount->mnt_vnodelock;
  473                 mutex_enter(lock);
  474                 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
  475                 mvp->v_usecount = 0;
  476                 mutex_exit(lock);
  477         }
  478         vnfree_marker(mvp);
  479 }
  480 
  481 static struct vnode *
  482 vfs_vnode_iterator_next1(struct vnode_iterator *vni,
  483     bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
  484 {
              /*
               * Advance the iterator and return the next acceptable vnode of
               * the marker's mount, or NULL at the end of the vnode list.
               *
               * => f, if not NULL, is called as (*f)(cl, vp) with the vnode's
               *    interlock held; a false result skips the vnode.
               * => do_wait == false passes VDEAD_NOWAIT to vdead_check().
               * => Other markers and vnodes failing vdead_check() are skipped.
               */
  485         vnode_impl_t *mvip = &vni->vi_vnode;
  486         struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
  487         vnode_t *vp;
  488         vnode_impl_t *vip;
  489         kmutex_t *lock;
  490         int error;
  491
  492         KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));
  493
  494         lock = mp->mnt_vnodelock;
  495         do {
  496                 mutex_enter(lock);
                      /* Pick up the marker's successor, then unlink the marker. */
  497                 vip = TAILQ_NEXT(mvip, vi_mntvnodes);
  498                 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
                      /* Use count zero: marker is off the list (see _destroy()). */
  499                 VIMPL_TO_VNODE(mvip)->v_usecount = 0;
  500 again:
  501                 if (vip == NULL) {
                              /* End of list; the marker stays unlinked. */
  502                         mutex_exit(lock);
  503                         return NULL;
  504                 }
  505                 vp = VIMPL_TO_VNODE(vip);
  506                 KASSERT(vp != NULL);
  507                 mutex_enter(vp->v_interlock);
  508                 if (vnis_marker(vp) ||
  509                     vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
  510                     (f && !(*f)(cl, vp))) {
                              /* Not a candidate: drop its interlock, try the next one. */
  511                         mutex_exit(vp->v_interlock);
  512                         vip = TAILQ_NEXT(vip, vi_mntvnodes);
  513                         goto again;
  514                 }
  515
                      /* Park the marker right behind the vnode we are about to return. */
  516                 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
  517                 VIMPL_TO_VNODE(mvip)->v_usecount = 1;
  518                 mutex_exit(lock);
                      /*
                       * v_interlock is still held here.
                       * NOTE(review): presumably vcache_vget() releases it - confirm.
                       * ENOENT restarts the scan from the marker.
                       */
  519                 error = vcache_vget(vp);
  520                 KASSERT(error == 0 || error == ENOENT);
  521         } while (error != 0);
  522
  523         return vp;
  524 }
  525 
  526 struct vnode *
  527 vfs_vnode_iterator_next(struct vnode_iterator *vni,
  528     bool (*f)(void *, struct vnode *), void *cl)
  529 {
  530 
  531         return vfs_vnode_iterator_next1(vni, f, cl, false);
  532 }
  533 
  534 /*
  535  * Move a vnode from one mount queue to another.
  536  */
  537 void
  538 vfs_insmntque(vnode_t *vp, struct mount *mp)
  539 {
  540         vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
  541         struct mount *omp;
  542         kmutex_t *lock;
  543 
  544         KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
  545             vp->v_tag == VT_VFS);
  546 
  547         /*
  548          * Delete from old mount point vnode list, if on one.
  549          */
  550         if ((omp = vp->v_mount) != NULL) {
  551                 lock = omp->mnt_vnodelock;
  552                 mutex_enter(lock);
  553                 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
  554                 mutex_exit(lock);
  555         }
  556 
  557         /*
  558          * Insert into list of vnodes for the new mount point, if
  559          * available.  The caller must take a reference on the mount
  560          * structure and donate to the vnode.
  561          */
  562         if ((vp->v_mount = mp) != NULL) {
  563                 lock = mp->mnt_vnodelock;
  564                 mutex_enter(lock);
  565                 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
  566                 mutex_exit(lock);
  567         }
  568 
  569         if (omp != NULL) {
  570                 /* Release reference to old mount. */
  571                 vfs_rele(omp);
  572         }
  573 }
  574 
  575 /*
  576  * vflush(): Remove any vnodes in the vnode table belonging to mount point mp.
  577  *
  578  * If FORCECLOSE is not specified, there should not be any active ones,
  579  * return error if any are found (nb: this is a user error, not a
  580  * system error). If FORCECLOSE is specified, detach any active vnodes
  581  * that are found.
  582  *
  583  * If WRITECLOSE is set, only flush out regular file vnodes open for
  584  * writing.
  585  *
  586  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
       *
       * The DEBUG-only variables below control vflush()'s reporting of
       * busy vnodes.
  587  */
  588 #ifdef DEBUG
  589 int busyprt = 0;        /* print out busy vnodes */
  590 struct ctldebug debug1 = { "busyprt", &busyprt };  /* names busyprt as a debug knob */
  591 #endif
  592 
  593 static vnode_t *
  594 vflushnext(struct vnode_iterator *marker, int *when)
  595 {
  596         if (getticks() > *when) {
  597                 yield();
  598                 *when = getticks() + hz / 10;
  599         }
  600         preempt_point();
  601         return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
  602 }
  603 
  604 /*
  605  * Flush one vnode.  Referenced on entry, unreferenced on return.
  606  */
  607 static int
  608 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
  609 {
  610         int error;
  611         struct vattr vattr;
  612 
  613         if (vp == skipvp ||
  614             ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
  615                 vrele(vp);
  616                 return 0;
  617         }
  618         /*
  619          * If WRITECLOSE is set, only flush out regular file
  620          * vnodes open for writing or open and unlinked.
  621          */
  622         if ((flags & WRITECLOSE)) {
  623                 if (vp->v_type != VREG) {
  624                         vrele(vp);
  625                         return 0;
  626                 }
  627                 error = vn_lock(vp, LK_EXCLUSIVE);
  628                 if (error) {
  629                         KASSERT(error == ENOENT);
  630                         vrele(vp);
  631                         return 0;
  632                 }
  633                 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
  634                 if (error == 0)
  635                         error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
  636                 VOP_UNLOCK(vp);
  637                 if (error) {
  638                         vrele(vp);
  639                         return error;
  640                 }
  641                 if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
  642                         vrele(vp);
  643                         return 0;
  644                 }
  645         }
  646         /*
  647          * First try to recycle the vnode.
  648          */
  649         if (vrecycle(vp))
  650                 return 0;
  651         /*
  652          * If FORCECLOSE is set, forcibly close the vnode.
  653          * For block or character devices, revert to an
  654          * anonymous device.  For all other files, just
  655          * kill them.
  656          */
  657         if (flags & FORCECLOSE) {
  658                 if (vrefcnt(vp) > 1 &&
  659                     (vp->v_type == VBLK || vp->v_type == VCHR))
  660                         vcache_make_anon(vp);
  661                 else
  662                         vgone(vp);
  663                 return 0;
  664         }
  665         vrele(vp);
  666         return EBUSY;
  667 }
  668 
  669 int
  670 vflush(struct mount *mp, vnode_t *skipvp, int flags)
  671 {
  672         vnode_t *vp;
  673         struct vnode_iterator *marker;
  674         int busy, error, when, retries = 2;
  675 
  676         do {
  677                 busy = error = when = 0;
  678 
  679                 /*
  680                  * First, flush out any vnode references from the
  681                  * deferred vrele list.
  682                  */
  683                 vrele_flush(mp);
  684 
  685                 vfs_vnode_iterator_init(mp, &marker);
  686 
  687                 while ((vp = vflushnext(marker, &when)) != NULL) {
  688                         error = vflush_one(vp, skipvp, flags);
  689                         if (error == EBUSY) {
  690                                 error = 0;
  691                                 busy++;
  692 #ifdef DEBUG
  693                                 if (busyprt && retries == 0)
  694                                         vprint("vflush: busy vnode", vp);
  695 #endif
  696                         } else if (error != 0) {
  697                                 break;
  698                         }
  699                 }
  700 
  701                 vfs_vnode_iterator_destroy(marker);
  702         } while (error == 0 && busy > 0 && retries-- > 0);
  703 
  704         if (error)
  705                 return error;
  706         if (busy)
  707                 return EBUSY;
  708         return 0;
  709 }
  710 
  711 /*
  712  * Mount a file system.
  713  */
  714 
  715 /*
  716  * Scan all active processes to see if any of them have a current or root
  717  * directory onto which the new filesystem has just been  mounted. If so,
  718  * replace them with the new mount point.
       *
       * olddp is the covered directory vnode; the replacement is the root
       * vnode of olddp->v_mountedhere.  rootvnode is switched the same way
       * when it is olddp.  Panics if the root of the new mount cannot be
       * obtained.
  719  */
  720 static void
  721 mount_checkdirs(vnode_t *olddp)
  722 {
  723         vnode_t *newdp, *rele1, *rele2;
  724         struct cwdinfo *cwdi;
  725         struct proc *p;
  726         bool retry;
  727
              /* A single reference means nobody else uses olddp: nothing to fix. */
  728         if (vrefcnt(olddp) == 1) {
  729                 return;
  730         }
  731         if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
  732                 panic("mount: lost mount");
  733
              /*
               * proc_lock is dropped to update one cwdinfo, so after every
               * update the scan of allproc starts over.  The loop ends after
               * a full pass that finds no user of olddp.
               */
  734         do {
  735                 retry = false;
  736                 mutex_enter(&proc_lock);
  737                 PROCLIST_FOREACH(p, &allproc) {
  738                         if ((cwdi = p->p_cwdi) == NULL)
  739                                 continue;
  740                         /*
  741                          * Cannot change to the old directory any more,
  742                          * so even if we see a stale value it is not a
  743                          * problem.
  744                          */
  745                         if (cwdi->cwdi_cdir != olddp &&
  746                             cwdi->cwdi_rdir != olddp)
  747                                 continue;
  748                         retry = true;
  749                         rele1 = NULL;
  750                         rele2 = NULL;
                              /* Reference the cwdinfo before proc_lock is let go. */
  751                         atomic_inc_uint(&cwdi->cwdi_refcnt);
  752                         mutex_exit(&proc_lock);
  753                         rw_enter(&cwdi->cwdi_lock, RW_WRITER);
                              /* Re-check under the writer lock, then swap in newdp. */
  754                         if (cwdi->cwdi_cdir == olddp) {
  755                                 rele1 = cwdi->cwdi_cdir;
  756                                 vref(newdp);
  757                                 cwdi->cwdi_cdir = newdp;
  758                         }
  759                         if (cwdi->cwdi_rdir == olddp) {
  760                                 rele2 = cwdi->cwdi_rdir;
  761                                 vref(newdp);
  762                                 cwdi->cwdi_rdir = newdp;
  763                         }
  764                         rw_exit(&cwdi->cwdi_lock);
                              /* Pairs with the atomic_inc_uint() above. */
  765                         cwdfree(cwdi);
                              /* Old references are released only after cwdi_lock is dropped. */
  766                         if (rele1 != NULL)
  767                                 vrele(rele1);
  768                         if (rele2 != NULL)
  769                                 vrele(rele2);
  770                         mutex_enter(&proc_lock);
  771                         break;
  772                 }
  773                 mutex_exit(&proc_lock);
  774         } while (retry);
  775
  776         if (rootvnode == olddp) {
  777                 vrele(rootvnode);
  778                 vref(newdp);
  779                 rootvnode = newdp;
  780         }
              /* Done with the vnode obtained from VFS_ROOT(LK_EXCLUSIVE). */
  781         vput(newdp);
  782 }
  783 
  784 /*
  785  * Start extended attributes
  786  */
  787 static int
  788 start_extattr(struct mount *mp)
  789 {
  790         int error;
  791 
  792         error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
  793         if (error) 
  794                 printf("%s: failed to start extattr: error = %d\n",
  795                        mp->mnt_stat.f_mntonname, error);
  796 
  797         return error;
  798 }
  799 
  800 int
  801 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
  802     const char *path, int flags, void *data, size_t *data_len)
  803 {
  804         vnode_t *vp = *vpp;
  805         struct mount *mp;
  806         struct pathbuf *pb;
  807         struct nameidata nd;
  808         int error, error2;
  809 
  810         error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
  811             KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
  812         if (error) {
  813                 vfs_delref(vfsops);
  814                 return error;
  815         }
  816 
  817         /* Cannot make a non-dir a mount-point (from here anyway). */
  818         if (vp->v_type != VDIR) {
  819                 vfs_delref(vfsops);
  820                 return ENOTDIR;
  821         }
  822 
  823         if (flags & MNT_EXPORTED) {
  824                 vfs_delref(vfsops);
  825                 return EINVAL;
  826         }
  827 
  828         if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
  829                 vfs_delref(vfsops);
  830                 return ENOMEM;
  831         }
  832 
  833         mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
  834 
  835         /*
  836          * The underlying file system may refuse the mount for
  837          * various reasons.  Allow the user to force it to happen.
  838          *
  839          * Set the mount level flags.
  840          */
  841         mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);
  842 
  843         error = VFS_MOUNT(mp, path, data, data_len);
  844         mp->mnt_flag &= ~MNT_OP_FLAGS;
  845 
  846         if (error != 0) {
  847                 vfs_rele(mp);
  848                 return error;
  849         }
  850 
  851         /* Suspend new file system before taking mnt_updating. */
  852         do {
  853                 error2 = vfs_suspend(mp, 0);
  854         } while (error2 == EINTR || error2 == ERESTART);
  855         KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
  856         mutex_enter(mp->mnt_updating);
  857 
  858         /*
  859          * Validate and prepare the mount point.
  860          */
  861         error = pathbuf_copyin(path, &pb);
  862         if (error != 0) {
  863                 goto err_mounted;
  864         }
  865         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
  866         error = namei(&nd);
  867         pathbuf_destroy(pb);
  868         if (error != 0) {
  869                 goto err_mounted;
  870         }
  871         if (nd.ni_vp != vp) {
  872                 vput(nd.ni_vp);
  873                 error = EINVAL;
  874                 goto err_mounted;
  875         }
  876         if (vp->v_mountedhere != NULL) {
  877                 vput(nd.ni_vp);
  878                 error = EBUSY;
  879                 goto err_mounted;
  880         }
  881         error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
  882         if (error != 0) {
  883                 vput(nd.ni_vp);
  884                 goto err_mounted;
  885         }
  886 
  887         /*
  888          * Put the new filesystem on the mount list after root.
  889          */
  890         cache_purge(vp);
  891         mp->mnt_iflag &= ~IMNT_WANTRDWR;
  892 
  893         mountlist_append(mp);
  894         if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  895                 vfs_syncer_add_to_worklist(mp);
  896         vp->v_mountedhere = mp;
  897         vput(nd.ni_vp);
  898 
  899         mount_checkdirs(vp);
  900         mutex_exit(mp->mnt_updating);
  901         if (error2 == 0)
  902                 vfs_resume(mp);
  903 
  904         /* Hold an additional reference to the mount across VFS_START(). */
  905         vfs_ref(mp);
  906         (void) VFS_STATVFS(mp, &mp->mnt_stat);
  907         error = VFS_START(mp, 0);
  908         if (error) {
  909                 vrele(vp);
  910         } else if (flags & MNT_EXTATTR) {
  911                 if (start_extattr(mp) != 0)
  912                         mp->mnt_flag &= ~MNT_EXTATTR;
  913         }
  914         /* Drop reference held for VFS_START(). */
  915         vfs_rele(mp);
  916         *vpp = NULL;
  917         return error;
  918 
  919 err_mounted:
  920         if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
  921                 panic("Unmounting fresh file system failed");
  922         mutex_exit(mp->mnt_updating);
  923         if (error2 == 0)
  924                 vfs_resume(mp);
  925         vfs_set_lowermount(mp, NULL);
  926         vfs_rele(mp);
  927 
  928         return error;
  929 }
  930 
  931 /*
  932  * Do the actual file system unmount.  File system is assumed to have
  933  * been locked by the caller.
  934  *
  935  * => Caller hold reference to the mount, explicitly for dounmount().
  936  */
  937 int
  938 dounmount(struct mount *mp, int flags, struct lwp *l)
  939 {
  940         vnode_t *coveredvp;
  941         int error, async, used_syncer, used_extattr;
  942         const bool was_suspended = fstrans_is_owner(mp);
  943 
  944 #if NVERIEXEC > 0
  945         error = veriexec_unmountchk(mp);
  946         if (error)
  947                 return (error);
  948 #endif /* NVERIEXEC > 0 */
  949 
  950         if (!was_suspended) {
  951                 error = vfs_suspend(mp, 0);
  952                 if (error) {
  953                         return error;
  954                 }
  955         }
  956 
  957         KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);
  958 
  959         used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
  960         used_extattr = mp->mnt_flag & MNT_EXTATTR;
  961 
  962         mp->mnt_iflag |= IMNT_UNMOUNT;
  963         mutex_enter(mp->mnt_updating);
  964         async = mp->mnt_flag & MNT_ASYNC;
  965         mp->mnt_flag &= ~MNT_ASYNC;
  966         cache_purgevfs(mp);     /* remove cache entries for this file sys */
  967         if (used_syncer)
  968                 vfs_syncer_remove_from_worklist(mp);
  969         error = 0;
  970         if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
  971                 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
  972         }
  973         if (error == 0 || (flags & MNT_FORCE)) {
  974                 error = VFS_UNMOUNT(mp, flags);
  975         }
  976         if (error) {
  977                 mp->mnt_iflag &= ~IMNT_UNMOUNT;
  978                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
  979                         vfs_syncer_add_to_worklist(mp);
  980                 mp->mnt_flag |= async;
  981                 mutex_exit(mp->mnt_updating);
  982                 if (!was_suspended)
  983                         vfs_resume(mp);
  984                 if (used_extattr) {
  985                         if (start_extattr(mp) != 0)
  986                                 mp->mnt_flag &= ~MNT_EXTATTR;
  987                         else
  988                                 mp->mnt_flag |= MNT_EXTATTR;
  989                 }
  990                 return (error);
  991         }
  992         mutex_exit(mp->mnt_updating);
  993 
  994         /*
  995          * mark filesystem as gone to prevent further umounts
  996          * after mnt_umounting lock is gone, this also prevents
  997          * vfs_busy() from succeeding.
  998          */
  999         mp->mnt_iflag |= IMNT_GONE;
 1000         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
 1001                 coveredvp->v_mountedhere = NULL;
 1002         }
 1003         if (!was_suspended)
 1004                 vfs_resume(mp);
 1005 
 1006         mountlist_remove(mp);
 1007         if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
 1008                 panic("unmount: dangling vnode");
 1009         vfs_hooks_unmount(mp);
 1010 
 1011         vfs_set_lowermount(mp, NULL);
 1012         vfs_rele(mp);   /* reference from mount() */
 1013         if (coveredvp != NULLVP) {
 1014                 vrele(coveredvp);
 1015         }
 1016         return (0);
 1017 }
 1018 
 1019 /*
 1020  * Unmount all file systems.
 1021  * We traverse the list in reverse order under the assumption that doing so
 1022  * will avoid needing to worry about dependencies.
 1023  */
 1024 bool
 1025 vfs_unmountall(struct lwp *l)
 1026 {
 1027 
 1028         printf("unmounting file systems...\n");
 1029         return vfs_unmountall1(l, true, true);
 1030 }
 1031 
 1032 static void
 1033 vfs_unmount_print(struct mount *mp, const char *pfx)
 1034 {
 1035 
 1036         aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
 1037             mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
 1038             mp->mnt_stat.f_fstypename);
 1039 }
 1040 
 1041 /*
 1042  * Return the mount with the highest generation less than "gen".
 1043  */
 1044 static struct mount *
 1045 vfs_unmount_next(uint64_t gen)
 1046 {
 1047         mount_iterator_t *iter;
 1048         struct mount *mp, *nmp;
 1049 
 1050         nmp = NULL;
 1051 
 1052         mountlist_iterator_init(&iter);
 1053         while ((mp = mountlist_iterator_next(iter)) != NULL) {
 1054                 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 
 1055                     mp->mnt_gen < gen) {
 1056                         if (nmp != NULL)
 1057                                 vfs_rele(nmp);
 1058                         nmp = mp;
 1059                         vfs_ref(nmp);
 1060                 }
 1061         }
 1062         mountlist_iterator_destroy(iter);
 1063 
 1064         return nmp;
 1065 }
 1066 
 1067 bool
 1068 vfs_unmount_forceone(struct lwp *l)
 1069 {
 1070         struct mount *mp;
 1071         int error;
 1072 
 1073         mp = vfs_unmount_next(mountgen);
 1074         if (mp == NULL) {
 1075                 return false;
 1076         }
 1077 
 1078 #ifdef DEBUG
 1079         printf("forcefully unmounting %s (%s)...\n",
 1080             mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
 1081 #endif
 1082         if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
 1083                 vfs_unmount_print(mp, "forcefully ");
 1084                 return true;
 1085         } else {
 1086                 vfs_rele(mp);
 1087         }
 1088 
 1089 #ifdef DEBUG
 1090         printf("forceful unmount of %s failed with error %d\n",
 1091             mp->mnt_stat.f_mntonname, error);
 1092 #endif
 1093 
 1094         return false;
 1095 }
 1096 
 1097 bool
 1098 vfs_unmountall1(struct lwp *l, bool force, bool verbose)
 1099 {
 1100         struct mount *mp;
 1101         mount_iterator_t *iter;
 1102         bool any_error = false, progress = false;
 1103         uint64_t gen;
 1104         int error;
 1105 
 1106         gen = mountgen;
 1107         for (;;) {
 1108                 mp = vfs_unmount_next(gen);
 1109                 if (mp == NULL)
 1110                         break;
 1111                 gen = mp->mnt_gen;
 1112 
 1113 #ifdef DEBUG
 1114                 printf("unmounting %p %s (%s)...\n",
 1115                     (void *)mp, mp->mnt_stat.f_mntonname,
 1116                     mp->mnt_stat.f_mntfromname);
 1117 #endif
 1118                 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
 1119                         vfs_unmount_print(mp, "");
 1120                         progress = true;
 1121                 } else {
 1122                         vfs_rele(mp);
 1123                         if (verbose) {
 1124                                 printf("unmount of %s failed with error %d\n",
 1125                                     mp->mnt_stat.f_mntonname, error);
 1126                         }
 1127                         any_error = true;
 1128                 }
 1129         }
 1130         if (verbose) {
 1131                 printf("unmounting done\n");
 1132         }
 1133         if (any_error && verbose) {
 1134                 printf("WARNING: some file systems would not unmount\n");
 1135         }
 1136         /* If the mountlist is empty it is time to remove swap. */
 1137         mountlist_iterator_init(&iter);
 1138         if (mountlist_iterator_next(iter) == NULL) {
 1139                 uvm_swap_shutdown(l);
 1140         }
 1141         mountlist_iterator_destroy(iter);
 1142 
 1143         return progress;
 1144 }
 1145 
 1146 void
 1147 vfs_sync_all(struct lwp *l)
 1148 {
 1149         printf("syncing disks... ");
 1150 
 1151         /* remove user processes from run queue */
 1152         suspendsched();
 1153         (void)spl0();
 1154 
 1155         /* avoid coming back this way again if we panic. */
 1156         doing_shutdown = 1;
 1157 
 1158         do_sys_sync(l);
 1159 
 1160         /* Wait for sync to finish. */
 1161         if (vfs_syncwait() != 0) {
 1162 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
 1163                 Debugger();
 1164 #endif
 1165                 printf("giving up\n");
 1166                 return;
 1167         } else
 1168                 printf("done\n");
 1169 }
 1170 
 1171 /*
 1172  * Sync and unmount file systems before shutting down.
 1173  */
 1174 void
 1175 vfs_shutdown(void)
 1176 {
 1177         lwp_t *l = curlwp;
 1178 
 1179         vfs_sync_all(l);
 1180 
 1181         /*
 1182          * If we have panicked - do not make the situation potentially
 1183          * worse by unmounting the file systems.
 1184          */
 1185         if (panicstr != NULL) {
 1186                 return;
 1187         }
 1188 
 1189         /* Unmount file systems. */
 1190         vfs_unmountall(l);
 1191 }
 1192 
 1193 /*
 1194  * Print a list of supported file system types (used by vfs_mountroot)
 1195  */
 1196 static void
 1197 vfs_print_fstypes(void)
 1198 {
 1199         struct vfsops *v;
 1200         int cnt = 0;
 1201 
 1202         mutex_enter(&vfs_list_lock);
 1203         LIST_FOREACH(v, &vfs_list, vfs_list)
 1204                 ++cnt;
 1205         mutex_exit(&vfs_list_lock);
 1206 
 1207         if (cnt == 0) {
 1208                 printf("WARNING: No file system modules have been loaded.\n");
 1209                 return;
 1210         }
 1211 
 1212         printf("Supported file systems:");
 1213         mutex_enter(&vfs_list_lock);
 1214         LIST_FOREACH(v, &vfs_list, vfs_list) {
 1215                 printf(" %s", v->vfs_name);
 1216         }
 1217         mutex_exit(&vfs_list_lock);
 1218         printf("\n");
 1219 }
 1220 
 1221 /*
 1222  * Mount the root file system.  If the operator didn't specify a
 1223  * file system to use, try all possible file systems until one
 1224  * succeeds.
 1225  */
 1226 int
 1227 vfs_mountroot(void)
 1228 {
 1229         struct vfsops *v;
 1230         int error = ENODEV;
 1231 
 1232         if (root_device == NULL)
 1233                 panic("vfs_mountroot: root device unknown");
 1234 
 1235         switch (device_class(root_device)) {
 1236         case DV_IFNET:
 1237                 if (rootdev != NODEV)
 1238                         panic("vfs_mountroot: rootdev set for DV_IFNET "
 1239                             "(0x%llx -> %llu,%llu)",
 1240                             (unsigned long long)rootdev,
 1241                             (unsigned long long)major(rootdev),
 1242                             (unsigned long long)minor(rootdev));
 1243                 break;
 1244 
 1245         case DV_DISK:
 1246                 if (rootdev == NODEV)
 1247                         panic("vfs_mountroot: rootdev not set for DV_DISK");
 1248                 if (bdevvp(rootdev, &rootvp))
 1249                         panic("vfs_mountroot: can't get vnode for rootdev");
 1250                 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
 1251                 error = VOP_OPEN(rootvp, FREAD, FSCRED);
 1252                 VOP_UNLOCK(rootvp);
 1253                 if (error) {
 1254                         printf("vfs_mountroot: can't open root device\n");
 1255                         return (error);
 1256                 }
 1257                 break;
 1258 
 1259         case DV_VIRTUAL:
 1260                 break;
 1261 
 1262         default:
 1263                 printf("%s: inappropriate for root file system\n",
 1264                     device_xname(root_device));
 1265                 return (ENODEV);
 1266         }
 1267 
 1268         /*
 1269          * If user specified a root fs type, use it.  Make sure the
 1270          * specified type exists and has a mount_root()
 1271          */
 1272         if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
 1273                 v = vfs_getopsbyname(rootfstype);
 1274                 error = EFTYPE;
 1275                 if (v != NULL) {
 1276                         if (v->vfs_mountroot != NULL) {
 1277                                 error = (v->vfs_mountroot)();
 1278                         }
 1279                         v->vfs_refcount--;
 1280                 }
 1281                 goto done;
 1282         }
 1283 
 1284         /*
 1285          * Try each file system currently configured into the kernel.
 1286          */
 1287         mutex_enter(&vfs_list_lock);
 1288         LIST_FOREACH(v, &vfs_list, vfs_list) {
 1289                 if (v->vfs_mountroot == NULL)
 1290                         continue;
 1291 #ifdef DEBUG
 1292                 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
 1293 #endif
 1294                 v->vfs_refcount++;
 1295                 mutex_exit(&vfs_list_lock);
 1296                 error = (*v->vfs_mountroot)();
 1297                 mutex_enter(&vfs_list_lock);
 1298                 v->vfs_refcount--;
 1299                 if (!error) {
 1300                         aprint_normal("root file system type: %s\n",
 1301                             v->vfs_name);
 1302                         break;
 1303                 }
 1304         }
 1305         mutex_exit(&vfs_list_lock);
 1306 
 1307         if (v == NULL) {
 1308                 vfs_print_fstypes();
 1309                 printf("no file system for %s", device_xname(root_device));
 1310                 if (device_class(root_device) == DV_DISK)
 1311                         printf(" (dev 0x%llx)", (unsigned long long)rootdev);
 1312                 printf("\n");
 1313                 error = EFTYPE;
 1314         }
 1315 
 1316 done:
 1317         if (error && device_class(root_device) == DV_DISK) {
 1318                 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
 1319                 VOP_CLOSE(rootvp, FREAD, FSCRED);
 1320                 VOP_UNLOCK(rootvp);
 1321                 vrele(rootvp);
 1322         }
 1323         if (error == 0) {
 1324                 mount_iterator_t *iter;
 1325                 struct mount *mp;
 1326 
 1327                 mountlist_iterator_init(&iter);
 1328                 mp = mountlist_iterator_next(iter);
 1329                 KASSERT(mp != NULL);
 1330                 mountlist_iterator_destroy(iter);
 1331 
 1332                 mp->mnt_flag |= MNT_ROOTFS;
 1333                 mp->mnt_op->vfs_refcount++;
 1334 
 1335                 /*
 1336                  * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
 1337                  * reference it, and donate it the reference grabbed
 1338                  * with VFS_ROOT().
 1339                  */
 1340                 error = VFS_ROOT(mp, LK_NONE, &rootvnode);
 1341                 if (error)
 1342                         panic("cannot find root vnode, error=%d", error);
 1343                 cwdi0.cwdi_cdir = rootvnode;
 1344                 cwdi0.cwdi_rdir = NULL;
 1345 
 1346                 /*
 1347                  * Now that root is mounted, we can fixup initproc's CWD
 1348                  * info.  All other processes are kthreads, which merely
 1349                  * share proc0's CWD info.
 1350                  */
 1351                 initproc->p_cwdi->cwdi_cdir = rootvnode;
 1352                 vref(initproc->p_cwdi->cwdi_cdir);
 1353                 initproc->p_cwdi->cwdi_rdir = NULL;
 1354                 /*
 1355                  * Enable loading of modules from the filesystem
 1356                  */
 1357                 module_load_vfs_init();
 1358 
 1359         }
 1360         return (error);
 1361 }
 1362 
 1363 /*
 1364  * mount_specific_key_create --
 1365  *      Create a key for subsystem mount-specific data.
 1366  */
 1367 int
 1368 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
 1369 {
 1370 
 1371         return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
 1372 }
 1373 
 1374 /*
 1375  * mount_specific_key_delete --
 1376  *      Delete a key for subsystem mount-specific data.
 1377  */
 1378 void
 1379 mount_specific_key_delete(specificdata_key_t key)
 1380 {
 1381 
 1382         specificdata_key_delete(mount_specificdata_domain, key);
 1383 }
 1384 
 1385 /*
 1386  * mount_initspecific --
 1387  *      Initialize a mount's specificdata container.
 1388  */
 1389 void
 1390 mount_initspecific(struct mount *mp)
 1391 {
 1392         int error __diagused;
 1393 
 1394         error = specificdata_init(mount_specificdata_domain,
 1395                                   &mp->mnt_specdataref);
 1396         KASSERT(error == 0);
 1397 }
 1398 
 1399 /*
 1400  * mount_finispecific --
 1401  *      Finalize a mount's specificdata container.
 1402  */
 1403 void
 1404 mount_finispecific(struct mount *mp)
 1405 {
 1406 
 1407         specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
 1408 }
 1409 
 1410 /*
 1411  * mount_getspecific --
 1412  *      Return mount-specific data corresponding to the specified key.
 1413  */
 1414 void *
 1415 mount_getspecific(struct mount *mp, specificdata_key_t key)
 1416 {
 1417 
 1418         return specificdata_getspecific(mount_specificdata_domain,
 1419                                          &mp->mnt_specdataref, key);
 1420 }
 1421 
 1422 /*
 1423  * mount_setspecific --
 1424  *      Set mount-specific data corresponding to the specified key.
 1425  */
 1426 void
 1427 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
 1428 {
 1429 
 1430         specificdata_setspecific(mount_specificdata_domain,
 1431                                  &mp->mnt_specdataref, key, data);
 1432 }
 1433 
 1434 /*
 1435  * Check to see if a filesystem is mounted on a block device.
 1436  */
 1437 int
 1438 vfs_mountedon(vnode_t *vp)
 1439 {
 1440         vnode_t *vq;
 1441         int error = 0;
 1442 
 1443         if (vp->v_type != VBLK)
 1444                 return ENOTBLK;
 1445         if (spec_node_getmountedfs(vp) != NULL)
 1446                 return EBUSY;
 1447         if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
 1448             == 0) {
 1449                 if (spec_node_getmountedfs(vq) != NULL)
 1450                         error = EBUSY;
 1451                 vrele(vq);
 1452         }
 1453 
 1454         return error;
 1455 }
 1456 
 1457 /*
 1458  * Check if a device pointed to by vp is mounted.
 1459  *
 1460  * Returns:
 1461  *   EINVAL     if it's not a disk
 1462  *   EBUSY      if it's a disk and mounted
 1463  *   0          if it's a disk and not mounted
 1464  */
 1465 int
 1466 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
 1467 {
 1468         vnode_t *bvp;
 1469         dev_t dev;
 1470         int d_type;
 1471 
 1472         bvp = NULL;
 1473         d_type = D_OTHER;
 1474 
 1475         if (iskmemvp(vp))
 1476                 return EINVAL;
 1477 
 1478         switch (vp->v_type) {
 1479         case VCHR: {
 1480                 const struct cdevsw *cdev;
 1481 
 1482                 dev = vp->v_rdev;
 1483                 cdev = cdevsw_lookup(dev);
 1484                 if (cdev != NULL) {
 1485                         dev_t blkdev;
 1486 
 1487                         blkdev = devsw_chr2blk(dev);
 1488                         if (blkdev != NODEV) {
 1489                                 if (vfinddev(blkdev, VBLK, &bvp) != 0) {
 1490                                         d_type = (cdev->d_flag & D_TYPEMASK);
 1491                                         /* XXX: what if bvp disappears? */
 1492                                         vrele(bvp);
 1493                                 }
 1494                         }
 1495                 }
 1496 
 1497                 break;
 1498                 }
 1499 
 1500         case VBLK: {
 1501                 const struct bdevsw *bdev;
 1502 
 1503                 dev = vp->v_rdev;
 1504                 bdev = bdevsw_lookup(dev);
 1505                 if (bdev != NULL)
 1506                         d_type = (bdev->d_flag & D_TYPEMASK);
 1507 
 1508                 bvp = vp;
 1509 
 1510                 break;
 1511                 }
 1512 
 1513         default:
 1514                 break;
 1515         }
 1516 
 1517         if (d_type != D_DISK)
 1518                 return EINVAL;
 1519 
 1520         if (bvpp != NULL)
 1521                 *bvpp = bvp;
 1522 
 1523         /*
 1524          * XXX: This is bogus. We should be failing the request
 1525          * XXX: not only if this specific slice is mounted, but
 1526          * XXX: if it's on a disk with any other mounted slice.
 1527          */
 1528         if (vfs_mountedon(bvp))
 1529                 return EBUSY;
 1530 
 1531         return 0;
 1532 }
 1533 
 1534 /*
 1535  * Make a 'unique' number from a mount type name.
 1536  */
 1537 long
 1538 makefstype(const char *type)
 1539 {
 1540         long rv;
 1541 
 1542         for (rv = 0; *type; type++) {
 1543                 rv <<= 2;
 1544                 rv ^= *type;
 1545         }
 1546         return rv;
 1547 }
 1548 
 1549 static struct mountlist_entry *
 1550 mountlist_alloc(enum mountlist_type type, struct mount *mp)
 1551 {
 1552         struct mountlist_entry *me;
 1553 
 1554         me = kmem_zalloc(sizeof(*me), KM_SLEEP);
 1555         me->me_mount = mp;
 1556         me->me_type = type;
 1557 
 1558         return me;
 1559 }
 1560 
 1561 static void
 1562 mountlist_free(struct mountlist_entry *me)
 1563 {
 1564 
 1565         kmem_free(me, sizeof(*me));
 1566 }
 1567 
 1568 void
 1569 mountlist_iterator_init(mount_iterator_t **mip)
 1570 {
 1571         struct mountlist_entry *me;
 1572 
 1573         me = mountlist_alloc(ME_MARKER, NULL);
 1574         mutex_enter(&mountlist_lock);
 1575         TAILQ_INSERT_HEAD(&mountlist, me, me_list);
 1576         mutex_exit(&mountlist_lock);
 1577         *mip = (mount_iterator_t *)me;
 1578 }
 1579 
 1580 void
 1581 mountlist_iterator_destroy(mount_iterator_t *mi)
 1582 {
 1583         struct mountlist_entry *marker = &mi->mi_entry;
 1584 
 1585         if (marker->me_mount != NULL)
 1586                 vfs_unbusy(marker->me_mount);
 1587 
 1588         mutex_enter(&mountlist_lock);
 1589         TAILQ_REMOVE(&mountlist, marker, me_list);
 1590         mutex_exit(&mountlist_lock);
 1591 
 1592         mountlist_free(marker);
 1593 
 1594 }
 1595 
 1596 /*
 1597  * Return the next mount or NULL for this iterator.
 1598  * Mark it busy on success.
 1599  */
 1600 static inline struct mount *
 1601 _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
 1602 {
 1603         struct mountlist_entry *me, *marker = &mi->mi_entry;
 1604         struct mount *mp;
 1605         int error;
 1606 
 1607         if (marker->me_mount != NULL) {
 1608                 vfs_unbusy(marker->me_mount);
 1609                 marker->me_mount = NULL;
 1610         }
 1611 
 1612         mutex_enter(&mountlist_lock);
 1613         for (;;) {
 1614                 KASSERT(marker->me_type == ME_MARKER);
 1615 
 1616                 me = TAILQ_NEXT(marker, me_list);
 1617                 if (me == NULL) {
 1618                         /* End of list: keep marker and return. */
 1619                         mutex_exit(&mountlist_lock);
 1620                         return NULL;
 1621                 }
 1622                 TAILQ_REMOVE(&mountlist, marker, me_list);
 1623                 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
 1624 
 1625                 /* Skip other markers. */
 1626                 if (me->me_type != ME_MOUNT)
 1627                         continue;
 1628 
 1629                 /* Take an initial reference for vfs_busy() below. */
 1630                 mp = me->me_mount;
 1631                 KASSERT(mp != NULL);
 1632                 vfs_ref(mp);
 1633                 mutex_exit(&mountlist_lock);
 1634 
 1635                 /* Try to mark this mount busy and return on success. */
 1636                 if (wait)
 1637                         error = vfs_busy(mp);
 1638                 else
 1639                         error = vfs_trybusy(mp);
 1640                 if (error == 0) {
 1641                         vfs_rele(mp);
 1642                         marker->me_mount = mp;
 1643                         return mp;
 1644                 }
 1645                 vfs_rele(mp);
 1646                 mutex_enter(&mountlist_lock);
 1647         }
 1648 }
 1649 
 1650 struct mount *
 1651 mountlist_iterator_next(mount_iterator_t *mi)
 1652 {
 1653 
 1654         return _mountlist_iterator_next(mi, true);
 1655 }
 1656 
 1657 struct mount *
 1658 mountlist_iterator_trynext(mount_iterator_t *mi)
 1659 {
 1660 
 1661         return _mountlist_iterator_next(mi, false);
 1662 }
 1663 
 1664 /*
 1665  * Attach new mount to the end of the mount list.
 1666  */
 1667 void
 1668 mountlist_append(struct mount *mp)
 1669 {
 1670         struct mountlist_entry *me;
 1671 
 1672         me = mountlist_alloc(ME_MOUNT, mp);
 1673         mutex_enter(&mountlist_lock);
 1674         TAILQ_INSERT_TAIL(&mountlist, me, me_list);
 1675         mutex_exit(&mountlist_lock);
 1676 }
 1677 
 1678 /*
 1679  * Remove mount from mount list.
 1680  */void
 1681 mountlist_remove(struct mount *mp)
 1682 {
 1683         struct mountlist_entry *me;
 1684 
 1685         mutex_enter(&mountlist_lock);
 1686         TAILQ_FOREACH(me, &mountlist, me_list)
 1687                 if (me->me_type == ME_MOUNT && me->me_mount == mp)
 1688                         break;
 1689         KASSERT(me != NULL);
 1690         TAILQ_REMOVE(&mountlist, me, me_list);
 1691         mutex_exit(&mountlist_lock);
 1692         mountlist_free(me);
 1693 }
 1694 
 1695 /*
 1696  * Unlocked variant to traverse the mountlist.
 1697  * To be used from DDB only.
 1698  */
 1699 struct mount *
 1700 _mountlist_next(struct mount *mp)
 1701 {
 1702         struct mountlist_entry *me;
 1703 
 1704         if (mp == NULL) {
 1705                 me = TAILQ_FIRST(&mountlist);
 1706         } else {
 1707                 TAILQ_FOREACH(me, &mountlist, me_list)
 1708                         if (me->me_type == ME_MOUNT && me->me_mount == mp)
 1709                                 break;
 1710                 if (me != NULL)
 1711                         me = TAILQ_NEXT(me, me_list);
 1712         }
 1713 
 1714         while (me != NULL && me->me_type != ME_MOUNT)
 1715                 me = TAILQ_NEXT(me, me_list);
 1716 
 1717         return (me ? me->me_mount : NULL);
 1718 }
Cache object: a633a9fcb224100496c3139e17f587b2
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_mount.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c