FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1999-2004 Poul-Henning Kamp
    5  * Copyright (c) 1999 Michael Smith
    6  * Copyright (c) 1989, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  * (c) UNIX System Laboratories, Inc.
    9  * All or some portions of this file are derived from material licensed
   10  * to the University of California by American Telephone and Telegraph
   11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   12  * the permission of UNIX System Laboratories, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include <sys/param.h>
   43 #include <sys/conf.h>
   44 #include <sys/smp.h>
   45 #include <sys/devctl.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/jail.h>
   49 #include <sys/kernel.h>
   50 #include <sys/ktr.h>
   51 #include <sys/libkern.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/namei.h>
   56 #include <sys/priv.h>
   57 #include <sys/proc.h>
   58 #include <sys/filedesc.h>
   59 #include <sys/reboot.h>
   60 #include <sys/sbuf.h>
   61 #include <sys/syscallsubr.h>
   62 #include <sys/sysproto.h>
   63 #include <sys/sx.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/sysent.h>
   66 #include <sys/systm.h>
   67 #include <sys/vnode.h>
   68 #include <vm/uma.h>
   69 
   70 #include <geom/geom.h>
   71 
   72 #include <machine/stdarg.h>
   73 
   74 #include <security/audit/audit.h>
   75 #include <security/mac/mac_framework.h>
   76 
   77 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   78 
   79 static int      vfs_domount(struct thread *td, const char *fstype, char *fspath,
   80                     uint64_t fsflags, struct vfsoptlist **optlist);
   81 static void     free_mntarg(struct mntarg *ma);
   82 
   83 static int      usermount = 0;
   84 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   85     "Unprivileged users may mount and unmount file systems");
   86 
   87 static bool     default_autoro = false;
   88 SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
   89     "Retry failed r/w mount as r/o if no explicit ro/rw option is specified");
   90 
   91 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   92 MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
   93 static uma_zone_t mount_zone;
   94 
   95 /* List of mounted filesystems. */
   96 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   97 
   98 /* For any iteration/modification of mountlist */
   99 struct mtx_padalign __exclusive_cache_line mountlist_mtx;
  100 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
  101 
  102 EVENTHANDLER_LIST_DEFINE(vfs_mounted);
  103 EVENTHANDLER_LIST_DEFINE(vfs_unmounted);
  104 
  105 static void mount_devctl_event(const char *type, struct mount *mp, bool donew);
  106 
  107 /*
  108  * Global opts, taken by all filesystems
  109  */
  110 static const char *global_opts[] = {
  111         "errmsg",
  112         "fstype",
  113         "fspath",
  114         "ro",
  115         "rw",
  116         "nosuid",
  117         "noexec",
  118         NULL
  119 };
  120 
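/*
 * Descriptive note (added commentary, not in the original file): mount_init()
 * and mount_fini() below are the UMA init/fini hooks for the "Mountpoints"
 * zone created in vfs_mount_init().  They run when an item is imported into
 * or released from the zone, setting up and tearing down the locks and
 * per-CPU counters; with UMA_ZONE_NOFREE, initialized items stay cached for
 * reuse.
 */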
  121 static int
  122 mount_init(void *mem, int size, int flags)
  123 {
  124         struct mount *mp;
  125 
  126         mp = (struct mount *)mem;
  127         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  128         mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
  129         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  130         mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO);
  131         mp->mnt_ref = 0;
  132         mp->mnt_vfs_ops = 1;
  133         mp->mnt_rootvnode = NULL;
  134         return (0);
  135 }
  136 
  137 static void
  138 mount_fini(void *mem, int size)
  139 {
  140         struct mount *mp;
  141 
  142         mp = (struct mount *)mem;
  143         uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu);
  144         lockdestroy(&mp->mnt_explock);
  145         mtx_destroy(&mp->mnt_listmtx);
  146         mtx_destroy(&mp->mnt_mtx);
  147 }
  148 
  149 static void
  150 vfs_mount_init(void *dummy __unused)
  151 {
  152 
  153         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
  154             NULL, mount_init, mount_fini, UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
  155 }
  156 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
  157 
  158 /*
  159  * ---------------------------------------------------------------------
  160  * Functions for building and sanitizing the mount options
  161  */
  162 
  163 /* Remove one mount option. */
  164 static void
  165 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  166 {
  167 
  168         TAILQ_REMOVE(opts, opt, link);
  169         free(opt->name, M_MOUNT);
  170         if (opt->value != NULL)
  171                 free(opt->value, M_MOUNT);
  172         free(opt, M_MOUNT);
  173 }
  174 
  175 /* Release all resources related to the mount options. */
  176 void
  177 vfs_freeopts(struct vfsoptlist *opts)
  178 {
  179         struct vfsopt *opt;
  180 
  181         while (!TAILQ_EMPTY(opts)) {
  182                 opt = TAILQ_FIRST(opts);
  183                 vfs_freeopt(opts, opt);
  184         }
  185         free(opts, M_MOUNT);
  186 }
  187 
  188 void
  189 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  190 {
  191         struct vfsopt *opt, *temp;
  192 
  193         if (opts == NULL)
  194                 return;
  195         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  196                 if (strcmp(opt->name, name) == 0)
  197                         vfs_freeopt(opts, opt);
  198         }
  199 }
  200 
  201 static int
  202 vfs_isopt_ro(const char *opt)
  203 {
  204 
  205         if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 ||
  206             strcmp(opt, "norw") == 0)
  207                 return (1);
  208         return (0);
  209 }
  210 
  211 static int
  212 vfs_isopt_rw(const char *opt)
  213 {
  214 
  215         if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0)
  216                 return (1);
  217         return (0);
  218 }
  219 
  220 /*
  221  * Check if options are equal (with or without the "no" prefix).
  222  */
  223 static int
  224 vfs_equalopts(const char *opt1, const char *opt2)
  225 {
  226         char *p;
  227 
  228         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  229         if (strcmp(opt1, opt2) == 0)
  230                 return (1);
  231         /* "noopt" vs. "opt" */
  232         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  233                 return (1);
  234         /* "opt" vs. "noopt" */
  235         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  236                 return (1);
  237         while ((p = strchr(opt1, '.')) != NULL &&
  238             !strncmp(opt1, opt2, ++p - opt1)) {
  239                 opt2 += p - opt1;
  240                 opt1 = p;
  241                 /* "foo.noopt" vs. "foo.opt" */
  242                 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  243                         return (1);
  244                 /* "foo.opt" vs. "foo.noopt" */
  245                 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  246                         return (1);
  247         }
  248         /* "ro" / "rdonly" / "norw" / "rw" / "noro" */
  249         if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
  250             (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)))
  251                 return (1);
  252         return (0);
  253 }
  254 
  255 /*
  256  * If a mount option is specified several times,
  257  * (with or without the "no" prefix) only keep
  258  * the last occurrence of it.
  259  */
  260 static void
  261 vfs_sanitizeopts(struct vfsoptlist *opts)
  262 {
  263         struct vfsopt *opt, *opt2, *tmp;
  264 
  265         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  266                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  267                 while (opt2 != NULL) {
  268                         if (vfs_equalopts(opt->name, opt2->name)) {
  269                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  270                                 vfs_freeopt(opts, opt2);
  271                                 opt2 = tmp;
  272                         } else {
  273                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  274                         }
  275                 }
  276         }
  277 }
  278 
  279 /*
  280  * Build a linked list of mount options from a struct uio.
  281  */
  282 int
  283 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  284 {
  285         struct vfsoptlist *opts;
  286         struct vfsopt *opt;
  287         size_t memused, namelen, optlen;
  288         unsigned int i, iovcnt;
  289         int error;
  290 
  291         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  292         TAILQ_INIT(opts);
  293         memused = 0;
  294         iovcnt = auio->uio_iovcnt;
  295         for (i = 0; i < iovcnt; i += 2) {
  296                 namelen = auio->uio_iov[i].iov_len;
  297                 optlen = auio->uio_iov[i + 1].iov_len;
  298                 memused += sizeof(struct vfsopt) + optlen + namelen;
  299                 /*
  300                  * Avoid consuming too much memory, and attempts to overflow
  301                  * memused.
  302                  */
  303                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  304                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  305                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  306                         error = EINVAL;
  307                         goto bad;
  308                 }
  309 
  310                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  311                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  312                 opt->value = NULL;
  313                 opt->len = 0;
  314                 opt->pos = i / 2;
  315                 opt->seen = 0;
  316 
  317                 /*
  318                  * Do this early, so jumps to "bad" will free the current
  319                  * option.
  320                  */
  321                 TAILQ_INSERT_TAIL(opts, opt, link);
  322 
  323                 if (auio->uio_segflg == UIO_SYSSPACE) {
  324                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  325                 } else {
  326                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  327                             namelen);
  328                         if (error)
  329                                 goto bad;
  330                 }
  331                 /* Ensure names are null-terminated strings. */
  332                 if (namelen == 0 || opt->name[namelen - 1] != '\0') {
  333                         error = EINVAL;
  334                         goto bad;
  335                 }
  336                 if (optlen != 0) {
  337                         opt->len = optlen;
  338                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  339                         if (auio->uio_segflg == UIO_SYSSPACE) {
  340                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  341                                     optlen);
  342                         } else {
  343                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  344                                     opt->value, optlen);
  345                                 if (error)
  346                                         goto bad;
  347                         }
  348                 }
  349         }
  350         vfs_sanitizeopts(opts);
  351         *options = opts;
  352         return (0);
  353 bad:
  354         vfs_freeopts(opts);
  355         return (error);
  356 }
  357 
  358 /*
  359  * Merge the old mount options with the new ones passed
  360  * in the MNT_UPDATE case.
  361  *
  362  * XXX: This function will keep a "nofoo" option in the new
  363  * options.  E.g, if the option's canonical name is "foo",
  364  * "nofoo" ends up in the mount point's active options.
  365  */
  366 static void
  367 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts)
  368 {
  369         struct vfsopt *opt, *new;
  370 
  371         TAILQ_FOREACH(opt, oldopts, link) {
  372                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  373                 new->name = strdup(opt->name, M_MOUNT);
  374                 if (opt->len != 0) {
  375                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  376                         bcopy(opt->value, new->value, opt->len);
  377                 } else
  378                         new->value = NULL;
  379                 new->len = opt->len;
  380                 new->seen = opt->seen;
  381                 TAILQ_INSERT_HEAD(toopts, new, link);
  382         }
  383         vfs_sanitizeopts(toopts);
  384 }
  385 
  386 /*
  387  * Mount a filesystem.
  388  */
  389 #ifndef _SYS_SYSPROTO_H_
  390 struct nmount_args {
  391         struct iovec *iovp;
  392         unsigned int iovcnt;
  393         int flags;
  394 };
  395 #endif
  396 int
  397 sys_nmount(struct thread *td, struct nmount_args *uap)
  398 {
  399         struct uio *auio;
  400         int error;
  401         u_int iovcnt;
  402         uint64_t flags;
  403 
  404         /*
   405          * Mount flags are now 64-bits. On 32-bit architectures only
  406          * 32-bits are passed in, but from here on everything handles
  407          * 64-bit flags correctly.
  408          */
  409         flags = uap->flags;
  410 
  411         AUDIT_ARG_FFLAGS(flags);
  412         CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
  413             uap->iovp, uap->iovcnt, flags);
  414 
  415         /*
  416          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  417          * userspace to set this flag, but we must filter it out if we want
  418          * MNT_UPDATE on the root file system to work.
  419          * MNT_ROOTFS should only be set by the kernel when mounting its
  420          * root file system.
  421          */
  422         flags &= ~MNT_ROOTFS;
  423 
  424         iovcnt = uap->iovcnt;
  425         /*
   426          * Check that we have an even number of iovecs
  427          * and that we have at least two options.
  428          */
  429         if ((iovcnt & 1) || (iovcnt < 4)) {
  430                 CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
  431                     uap->iovcnt);
  432                 return (EINVAL);
  433         }
  434 
  435         error = copyinuio(uap->iovp, iovcnt, &auio);
  436         if (error) {
  437                 CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
  438                     __func__, error);
  439                 return (error);
  440         }
  441         error = vfs_donmount(td, flags, auio);
  442 
  443         free(auio, M_IOV);
  444         return (error);
  445 }
  446 
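/*
 * Illustrative userspace sketch (not part of this file): nmount(2) expects
 * its options as "name"/"value" iovec pairs, so the iovec count passed in is
 * always even and at least 4, since "fstype" and "fspath" are mandatory (see
 * vfs_donmount() below).  Names and string values carry their terminating
 * NUL, and the "errmsg" buffer receives an error message on failure.  The
 * function name here is hypothetical.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/uio.h>
#include <string.h>

static int
mount_tmpfs_sketch(char *dir)
{
        char errmsg[255] = "";
        struct iovec iov[] = {
                { .iov_base = "fstype", .iov_len = sizeof("fstype") },
                { .iov_base = "tmpfs",  .iov_len = sizeof("tmpfs") },
                { .iov_base = "fspath", .iov_len = sizeof("fspath") },
                { .iov_base = dir,      .iov_len = strlen(dir) + 1 },
                { .iov_base = "errmsg", .iov_len = sizeof("errmsg") },
                { .iov_base = errmsg,   .iov_len = sizeof(errmsg) },
        };

        return (nmount(iov, sizeof(iov) / sizeof(iov[0]), 0));
}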
  447 /*
  448  * ---------------------------------------------------------------------
  449  * Various utility functions
  450  */
  451 
  452 void
  453 vfs_ref(struct mount *mp)
  454 {
  455         struct mount_pcpu *mpcpu;
  456 
  457         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  458         if (vfs_op_thread_enter(mp, mpcpu)) {
  459                 vfs_mp_count_add_pcpu(mpcpu, ref, 1);
  460                 vfs_op_thread_exit(mp, mpcpu);
  461                 return;
  462         }
  463 
  464         MNT_ILOCK(mp);
  465         MNT_REF(mp);
  466         MNT_IUNLOCK(mp);
  467 }
  468 
  469 void
  470 vfs_rel(struct mount *mp)
  471 {
  472         struct mount_pcpu *mpcpu;
  473 
  474         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  475         if (vfs_op_thread_enter(mp, mpcpu)) {
  476                 vfs_mp_count_sub_pcpu(mpcpu, ref, 1);
  477                 vfs_op_thread_exit(mp, mpcpu);
  478                 return;
  479         }
  480 
  481         MNT_ILOCK(mp);
  482         MNT_REL(mp);
  483         MNT_IUNLOCK(mp);
  484 }
  485 
  486 /*
  487  * Allocate and initialize the mount point struct.
  488  */
  489 struct mount *
  490 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
  491     struct ucred *cred)
  492 {
  493         struct mount *mp;
  494 
  495         mp = uma_zalloc(mount_zone, M_WAITOK);
  496         bzero(&mp->mnt_startzero,
  497             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  498         mp->mnt_kern_flag = 0;
  499         mp->mnt_flag = 0;
  500         mp->mnt_rootvnode = NULL;
  501         mp->mnt_vnodecovered = NULL;
  502         mp->mnt_op = NULL;
  503         mp->mnt_vfc = NULL;
  504         TAILQ_INIT(&mp->mnt_nvnodelist);
  505         mp->mnt_nvnodelistsize = 0;
  506         TAILQ_INIT(&mp->mnt_lazyvnodelist);
  507         mp->mnt_lazyvnodelistsize = 0;
  508         if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
  509             mp->mnt_writeopcount != 0)
  510                 panic("%s: non-zero counters on new mp %p\n", __func__, mp);
  511         if (mp->mnt_vfs_ops != 1)
  512                 panic("%s: vfs_ops should be 1 but %d found\n", __func__,
  513                     mp->mnt_vfs_ops);
  514         (void) vfs_busy(mp, MBF_NOWAIT);
  515         atomic_add_acq_int(&vfsp->vfc_refcount, 1);
  516         mp->mnt_op = vfsp->vfc_vfsops;
  517         mp->mnt_vfc = vfsp;
  518         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  519         mp->mnt_gen++;
  520         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  521         mp->mnt_vnodecovered = vp;
  522         mp->mnt_cred = crdup(cred);
  523         mp->mnt_stat.f_owner = cred->cr_uid;
  524         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  525         mp->mnt_iosize_max = DFLTPHYS;
  526 #ifdef MAC
  527         mac_mount_init(mp);
  528         mac_mount_create(cred, mp);
  529 #endif
  530         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  531         TAILQ_INIT(&mp->mnt_uppers);
  532         return (mp);
  533 }
  534 
  535 /*
  536  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  537  */
  538 void
  539 vfs_mount_destroy(struct mount *mp)
  540 {
  541 
  542         if (mp->mnt_vfs_ops == 0)
  543                 panic("%s: entered with zero vfs_ops\n", __func__);
  544 
  545         vfs_assert_mount_counters(mp);
  546 
  547         MNT_ILOCK(mp);
  548         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
  549         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  550                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  551                 wakeup(mp);
  552         }
  553         while (mp->mnt_ref)
  554                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
  555         KASSERT(mp->mnt_ref == 0,
  556             ("%s: invalid refcount in the drain path @ %s:%d", __func__,
  557             __FILE__, __LINE__));
  558         if (mp->mnt_writeopcount != 0)
  559                 panic("vfs_mount_destroy: nonzero writeopcount");
  560         if (mp->mnt_secondary_writes != 0)
  561                 panic("vfs_mount_destroy: nonzero secondary_writes");
  562         atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
  563         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  564                 struct vnode *vp;
  565 
  566                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  567                         vn_printf(vp, "dangling vnode ");
  568                 panic("unmount: dangling vnode");
  569         }
  570         KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
  571         if (mp->mnt_nvnodelistsize != 0)
  572                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  573         if (mp->mnt_lazyvnodelistsize != 0)
  574                 panic("vfs_mount_destroy: nonzero lazyvnodelistsize");
  575         if (mp->mnt_lockref != 0)
  576                 panic("vfs_mount_destroy: nonzero lock refcount");
  577         MNT_IUNLOCK(mp);
  578 
  579         if (mp->mnt_vfs_ops != 1)
  580                 panic("%s: vfs_ops should be 1 but %d found\n", __func__,
  581                     mp->mnt_vfs_ops);
  582 
  583         if (mp->mnt_rootvnode != NULL)
  584                 panic("%s: mount point still has a root vnode %p\n", __func__,
  585                     mp->mnt_rootvnode);
  586 
  587         if (mp->mnt_vnodecovered != NULL)
  588                 vrele(mp->mnt_vnodecovered);
  589 #ifdef MAC
  590         mac_mount_destroy(mp);
  591 #endif
  592         if (mp->mnt_opt != NULL)
  593                 vfs_freeopts(mp->mnt_opt);
  594         crfree(mp->mnt_cred);
  595         uma_zfree(mount_zone, mp);
  596 }
  597 
  598 static bool
  599 vfs_should_downgrade_to_ro_mount(uint64_t fsflags, int error)
  600 {
   601         /* This is an upgrade of an existing mount. */
  602         if ((fsflags & MNT_UPDATE) != 0)
  603                 return (false);
  604         /* This is already an R/O mount. */
  605         if ((fsflags & MNT_RDONLY) != 0)
  606                 return (false);
  607 
  608         switch (error) {
  609         case ENODEV:    /* generic, geom, ... */
  610         case EACCES:    /* cam/scsi, ... */
  611         case EROFS:     /* md, mmcsd, ... */
  612                 /*
  613                  * These errors can be returned by the storage layer to signal
  614                  * that the media is read-only.  No harm in the R/O mount
  615                  * attempt if the error was returned for some other reason.
  616                  */
  617                 return (true);
  618         default:
  619                 return (false);
  620         }
  621 }
  622 
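/*
 * Illustrative sketch (not part of this file): the read-only fallback above
 * is only attempted when "autoro" is in effect, either because the
 * vfs.default_autoro sysctl is set or because the caller passed the
 * valueless "autoro" option, e.g. by appending this pair to the iovec array
 * shown after sys_nmount() above:
 *
 *      { .iov_base = "autoro", .iov_len = sizeof("autoro") },
 *      { .iov_base = NULL,     .iov_len = 0 },
 *
 * An explicit "ro", "rdonly", "rw" or "noro" option overrides it.
 */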
  623 int
  624 vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
  625 {
  626         struct vfsoptlist *optlist;
  627         struct vfsopt *opt, *tmp_opt;
  628         char *fstype, *fspath, *errmsg;
  629         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  630         bool autoro;
  631 
  632         errmsg = fspath = NULL;
  633         errmsg_len = fspathlen = 0;
  634         errmsg_pos = -1;
  635         autoro = default_autoro;
  636 
  637         error = vfs_buildopts(fsoptions, &optlist);
  638         if (error)
  639                 return (error);
  640 
  641         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  642                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  643 
  644         /*
  645          * We need these two options before the others,
  646          * and they are mandatory for any filesystem.
  647          * Ensure they are NUL terminated as well.
  648          */
  649         fstypelen = 0;
  650         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  651         if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
  652                 error = EINVAL;
  653                 if (errmsg != NULL)
  654                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  655                 goto bail;
  656         }
  657         fspathlen = 0;
  658         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  659         if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
  660                 error = EINVAL;
  661                 if (errmsg != NULL)
  662                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  663                 goto bail;
  664         }
  665 
  666         /*
  667          * We need to see if we have the "update" option
  668          * before we call vfs_domount(), since vfs_domount() has special
  669          * logic based on MNT_UPDATE.  This is very important
  670          * when we want to update the root filesystem.
  671          */
  672         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  673                 int do_freeopt = 0;
  674 
  675                 if (strcmp(opt->name, "update") == 0) {
  676                         fsflags |= MNT_UPDATE;
  677                         do_freeopt = 1;
  678                 }
  679                 else if (strcmp(opt->name, "async") == 0)
  680                         fsflags |= MNT_ASYNC;
  681                 else if (strcmp(opt->name, "force") == 0) {
  682                         fsflags |= MNT_FORCE;
  683                         do_freeopt = 1;
  684                 }
  685                 else if (strcmp(opt->name, "reload") == 0) {
  686                         fsflags |= MNT_RELOAD;
  687                         do_freeopt = 1;
  688                 }
  689                 else if (strcmp(opt->name, "multilabel") == 0)
  690                         fsflags |= MNT_MULTILABEL;
  691                 else if (strcmp(opt->name, "noasync") == 0)
  692                         fsflags &= ~MNT_ASYNC;
  693                 else if (strcmp(opt->name, "noatime") == 0)
  694                         fsflags |= MNT_NOATIME;
  695                 else if (strcmp(opt->name, "atime") == 0) {
  696                         free(opt->name, M_MOUNT);
  697                         opt->name = strdup("nonoatime", M_MOUNT);
  698                 }
  699                 else if (strcmp(opt->name, "noclusterr") == 0)
  700                         fsflags |= MNT_NOCLUSTERR;
  701                 else if (strcmp(opt->name, "clusterr") == 0) {
  702                         free(opt->name, M_MOUNT);
  703                         opt->name = strdup("nonoclusterr", M_MOUNT);
  704                 }
  705                 else if (strcmp(opt->name, "noclusterw") == 0)
  706                         fsflags |= MNT_NOCLUSTERW;
  707                 else if (strcmp(opt->name, "clusterw") == 0) {
  708                         free(opt->name, M_MOUNT);
  709                         opt->name = strdup("nonoclusterw", M_MOUNT);
  710                 }
  711                 else if (strcmp(opt->name, "noexec") == 0)
  712                         fsflags |= MNT_NOEXEC;
  713                 else if (strcmp(opt->name, "exec") == 0) {
  714                         free(opt->name, M_MOUNT);
  715                         opt->name = strdup("nonoexec", M_MOUNT);
  716                 }
  717                 else if (strcmp(opt->name, "nosuid") == 0)
  718                         fsflags |= MNT_NOSUID;
  719                 else if (strcmp(opt->name, "suid") == 0) {
  720                         free(opt->name, M_MOUNT);
  721                         opt->name = strdup("nonosuid", M_MOUNT);
  722                 }
  723                 else if (strcmp(opt->name, "nosymfollow") == 0)
  724                         fsflags |= MNT_NOSYMFOLLOW;
  725                 else if (strcmp(opt->name, "symfollow") == 0) {
  726                         free(opt->name, M_MOUNT);
  727                         opt->name = strdup("nonosymfollow", M_MOUNT);
  728                 }
  729                 else if (strcmp(opt->name, "noro") == 0) {
  730                         fsflags &= ~MNT_RDONLY;
  731                         autoro = false;
  732                 }
  733                 else if (strcmp(opt->name, "rw") == 0) {
  734                         fsflags &= ~MNT_RDONLY;
  735                         autoro = false;
  736                 }
  737                 else if (strcmp(opt->name, "ro") == 0) {
  738                         fsflags |= MNT_RDONLY;
  739                         autoro = false;
  740                 }
  741                 else if (strcmp(opt->name, "rdonly") == 0) {
  742                         free(opt->name, M_MOUNT);
  743                         opt->name = strdup("ro", M_MOUNT);
  744                         fsflags |= MNT_RDONLY;
  745                         autoro = false;
  746                 }
  747                 else if (strcmp(opt->name, "autoro") == 0) {
  748                         do_freeopt = 1;
  749                         autoro = true;
  750                 }
  751                 else if (strcmp(opt->name, "suiddir") == 0)
  752                         fsflags |= MNT_SUIDDIR;
  753                 else if (strcmp(opt->name, "sync") == 0)
  754                         fsflags |= MNT_SYNCHRONOUS;
  755                 else if (strcmp(opt->name, "union") == 0)
  756                         fsflags |= MNT_UNION;
  757                 else if (strcmp(opt->name, "automounted") == 0) {
  758                         fsflags |= MNT_AUTOMOUNTED;
  759                         do_freeopt = 1;
  760                 } else if (strcmp(opt->name, "nocover") == 0) {
  761                         fsflags |= MNT_NOCOVER;
  762                         do_freeopt = 1;
  763                 } else if (strcmp(opt->name, "cover") == 0) {
  764                         fsflags &= ~MNT_NOCOVER;
  765                         do_freeopt = 1;
  766                 } else if (strcmp(opt->name, "emptydir") == 0) {
  767                         fsflags |= MNT_EMPTYDIR;
  768                         do_freeopt = 1;
  769                 } else if (strcmp(opt->name, "noemptydir") == 0) {
  770                         fsflags &= ~MNT_EMPTYDIR;
  771                         do_freeopt = 1;
  772                 }
  773                 if (do_freeopt)
  774                         vfs_freeopt(optlist, opt);
  775         }
  776 
  777         /*
  778          * Be ultra-paranoid about making sure the type and fspath
  779          * variables will fit in our mp buffers, including the
  780          * terminating NUL.
  781          */
  782         if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) {
  783                 error = ENAMETOOLONG;
  784                 goto bail;
  785         }
  786 
  787         error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  788 
  789         /*
  790          * See if we can mount in the read-only mode if the error code suggests
  791          * that it could be possible and the mount options allow for that.
  792          * Never try it if "[no]{ro|rw}" has been explicitly requested and not
  793          * overridden by "autoro".
  794          */
  795         if (autoro && vfs_should_downgrade_to_ro_mount(fsflags, error)) {
  796                 printf("%s: R/W mount failed, possibly R/O media,"
  797                     " trying R/O mount\n", __func__);
  798                 fsflags |= MNT_RDONLY;
  799                 error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  800         }
  801 bail:
  802         /* copyout the errmsg */
  803         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  804             && errmsg_len > 0 && errmsg != NULL) {
  805                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  806                         bcopy(errmsg,
  807                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  808                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  809                 } else {
  810                         copyout(errmsg,
  811                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  812                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  813                 }
  814         }
  815 
  816         if (optlist != NULL)
  817                 vfs_freeopts(optlist);
  818         return (error);
  819 }
  820 
  821 /*
  822  * Old mount API.
  823  */
  824 #ifndef _SYS_SYSPROTO_H_
  825 struct mount_args {
  826         char    *type;
  827         char    *path;
  828         int     flags;
  829         caddr_t data;
  830 };
  831 #endif
  832 /* ARGSUSED */
  833 int
  834 sys_mount(struct thread *td, struct mount_args *uap)
  835 {
  836         char *fstype;
  837         struct vfsconf *vfsp = NULL;
  838         struct mntarg *ma = NULL;
  839         uint64_t flags;
  840         int error;
  841 
  842         /*
  843          * Mount flags are now 64-bits. On 32-bit architectures only
  844          * 32-bits are passed in, but from here on everything handles
  845          * 64-bit flags correctly.
  846          */
  847         flags = uap->flags;
  848 
  849         AUDIT_ARG_FFLAGS(flags);
  850 
  851         /*
  852          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  853          * userspace to set this flag, but we must filter it out if we want
  854          * MNT_UPDATE on the root file system to work.
  855          * MNT_ROOTFS should only be set by the kernel when mounting its
  856          * root file system.
  857          */
  858         flags &= ~MNT_ROOTFS;
  859 
  860         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  861         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  862         if (error) {
  863                 free(fstype, M_TEMP);
  864                 return (error);
  865         }
  866 
  867         AUDIT_ARG_TEXT(fstype);
  868         vfsp = vfs_byname_kld(fstype, td, &error);
  869         free(fstype, M_TEMP);
  870         if (vfsp == NULL)
  871                 return (ENOENT);
  872         if (((vfsp->vfc_flags & VFCF_SBDRY) != 0 &&
  873             vfsp->vfc_vfsops_sd->vfs_cmount == NULL) ||
  874             ((vfsp->vfc_flags & VFCF_SBDRY) == 0 &&
  875             vfsp->vfc_vfsops->vfs_cmount == NULL))
  876                 return (EOPNOTSUPP);
  877 
  878         ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
  879         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  880         ma = mount_argb(ma, flags & MNT_RDONLY, "noro");
  881         ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid");
  882         ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");
  883 
  884         if ((vfsp->vfc_flags & VFCF_SBDRY) != 0)
  885                 return (vfsp->vfc_vfsops_sd->vfs_cmount(ma, uap->data, flags));
  886         return (vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags));
  887 }
  888 
  889 /*
  890  * vfs_domount_first(): first file system mount (not update)
  891  */
  892 static int
  893 vfs_domount_first(
  894         struct thread *td,              /* Calling thread. */
  895         struct vfsconf *vfsp,           /* File system type. */
  896         char *fspath,                   /* Mount path. */
  897         struct vnode *vp,               /* Vnode to be covered. */
  898         uint64_t fsflags,               /* Flags common to all filesystems. */
  899         struct vfsoptlist **optlist     /* Options local to the filesystem. */
  900         )
  901 {
  902         struct vattr va;
  903         struct mount *mp;
  904         struct vnode *newdp, *rootvp;
  905         int error, error1;
  906         bool unmounted;
  907 
  908         ASSERT_VOP_ELOCKED(vp, __func__);
  909         KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
  910 
  911         if ((fsflags & MNT_EMPTYDIR) != 0) {
  912                 error = vfs_emptydir(vp);
  913                 if (error != 0) {
  914                         vput(vp);
  915                         return (error);
  916                 }
  917         }
  918 
  919         /*
  920          * If the jail of the calling thread lacks permission for this type of
  921          * file system, or is trying to cover its own root, deny immediately.
  922          */
  923         if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
  924             vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
  925                 vput(vp);
  926                 return (EPERM);
  927         }
  928 
  929         /*
  930          * If the user is not root, ensure that they own the directory
  931          * onto which we are attempting to mount.
  932          */
  933         error = VOP_GETATTR(vp, &va, td->td_ucred);
  934         if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
  935                 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN);
  936         if (error == 0)
  937                 error = vinvalbuf(vp, V_SAVE, 0, 0);
  938         if (error == 0 && vp->v_type != VDIR)
  939                 error = ENOTDIR;
  940         if (error == 0) {
  941                 VI_LOCK(vp);
  942                 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
  943                         vp->v_iflag |= VI_MOUNT;
  944                 else
  945                         error = EBUSY;
  946                 VI_UNLOCK(vp);
  947         }
  948         if (error != 0) {
  949                 vput(vp);
  950                 return (error);
  951         }
  952         vn_seqc_write_begin(vp);
  953         VOP_UNLOCK(vp);
  954 
  955         /* Allocate and initialize the filesystem. */
  956         mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
  957         /* XXXMAC: pass to vfs_mount_alloc? */
  958         mp->mnt_optnew = *optlist;
  959         /* Set the mount level flags. */
  960         mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));
  961 
  962         /*
  963          * Mount the filesystem.
  964          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
  965          * get.  No freeing of cn_pnbuf.
  966          */
  967         error1 = 0;
  968         unmounted = true;
  969         if ((error = VFS_MOUNT(mp)) != 0 ||
  970             (error1 = VFS_STATFS(mp, &mp->mnt_stat)) != 0 ||
  971             (error1 = VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) != 0) {
  972                 rootvp = NULL;
  973                 if (error1 != 0) {
  974                         MPASS(error == 0);
  975                         rootvp = vfs_cache_root_clear(mp);
  976                         if (rootvp != NULL) {
  977                                 vhold(rootvp);
  978                                 vrele(rootvp);
  979                         }
  980                         (void)vn_start_write(NULL, &mp, V_WAIT);
  981                         MNT_ILOCK(mp);
  982                         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_UNMOUNTF;
  983                         MNT_IUNLOCK(mp);
  984                         VFS_PURGE(mp);
  985                         error = VFS_UNMOUNT(mp, 0);
  986                         vn_finished_write(mp);
  987                         if (error != 0) {
  988                                 printf(
  989                     "failed post-mount (%d): rollback unmount returned %d\n",
  990                                     error1, error);
  991                                 unmounted = false;
  992                         }
  993                         error = error1;
  994                 }
  995                 vfs_unbusy(mp);
  996                 mp->mnt_vnodecovered = NULL;
  997                 if (unmounted) {
  998                         /* XXXKIB wait for mnt_lockref drain? */
  999                         vfs_mount_destroy(mp);
 1000                 }
 1001                 VI_LOCK(vp);
 1002                 vp->v_iflag &= ~VI_MOUNT;
 1003                 VI_UNLOCK(vp);
 1004                 if (rootvp != NULL) {
 1005                         vn_seqc_write_end(rootvp);
 1006                         vdrop(rootvp);
 1007                 }
 1008                 vn_seqc_write_end(vp);
 1009                 vrele(vp);
 1010                 return (error);
 1011         }
 1012         vn_seqc_write_begin(newdp);
 1013         VOP_UNLOCK(newdp);
 1014 
 1015         if (mp->mnt_opt != NULL)
 1016                 vfs_freeopts(mp->mnt_opt);
 1017         mp->mnt_opt = mp->mnt_optnew;
 1018         *optlist = NULL;
 1019 
 1020         /*
 1021          * Prevent external consumers of mount options from reading mnt_optnew.
 1022          */
 1023         mp->mnt_optnew = NULL;
 1024 
 1025         MNT_ILOCK(mp);
 1026         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1027             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1028                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1029         else
 1030                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1031         MNT_IUNLOCK(mp);
 1032 
 1033         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1034         cache_purge(vp);
 1035         VI_LOCK(vp);
 1036         vp->v_iflag &= ~VI_MOUNT;
 1037         vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
 1038         vp->v_mountedhere = mp;
 1039         VI_UNLOCK(vp);
 1040         /* Place the new filesystem at the end of the mount list. */
 1041         mtx_lock(&mountlist_mtx);
 1042         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1043         mtx_unlock(&mountlist_mtx);
 1044         vfs_event_signal(NULL, VQ_MOUNT, 0);
 1045         vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
 1046         VOP_UNLOCK(vp);
 1047         EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td);
 1048         VOP_UNLOCK(newdp);
 1049         mount_devctl_event("MOUNT", mp, false);
 1050         mountcheckdirs(vp, newdp);
 1051         vn_seqc_write_end(vp);
 1052         vn_seqc_write_end(newdp);
 1053         vrele(newdp);
 1054         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1055                 vfs_allocate_syncvnode(mp);
 1056         vfs_op_exit(mp);
 1057         vfs_unbusy(mp);
 1058         return (0);
 1059 }
 1060 
 1061 /*
 1062  * vfs_domount_update(): update of mounted file system
 1063  */
 1064 static int
 1065 vfs_domount_update(
 1066         struct thread *td,              /* Calling thread. */
 1067         struct vnode *vp,               /* Mount point vnode. */
 1068         uint64_t fsflags,               /* Flags common to all filesystems. */
 1069         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1070         )
 1071 {
 1072         struct export_args export;
 1073         struct o2export_args o2export;
 1074         struct vnode *rootvp;
 1075         void *bufp;
 1076         struct mount *mp;
 1077         int error, export_error, i, len;
 1078         uint64_t flag;
 1079         gid_t *grps;
 1080 
 1081         ASSERT_VOP_ELOCKED(vp, __func__);
 1082         KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
 1083         mp = vp->v_mount;
 1084 
 1085         if ((vp->v_vflag & VV_ROOT) == 0) {
 1086                 if (vfs_copyopt(*optlist, "export", &export, sizeof(export))
 1087                     == 0)
 1088                         error = EXDEV;
 1089                 else
 1090                         error = EINVAL;
 1091                 vput(vp);
 1092                 return (error);
 1093         }
 1094 
 1095         /*
 1096          * We only allow the filesystem to be reloaded if it
 1097          * is currently mounted read-only.
 1098          */
 1099         flag = mp->mnt_flag;
 1100         if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
 1101                 vput(vp);
 1102                 return (EOPNOTSUPP);    /* Needs translation */
 1103         }
 1104         /*
 1105          * Only privileged root, or (if MNT_USER is set) the user that
 1106          * did the original mount is permitted to update it.
 1107          */
 1108         error = vfs_suser(mp, td);
 1109         if (error != 0) {
 1110                 vput(vp);
 1111                 return (error);
 1112         }
 1113         if (vfs_busy(mp, MBF_NOWAIT)) {
 1114                 vput(vp);
 1115                 return (EBUSY);
 1116         }
 1117         VI_LOCK(vp);
 1118         if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 1119                 VI_UNLOCK(vp);
 1120                 vfs_unbusy(mp);
 1121                 vput(vp);
 1122                 return (EBUSY);
 1123         }
 1124         vp->v_iflag |= VI_MOUNT;
 1125         VI_UNLOCK(vp);
 1126         VOP_UNLOCK(vp);
 1127 
 1128         vfs_op_enter(mp);
 1129         vn_seqc_write_begin(vp);
 1130 
 1131         rootvp = NULL;
 1132         MNT_ILOCK(mp);
 1133         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 1134                 MNT_IUNLOCK(mp);
 1135                 error = EBUSY;
 1136                 goto end;
 1137         }
 1138         mp->mnt_flag &= ~MNT_UPDATEMASK;
 1139         mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
 1140             MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
 1141         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1142                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1143         rootvp = vfs_cache_root_clear(mp);
 1144         MNT_IUNLOCK(mp);
 1145         mp->mnt_optnew = *optlist;
 1146         vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
 1147 
 1148         /*
 1149          * Mount the filesystem.
 1150          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1151          * get.  No freeing of cn_pnbuf.
 1152          */
 1153         error = VFS_MOUNT(mp);
 1154 
 1155         export_error = 0;
 1156         /* Process the export option. */
 1157         if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp,
 1158             &len) == 0) {
 1159                 /* Assume that there is only 1 ABI for each length. */
 1160                 switch (len) {
 1161                 case (sizeof(struct oexport_args)):
 1162                         bzero(&o2export, sizeof(o2export));
 1163                         /* FALLTHROUGH */
 1164                 case (sizeof(o2export)):
 1165                         bcopy(bufp, &o2export, len);
 1166                         export.ex_flags = (uint64_t)o2export.ex_flags;
 1167                         export.ex_root = o2export.ex_root;
 1168                         export.ex_uid = o2export.ex_anon.cr_uid;
 1169                         export.ex_groups = NULL;
 1170                         export.ex_ngroups = o2export.ex_anon.cr_ngroups;
 1171                         if (export.ex_ngroups > 0) {
 1172                                 if (export.ex_ngroups <= XU_NGROUPS) {
 1173                                         export.ex_groups = malloc(
 1174                                             export.ex_ngroups * sizeof(gid_t),
 1175                                             M_TEMP, M_WAITOK);
 1176                                         for (i = 0; i < export.ex_ngroups; i++)
 1177                                                 export.ex_groups[i] =
 1178                                                   o2export.ex_anon.cr_groups[i];
 1179                                 } else
 1180                                         export_error = EINVAL;
 1181                         } else if (export.ex_ngroups < 0)
 1182                                 export_error = EINVAL;
 1183                         export.ex_addr = o2export.ex_addr;
 1184                         export.ex_addrlen = o2export.ex_addrlen;
 1185                         export.ex_mask = o2export.ex_mask;
 1186                         export.ex_masklen = o2export.ex_masklen;
 1187                         export.ex_indexfile = o2export.ex_indexfile;
 1188                         export.ex_numsecflavors = o2export.ex_numsecflavors;
 1189                         if (export.ex_numsecflavors < MAXSECFLAVORS) {
 1190                                 for (i = 0; i < export.ex_numsecflavors; i++)
 1191                                         export.ex_secflavors[i] =
 1192                                             o2export.ex_secflavors[i];
 1193                         } else
 1194                                 export_error = EINVAL;
 1195                         if (export_error == 0)
 1196                                 export_error = vfs_export(mp, &export);
 1197                         free(export.ex_groups, M_TEMP);
 1198                         break;
 1199                 case (sizeof(export)):
 1200                         bcopy(bufp, &export, len);
 1201                         grps = NULL;
 1202                         if (export.ex_ngroups > 0) {
 1203                                 if (export.ex_ngroups <= NGROUPS_MAX) {
 1204                                         grps = malloc(export.ex_ngroups *
 1205                                             sizeof(gid_t), M_TEMP, M_WAITOK);
 1206                                         export_error = copyin(export.ex_groups,
 1207                                             grps, export.ex_ngroups *
 1208                                             sizeof(gid_t));
 1209                                         if (export_error == 0)
 1210                                                 export.ex_groups = grps;
 1211                                 } else
 1212                                         export_error = EINVAL;
 1213                         } else if (export.ex_ngroups == 0)
 1214                                 export.ex_groups = NULL;
 1215                         else
 1216                                 export_error = EINVAL;
 1217                         if (export_error == 0)
 1218                                 export_error = vfs_export(mp, &export);
 1219                         free(grps, M_TEMP);
 1220                         break;
 1221                 default:
 1222                         export_error = EINVAL;
 1223                         break;
 1224                 }
 1225         }
 1226 
 1227         MNT_ILOCK(mp);
 1228         if (error == 0) {
 1229                 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
 1230                     MNT_SNAPSHOT);
 1231         } else {
 1232                 /*
 1233                  * If we fail, restore old mount flags. MNT_QUOTA is special,
 1234                  * because it is not part of MNT_UPDATEMASK, but it could have
 1235                  * changed in the meantime if quotactl(2) was called.
 1236                  * All in all we want current value of MNT_QUOTA, not the old
 1237                  * one.
 1238                  */
 1239                 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
 1240         }
 1241         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1242             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1243                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1244         else
 1245                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1246         MNT_IUNLOCK(mp);
 1247 
 1248         if (error != 0)
 1249                 goto end;
 1250 
 1251         mount_devctl_event("REMOUNT", mp, true);
 1252         if (mp->mnt_opt != NULL)
 1253                 vfs_freeopts(mp->mnt_opt);
 1254         mp->mnt_opt = mp->mnt_optnew;
 1255         *optlist = NULL;
 1256         (void)VFS_STATFS(mp, &mp->mnt_stat);
 1257         /*
 1258          * Prevent external consumers of mount options from reading
 1259          * mnt_optnew.
 1260          */
 1261         mp->mnt_optnew = NULL;
 1262 
 1263         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1264                 vfs_allocate_syncvnode(mp);
 1265         else
 1266                 vfs_deallocate_syncvnode(mp);
 1267 end:
 1268         vfs_op_exit(mp);
 1269         if (rootvp != NULL) {
 1270                 vn_seqc_write_end(rootvp);
 1271                 vrele(rootvp);
 1272         }
 1273         vn_seqc_write_end(vp);
 1274         vfs_unbusy(mp);
 1275         VI_LOCK(vp);
 1276         vp->v_iflag &= ~VI_MOUNT;
 1277         VI_UNLOCK(vp);
 1278         vrele(vp);
 1279         return (error != 0 ? error : export_error);
 1280 }
 1281 
 1282 /*
 1283  * vfs_domount(): actually attempt a filesystem mount.
 1284  */
 1285 static int
 1286 vfs_domount(
 1287         struct thread *td,              /* Calling thread. */
 1288         const char *fstype,             /* Filesystem type. */
 1289         char *fspath,                   /* Mount path. */
 1290         uint64_t fsflags,               /* Flags common to all filesystems. */
 1291         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1292         )
 1293 {
 1294         struct vfsconf *vfsp;
 1295         struct nameidata nd;
 1296         struct vnode *vp;
 1297         char *pathbuf;
 1298         int error;
 1299 
 1300         /*
 1301          * Be ultra-paranoid about making sure the type and fspath
 1302          * variables will fit in our mp buffers, including the
 1303          * terminating NUL.
 1304          */
 1305         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 1306                 return (ENAMETOOLONG);
 1307 
 1308         if (jailed(td->td_ucred) || usermount == 0) {
 1309                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
 1310                         return (error);
 1311         }
 1312 
 1313         /*
 1314          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
 1315          */
 1316         if (fsflags & MNT_EXPORTED) {
 1317                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
 1318                 if (error)
 1319                         return (error);
 1320         }
 1321         if (fsflags & MNT_SUIDDIR) {
 1322                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
 1323                 if (error)
 1324                         return (error);
 1325         }
 1326         /*
 1327          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
 1328          */
 1329         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
 1330                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
 1331                         fsflags |= MNT_NOSUID | MNT_USER;
 1332         }
 1333 
 1334         /* Load KLDs before we lock the covered vnode to avoid reversals. */
 1335         vfsp = NULL;
 1336         if ((fsflags & MNT_UPDATE) == 0) {
 1337                 /* Don't try to load KLDs if we're mounting the root. */
 1338                 if (fsflags & MNT_ROOTFS)
 1339                         vfsp = vfs_byname(fstype);
 1340                 else
 1341                         vfsp = vfs_byname_kld(fstype, td, &error);
 1342                 if (vfsp == NULL)
 1343                         return (ENODEV);
 1344         }
 1345 
 1346         /*
 1347          * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
 1348          */
 1349         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 1350             UIO_SYSSPACE, fspath, td);
 1351         error = namei(&nd);
 1352         if (error != 0)
 1353                 return (error);
 1354         NDFREE(&nd, NDF_ONLY_PNBUF);
 1355         vp = nd.ni_vp;
 1356         if ((fsflags & MNT_UPDATE) == 0) {
 1357                 if ((vp->v_vflag & VV_ROOT) != 0 &&
 1358                     (fsflags & MNT_NOCOVER) != 0) {
 1359                         vput(vp);
 1360                         return (EBUSY);
 1361                 }
 1362                 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1363                 strcpy(pathbuf, fspath);
 1364                 error = vn_path_to_global_path(td, vp, pathbuf, MNAMELEN);
 1365                 if (error == 0) {
 1366                         error = vfs_domount_first(td, vfsp, pathbuf, vp,
 1367                             fsflags, optlist);
 1368                 }
 1369                 free(pathbuf, M_TEMP);
 1370         } else
 1371                 error = vfs_domount_update(td, vp, fsflags, optlist);
 1372 
 1373         return (error);
 1374 }
 1375 
 1376 /*
 1377  * Unmount a filesystem.
 1378  *
 1379  * Note: unmount takes a path to the vnode mounted on as its argument,
 1380  * not the special file (as before).
 1381  */
 1382 #ifndef _SYS_SYSPROTO_H_
 1383 struct unmount_args {
 1384         char    *path;
 1385         int     flags;
 1386 };
 1387 #endif
 1388 /* ARGSUSED */
 1389 int
 1390 sys_unmount(struct thread *td, struct unmount_args *uap)
 1391 {
 1392 
 1393         return (kern_unmount(td, uap->path, uap->flags));
 1394 }
 1395 
 1396 int
 1397 kern_unmount(struct thread *td, const char *path, int flags)
 1398 {
 1399         struct nameidata nd;
 1400         struct mount *mp;
 1401         char *pathbuf;
 1402         int error, id0, id1;
 1403 
 1404         AUDIT_ARG_VALUE(flags);
 1405         if (jailed(td->td_ucred) || usermount == 0) {
 1406                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1407                 if (error)
 1408                         return (error);
 1409         }
 1410 
 1411         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1412         error = copyinstr(path, pathbuf, MNAMELEN, NULL);
 1413         if (error) {
 1414                 free(pathbuf, M_TEMP);
 1415                 return (error);
 1416         }
 1417         if (flags & MNT_BYFSID) {
 1418                 AUDIT_ARG_TEXT(pathbuf);
 1419                 /* Decode the filesystem ID. */
 1420                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1421                         free(pathbuf, M_TEMP);
 1422                         return (EINVAL);
 1423                 }
 1424 
 1425                 mtx_lock(&mountlist_mtx);
 1426                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1427                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1428                             mp->mnt_stat.f_fsid.val[1] == id1) {
 1429                                 vfs_ref(mp);
 1430                                 break;
 1431                         }
 1432                 }
 1433                 mtx_unlock(&mountlist_mtx);
 1434         } else {
 1435                 /*
 1436                  * Try to find global path for path argument.
 1437                  */
 1438                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 1439                     UIO_SYSSPACE, pathbuf, td);
 1440                 if (namei(&nd) == 0) {
 1441                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1442                         error = vn_path_to_global_path(td, nd.ni_vp, pathbuf,
 1443                             MNAMELEN);
 1444                         if (error == 0)
 1445                                 vput(nd.ni_vp);
 1446                 }
 1447                 mtx_lock(&mountlist_mtx);
 1448                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1449                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) {
 1450                                 vfs_ref(mp);
 1451                                 break;
 1452                         }
 1453                 }
 1454                 mtx_unlock(&mountlist_mtx);
 1455         }
 1456         free(pathbuf, M_TEMP);
 1457         if (mp == NULL) {
 1458                 /*
 1459                  * Previously we returned ENOENT for a nonexistent path and
 1460                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1461                  * now, so in the !MNT_BYFSID case return the more likely
 1462                  * EINVAL for compatibility.
 1463                  */
 1464                 return ((flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1465         }
 1466 
 1467         /*
 1468          * Don't allow unmounting the root filesystem.
 1469          */
 1470         if (mp->mnt_flag & MNT_ROOTFS) {
 1471                 vfs_rel(mp);
 1472                 return (EINVAL);
 1473         }
 1474         error = dounmount(mp, flags, td);
 1475         return (error);
 1476 }
 1477 
 1478 /*
 1479  * Return error if any of the vnodes, ignoring the root vnode
 1480  * and the syncer vnode, have non-zero usecount.
 1481  *
 1482  * This function is purely advisory - it can return false positives
 1483  * and negatives.
 1484  */
 1485 static int
 1486 vfs_check_usecounts(struct mount *mp)
 1487 {
 1488         struct vnode *vp, *mvp;
 1489 
 1490         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 1491                 if ((vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON &&
 1492                     vp->v_usecount != 0) {
 1493                         VI_UNLOCK(vp);
 1494                         MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 1495                         return (EBUSY);
 1496                 }
 1497                 VI_UNLOCK(vp);
 1498         }
 1499 
 1500         return (0);
 1501 }
 1502 
 1503 static void
 1504 dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags)
 1505 {
 1506 
 1507         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1508         mp->mnt_kern_flag &= ~mntkflags;
 1509         if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) {
 1510                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1511                 wakeup(mp);
 1512         }
 1513         vfs_op_exit_locked(mp);
 1514         MNT_IUNLOCK(mp);
 1515         if (coveredvp != NULL) {
 1516                 VOP_UNLOCK(coveredvp);
 1517                 vdrop(coveredvp);
 1518         }
 1519         vn_finished_write(mp);
 1520 }
 1521 
 1522 /*
 1523  * There are various reference counters associated with the mount point.
 1524  * Normally it is permitted to modify them without taking the mnt ilock,
 1525  * but this behavior can be temporarily disabled if a stable value is needed
 1526  * or callers are expected to block (e.g. to not allow new users during
 1527  * forced unmount).
 1528  */
 1529 void
 1530 vfs_op_enter(struct mount *mp)
 1531 {
 1532         struct mount_pcpu *mpcpu;
 1533         int cpu;
 1534 
 1535         MNT_ILOCK(mp);
 1536         mp->mnt_vfs_ops++;
 1537         if (mp->mnt_vfs_ops > 1) {
 1538                 MNT_IUNLOCK(mp);
 1539                 return;
 1540         }
 1541         vfs_op_barrier_wait(mp);
 1542         CPU_FOREACH(cpu) {
 1543                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1544 
 1545                 mp->mnt_ref += mpcpu->mntp_ref;
 1546                 mpcpu->mntp_ref = 0;
 1547 
 1548                 mp->mnt_lockref += mpcpu->mntp_lockref;
 1549                 mpcpu->mntp_lockref = 0;
 1550 
 1551                 mp->mnt_writeopcount += mpcpu->mntp_writeopcount;
 1552                 mpcpu->mntp_writeopcount = 0;
 1553         }
 1554         if (mp->mnt_ref <= 0 || mp->mnt_lockref < 0 || mp->mnt_writeopcount < 0)
 1555                 panic("%s: invalid count(s) on mp %p: ref %d lockref %d writeopcount %d\n",
 1556                     __func__, mp, mp->mnt_ref, mp->mnt_lockref, mp->mnt_writeopcount);
 1557         MNT_IUNLOCK(mp);
 1558         vfs_assert_mount_counters(mp);
 1559 }
 1560 
 1561 void
 1562 vfs_op_exit_locked(struct mount *mp)
 1563 {
 1564 
 1565         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1566 
 1567         if (mp->mnt_vfs_ops <= 0)
 1568                 panic("%s: invalid vfs_ops count %d for mp %p\n",
 1569                     __func__, mp->mnt_vfs_ops, mp);
 1570         mp->mnt_vfs_ops--;
 1571 }
 1572 
 1573 void
 1574 vfs_op_exit(struct mount *mp)
 1575 {
 1576 
 1577         MNT_ILOCK(mp);
 1578         vfs_op_exit_locked(mp);
 1579         MNT_IUNLOCK(mp);
 1580 }
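
/*
 * Illustrative sketch (not compiled): obtaining a stable snapshot of the
 * reference count using the protocol above.  While mnt_vfs_ops is
 * non-zero the per-CPU fast paths stay disabled and the counters live in
 * the struct mount itself, so reading them under the interlock suffices.
 * The function name is hypothetical.
 */
#if 0
static int
example_stable_mnt_ref(struct mount *mp)
{
        int ref;

        vfs_op_enter(mp);       /* folds per-CPU counts into mp->mnt_ref */
        MNT_ILOCK(mp);
        ref = mp->mnt_ref;
        MNT_IUNLOCK(mp);
        vfs_op_exit(mp);
        return (ref);
}
#endif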
 1581 
 1582 struct vfs_op_barrier_ipi {
 1583         struct mount *mp;
 1584         struct smp_rendezvous_cpus_retry_arg srcra;
 1585 };
 1586 
 1587 static void
 1588 vfs_op_action_func(void *arg)
 1589 {
 1590         struct vfs_op_barrier_ipi *vfsopipi;
 1591         struct mount *mp;
 1592 
 1593         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1594         mp = vfsopipi->mp;
 1595 
 1596         if (!vfs_op_thread_entered(mp))
 1597                 smp_rendezvous_cpus_done(arg);
 1598 }
 1599 
 1600 static void
 1601 vfs_op_wait_func(void *arg, int cpu)
 1602 {
 1603         struct vfs_op_barrier_ipi *vfsopipi;
 1604         struct mount *mp;
 1605         struct mount_pcpu *mpcpu;
 1606 
 1607         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1608         mp = vfsopipi->mp;
 1609 
 1610         mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1611         while (atomic_load_int(&mpcpu->mntp_thread_in_ops))
 1612                 cpu_spinwait();
 1613 }
 1614 
 1615 void
 1616 vfs_op_barrier_wait(struct mount *mp)
 1617 {
 1618         struct vfs_op_barrier_ipi vfsopipi;
 1619 
 1620         vfsopipi.mp = mp;
 1621 
 1622         smp_rendezvous_cpus_retry(all_cpus,
 1623             smp_no_rendezvous_barrier,
 1624             vfs_op_action_func,
 1625             smp_no_rendezvous_barrier,
 1626             vfs_op_wait_func,
 1627             &vfsopipi.srcra);
 1628 }
 1629 
 1630 #ifdef DIAGNOSTIC
 1631 void
 1632 vfs_assert_mount_counters(struct mount *mp)
 1633 {
 1634         struct mount_pcpu *mpcpu;
 1635         int cpu;
 1636 
 1637         if (mp->mnt_vfs_ops == 0)
 1638                 return;
 1639 
 1640         CPU_FOREACH(cpu) {
 1641                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1642                 if (mpcpu->mntp_ref != 0 ||
 1643                     mpcpu->mntp_lockref != 0 ||
 1644                     mpcpu->mntp_writeopcount != 0)
 1645                         vfs_dump_mount_counters(mp);
 1646         }
 1647 }
 1648 
 1649 void
 1650 vfs_dump_mount_counters(struct mount *mp)
 1651 {
 1652         struct mount_pcpu *mpcpu;
 1653         int ref, lockref, writeopcount;
 1654         int cpu;
 1655 
 1656         printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);
 1657 
 1658         printf("        ref : ");
 1659         ref = mp->mnt_ref;
 1660         CPU_FOREACH(cpu) {
 1661                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1662                 printf("%d ", mpcpu->mntp_ref);
 1663                 ref += mpcpu->mntp_ref;
 1664         }
 1665         printf("\n");
 1666         printf("    lockref : ");
 1667         lockref = mp->mnt_lockref;
 1668         CPU_FOREACH(cpu) {
 1669                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1670                 printf("%d ", mpcpu->mntp_lockref);
 1671                 lockref += mpcpu->mntp_lockref;
 1672         }
 1673         printf("\n");
 1674         printf("writeopcount: ");
 1675         writeopcount = mp->mnt_writeopcount;
 1676         CPU_FOREACH(cpu) {
 1677                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1678                 printf("%d ", mpcpu->mntp_writeopcount);
 1679                 writeopcount += mpcpu->mntp_writeopcount;
 1680         }
 1681         printf("\n");
 1682 
 1683         printf("counter       struct total\n");
 1684         printf("ref             %-5d  %-5d\n", mp->mnt_ref, ref);
 1685         printf("lockref         %-5d  %-5d\n", mp->mnt_lockref, lockref);
 1686         printf("writeopcount    %-5d  %-5d\n", mp->mnt_writeopcount, writeopcount);
 1687 
 1688         panic("invalid counts on struct mount");
 1689 }
 1690 #endif
 1691 
 1692 int
 1693 vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
 1694 {
 1695         struct mount_pcpu *mpcpu;
 1696         int cpu, sum;
 1697 
 1698         switch (which) {
 1699         case MNT_COUNT_REF:
 1700                 sum = mp->mnt_ref;
 1701                 break;
 1702         case MNT_COUNT_LOCKREF:
 1703                 sum = mp->mnt_lockref;
 1704                 break;
 1705         case MNT_COUNT_WRITEOPCOUNT:
 1706                 sum = mp->mnt_writeopcount;
 1707                 break;
 1708         }
 1709 
 1710         CPU_FOREACH(cpu) {
 1711                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1712                 switch (which) {
 1713                 case MNT_COUNT_REF:
 1714                         sum += mpcpu->mntp_ref;
 1715                         break;
 1716                 case MNT_COUNT_LOCKREF:
 1717                         sum += mpcpu->mntp_lockref;
 1718                         break;
 1719                 case MNT_COUNT_WRITEOPCOUNT:
 1720                         sum += mpcpu->mntp_writeopcount;
 1721                         break;
 1722                 }
 1723         }
 1724         return (sum);
 1725 }
 1726 
 1727 /*
 1728  * Do the actual filesystem unmount.
 1729  */
 1730 int
 1731 dounmount(struct mount *mp, int flags, struct thread *td)
 1732 {
 1733         struct vnode *coveredvp, *rootvp;
 1734         int error;
 1735         uint64_t async_flag;
 1736         int mnt_gen_r;
 1737 
 1738         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
 1739                 mnt_gen_r = mp->mnt_gen;
 1740                 VI_LOCK(coveredvp);
 1741                 vholdl(coveredvp);
 1742                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 1743                 /*
 1744                  * Check for mp being unmounted while waiting for the
 1745                  * covered vnode lock.
 1746                  */
 1747                 if (coveredvp->v_mountedhere != mp ||
 1748                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1749                         VOP_UNLOCK(coveredvp);
 1750                         vdrop(coveredvp);
 1751                         vfs_rel(mp);
 1752                         return (EBUSY);
 1753                 }
 1754         }
 1755 
 1756         /*
 1757          * Only privileged root, or (if MNT_USER is set) the user that did the
 1758          * original mount is permitted to unmount this filesystem.
 1759          */
 1760         error = vfs_suser(mp, td);
 1761         if (error != 0) {
 1762                 if (coveredvp != NULL) {
 1763                         VOP_UNLOCK(coveredvp);
 1764                         vdrop(coveredvp);
 1765                 }
 1766                 vfs_rel(mp);
 1767                 return (error);
 1768         }
 1769 
 1770         vfs_op_enter(mp);
 1771 
 1772         vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
 1773         MNT_ILOCK(mp);
 1774         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
 1775             (mp->mnt_flag & MNT_UPDATE) != 0 ||
 1776             !TAILQ_EMPTY(&mp->mnt_uppers)) {
 1777                 dounmount_cleanup(mp, coveredvp, 0);
 1778                 return (EBUSY);
 1779         }
 1780         mp->mnt_kern_flag |= MNTK_UNMOUNT;
 1781         rootvp = vfs_cache_root_clear(mp);
 1782         if (coveredvp != NULL)
 1783                 vn_seqc_write_begin(coveredvp);
 1784         if (flags & MNT_NONBUSY) {
 1785                 MNT_IUNLOCK(mp);
 1786                 error = vfs_check_usecounts(mp);
 1787                 MNT_ILOCK(mp);
 1788                 if (error != 0) {
 1789                         vn_seqc_write_end(coveredvp);
 1790                         dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT);
 1791                         if (rootvp != NULL) {
 1792                                 vn_seqc_write_end(rootvp);
 1793                                 vrele(rootvp);
 1794                         }
 1795                         return (error);
 1796                 }
 1797         }
 1798         /* Allow filesystems to detect that a forced unmount is in progress. */
 1799         if (flags & MNT_FORCE) {
 1800                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 1801                 MNT_IUNLOCK(mp);
 1802                 /*
 1803                  * Must be done after setting MNTK_UNMOUNTF and before
 1804                  * waiting for mnt_lockref to become 0.
 1805                  */
 1806                 VFS_PURGE(mp);
 1807                 MNT_ILOCK(mp);
 1808         }
 1809         error = 0;
 1810         if (mp->mnt_lockref) {
 1811                 mp->mnt_kern_flag |= MNTK_DRAINING;
 1812                 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
 1813                     "mount drain", 0);
 1814         }
 1815         MNT_IUNLOCK(mp);
 1816         KASSERT(mp->mnt_lockref == 0,
 1817             ("%s: invalid lock refcount in the drain path @ %s:%d",
 1818             __func__, __FILE__, __LINE__));
 1819         KASSERT(error == 0,
 1820             ("%s: invalid return value for msleep in the drain path @ %s:%d",
 1821             __func__, __FILE__, __LINE__));
 1822 
 1823         /*
 1824          * We want to keep the vnode around so that we can vn_seqc_write_end
 1825          * after we are done with unmount. Downgrade our reference to a mere
 1826          * hold count so that we don't interfere with anything.
 1827          */
 1828         if (rootvp != NULL) {
 1829                 vhold(rootvp);
 1830                 vrele(rootvp);
 1831         }
 1832 
 1833         if (mp->mnt_flag & MNT_EXPUBLIC)
 1834                 vfs_setpublicfs(NULL, NULL, NULL);
 1835 
 1836         vfs_periodic(mp, MNT_WAIT);
 1837         MNT_ILOCK(mp);
 1838         async_flag = mp->mnt_flag & MNT_ASYNC;
 1839         mp->mnt_flag &= ~MNT_ASYNC;
 1840         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1841         MNT_IUNLOCK(mp);
 1842         vfs_deallocate_syncvnode(mp);
 1843         error = VFS_UNMOUNT(mp, flags);
 1844         vn_finished_write(mp);
 1845         /*
 1846          * If we failed to flush the dirty blocks for this mount point,
 1847          * undo all the cdir/rdir and rootvnode changes we made above,
 1848          * unless we failed to do so because the device is reporting that
 1849          * it doesn't exist anymore.
 1850          */
 1851         if (error && error != ENXIO) {
 1852                 MNT_ILOCK(mp);
 1853                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1854                         MNT_IUNLOCK(mp);
 1855                         vfs_allocate_syncvnode(mp);
 1856                         MNT_ILOCK(mp);
 1857                 }
 1858                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1859                 mp->mnt_flag |= async_flag;
 1860                 if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1861                     (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1862                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1863                 if (mp->mnt_kern_flag & MNTK_MWAIT) {
 1864                         mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1865                         wakeup(mp);
 1866                 }
 1867                 vfs_op_exit_locked(mp);
 1868                 MNT_IUNLOCK(mp);
 1869                 if (coveredvp) {
 1870                         vn_seqc_write_end(coveredvp);
 1871                         VOP_UNLOCK(coveredvp);
 1872                         vdrop(coveredvp);
 1873                 }
 1874                 if (rootvp != NULL) {
 1875                         vn_seqc_write_end(rootvp);
 1876                         vdrop(rootvp);
 1877                 }
 1878                 return (error);
 1879         }
 1880         mtx_lock(&mountlist_mtx);
 1881         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1882         mtx_unlock(&mountlist_mtx);
 1883         EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td);
 1884         if (coveredvp != NULL) {
 1885                 VI_LOCK(coveredvp);
 1886                 vn_irflag_unset_locked(coveredvp, VIRF_MOUNTPOINT);
 1887                 coveredvp->v_mountedhere = NULL;
 1888                 vn_seqc_write_end_locked(coveredvp);
 1889                 VI_UNLOCK(coveredvp);
 1890                 VOP_UNLOCK(coveredvp);
 1891                 vdrop(coveredvp);
 1892         }
 1893         mount_devctl_event("UNMOUNT", mp, false);
 1894         if (rootvp != NULL) {
 1895                 vn_seqc_write_end(rootvp);
 1896                 vdrop(rootvp);
 1897         }
 1898         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1899         if (rootvnode != NULL && mp == rootvnode->v_mount) {
 1900                 vrele(rootvnode);
 1901                 rootvnode = NULL;
 1902         }
 1903         if (mp == rootdevmp)
 1904                 rootdevmp = NULL;
 1905         vfs_mount_destroy(mp);
 1906         return (0);
 1907 }
 1908 
 1909 /*
 1910  * Report errors during filesystem mounting.
 1911  */
 1912 void
 1913 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1914 {
 1915         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1916         va_list ap;
 1917         int error, len;
 1918         char *errmsg;
 1919 
 1920         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1921         if (error || errmsg == NULL || len <= 0)
 1922                 return;
 1923 
 1924         va_start(ap, fmt);
 1925         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1926         va_end(ap);
 1927 }
 1928 
 1929 void
 1930 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
 1931 {
 1932         va_list ap;
 1933         int error, len;
 1934         char *errmsg;
 1935 
 1936         error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len);
 1937         if (error || errmsg == NULL || len <= 0)
 1938                 return;
 1939 
 1940         va_start(ap, fmt);
 1941         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1942         va_end(ap);
 1943 }
 1944 
 1945 /*
 1946  * ---------------------------------------------------------------------
 1947  * Functions for querying mount options/arguments from filesystems.
 1948  */
 1949 
 1950 /*
 1951  * Check that no unknown options are given
 1952  */
 1953 int
 1954 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1955 {
 1956         struct vfsopt *opt;
 1957         char errmsg[255];
 1958         const char **t, *p, *q;
 1959         int ret = 0;
 1960 
 1961         TAILQ_FOREACH(opt, opts, link) {
 1962                 p = opt->name;
 1963                 q = NULL;
 1964                 if (p[0] == 'n' && p[1] == 'o')
 1965                         q = p + 2;
 1966                 for (t = global_opts; *t != NULL; t++) {
 1967                         if (strcmp(*t, p) == 0)
 1968                                 break;
 1969                         if (q != NULL) {
 1970                                 if (strcmp(*t, q) == 0)
 1971                                         break;
 1972                         }
 1973                 }
 1974                 if (*t != NULL)
 1975                         continue;
 1976                 for (t = legal; *t != NULL; t++) {
 1977                         if (strcmp(*t, p) == 0)
 1978                                 break;
 1979                         if (q != NULL) {
 1980                                 if (strcmp(*t, q) == 0)
 1981                                         break;
 1982                         }
 1983                 }
 1984                 if (*t != NULL)
 1985                         continue;
 1986                 snprintf(errmsg, sizeof(errmsg),
 1987                     "mount option <%s> is unknown", p);
 1988                 ret = EINVAL;
 1989         }
 1990         if (ret != 0) {
 1991                 TAILQ_FOREACH(opt, opts, link) {
 1992                         if (strcmp(opt->name, "errmsg") == 0) {
 1993                                 strncpy((char *)opt->value, errmsg, opt->len);
 1994                                 break;
 1995                         }
 1996                 }
 1997                 if (opt == NULL)
 1998                         printf("%s\n", errmsg);
 1999         }
 2000         return (ret);
 2001 }
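
/*
 * Illustrative sketch (not compiled): a filesystem's mount routine would
 * typically validate mnt_optnew against its own list of legal options
 * before parsing them.  The option names and function are hypothetical.
 */
#if 0
static const char *example_legal_opts[] = {
        "from", "size", "export", NULL
};

static int
example_mount_checkopts(struct mount *mp)
{
        /* Reject anything that is neither a global nor a listed option. */
        return (vfs_filteropt(mp->mnt_optnew, example_legal_opts));
}
#endif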
 2002 
 2003 /*
 2004  * Get a mount option by its name.
 2005  *
 2006  * Return 0 if the option was found, ENOENT otherwise.
 2007  * If len is non-NULL it will be filled with the length
 2008  * of the option. If buf is non-NULL, it will be filled
 2009  * with the address of the option.
 2010  */
 2011 int
 2012 vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len)
 2013 {
 2014         struct vfsopt *opt;
 2015 
 2016         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 2017 
 2018         TAILQ_FOREACH(opt, opts, link) {
 2019                 if (strcmp(name, opt->name) == 0) {
 2020                         opt->seen = 1;
 2021                         if (len != NULL)
 2022                                 *len = opt->len;
 2023                         if (buf != NULL)
 2024                                 *buf = opt->value;
 2025                         return (0);
 2026                 }
 2027         }
 2028         return (ENOENT);
 2029 }
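
/*
 * Illustrative sketch (not compiled): fetching an option by name.
 * vfs_getopt() hands back a pointer into the option list, so the caller
 * must verify that string-valued options are NUL-terminated before use.
 * The option name and function are hypothetical.
 */
#if 0
static int
example_get_from(struct mount *mp, char **fromp)
{
        char *from;
        int error, len;

        error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
        if (error != 0)
                return (error);
        if (len <= 0 || from[len - 1] != '\0')
                return (EINVAL);
        *fromp = from;
        return (0);
}
#endif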
 2030 
 2031 int
 2032 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 2033 {
 2034         struct vfsopt *opt;
 2035 
 2036         if (opts == NULL)
 2037                 return (-1);
 2038 
 2039         TAILQ_FOREACH(opt, opts, link) {
 2040                 if (strcmp(name, opt->name) == 0) {
 2041                         opt->seen = 1;
 2042                         return (opt->pos);
 2043                 }
 2044         }
 2045         return (-1);
 2046 }
 2047 
 2048 int
 2049 vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value)
 2050 {
 2051         char *opt_value, *vtp;
 2052         quad_t iv;
 2053         int error, opt_len;
 2054 
 2055         error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len);
 2056         if (error != 0)
 2057                 return (error);
 2058         if (opt_len == 0 || opt_value == NULL)
 2059                 return (EINVAL);
 2060         if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0')
 2061                 return (EINVAL);
 2062         iv = strtoq(opt_value, &vtp, 0);
 2063         if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0'))
 2064                 return (EINVAL);
 2065         if (iv < 0)
 2066                 return (EINVAL);
 2067         switch (vtp[0]) {
 2068         case 't': case 'T':
 2069                 iv *= 1024;
 2070                 /* FALLTHROUGH */
 2071         case 'g': case 'G':
 2072                 iv *= 1024;
 2073                 /* FALLTHROUGH */
 2074         case 'm': case 'M':
 2075                 iv *= 1024;
 2076                 /* FALLTHROUGH */
 2077         case 'k': case 'K':
 2078                 iv *= 1024;
 2079         case '\0':
 2080                 break;
 2081         default:
 2082                 return (EINVAL);
 2083         }
 2084         *value = iv;
 2085 
 2086         return (0);
 2087 }
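
/*
 * Illustrative sketch (not compiled): parsing a size option such as
 * "size=64m".  The k/m/g/t suffixes scale the value by 1024, 1024^2,
 * 1024^3 and 1024^4 respectively.  The option name and default are
 * hypothetical.
 */
#if 0
static off_t
example_get_size(struct mount *mp)
{
        off_t size;

        if (vfs_getopt_size(mp->mnt_optnew, "size", &size) != 0)
                size = 0;       /* hypothetical default: no limit */
        return (size);
}
#endif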
 2088 
 2089 char *
 2090 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 2091 {
 2092         struct vfsopt *opt;
 2093 
 2094         *error = 0;
 2095         TAILQ_FOREACH(opt, opts, link) {
 2096                 if (strcmp(name, opt->name) != 0)
 2097                         continue;
 2098                 opt->seen = 1;
 2099                 if (opt->len == 0 ||
 2100                     ((char *)opt->value)[opt->len - 1] != '\0') {
 2101                         *error = EINVAL;
 2102                         return (NULL);
 2103                 }
 2104                 return (opt->value);
 2105         }
 2106         *error = ENOENT;
 2107         return (NULL);
 2108 }
 2109 
 2110 int
 2111 vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 2112         uint64_t val)
 2113 {
 2114         struct vfsopt *opt;
 2115 
 2116         TAILQ_FOREACH(opt, opts, link) {
 2117                 if (strcmp(name, opt->name) == 0) {
 2118                         opt->seen = 1;
 2119                         if (w != NULL)
 2120                                 *w |= val;
 2121                         return (1);
 2122                 }
 2123         }
 2124         if (w != NULL)
 2125                 *w &= ~val;
 2126         return (0);
 2127 }
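
/*
 * Illustrative sketch (not compiled): vfs_flagopt() ORs 'val' into '*w'
 * when the named option is present and clears it otherwise; the return
 * value can also serve as a plain presence test.  The option/flag
 * pairing below is an assumption for illustration.
 */
#if 0
static void
example_apply_union(struct mount *mp, uint64_t *flagsp)
{
        /* Set MNT_UNION in *flagsp iff the "union" option was given. */
        vfs_flagopt(mp->mnt_optnew, "union", flagsp, MNT_UNION);
}
#endif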
 2128 
 2129 int
 2130 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 2131 {
 2132         va_list ap;
 2133         struct vfsopt *opt;
 2134         int ret;
 2135 
 2136         KASSERT(opts != NULL, ("vfs_scanopt: caller passed 'opts' as NULL"));
 2137 
 2138         TAILQ_FOREACH(opt, opts, link) {
 2139                 if (strcmp(name, opt->name) != 0)
 2140                         continue;
 2141                 opt->seen = 1;
 2142                 if (opt->len == 0 || opt->value == NULL)
 2143                         return (0);
 2144                 if (((char *)opt->value)[opt->len - 1] != '\0')
 2145                         return (0);
 2146                 va_start(ap, fmt);
 2147                 ret = vsscanf(opt->value, fmt, ap);
 2148                 va_end(ap);
 2149                 return (ret);
 2150         }
 2151         return (0);
 2152 }
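
/*
 * Illustrative sketch (not compiled): vfs_scanopt() returns the number
 * of successful vsscanf() conversions, so a caller expecting a single
 * integer checks for a return value of 1.  The option name and default
 * are hypothetical.
 */
#if 0
static int
example_get_timeout(struct mount *mp)
{
        int timeo;

        if (vfs_scanopt(mp->mnt_optnew, "timeout", "%d", &timeo) != 1)
                timeo = 30;     /* hypothetical default */
        return (timeo);
}
#endif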
 2153 
 2154 int
 2155 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
 2156 {
 2157         struct vfsopt *opt;
 2158 
 2159         TAILQ_FOREACH(opt, opts, link) {
 2160                 if (strcmp(name, opt->name) != 0)
 2161                         continue;
 2162                 opt->seen = 1;
 2163                 if (opt->value == NULL)
 2164                         opt->len = len;
 2165                 else {
 2166                         if (opt->len != len)
 2167                                 return (EINVAL);
 2168                         bcopy(value, opt->value, len);
 2169                 }
 2170                 return (0);
 2171         }
 2172         return (ENOENT);
 2173 }
 2174 
 2175 int
 2176 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
 2177 {
 2178         struct vfsopt *opt;
 2179 
 2180         TAILQ_FOREACH(opt, opts, link) {
 2181                 if (strcmp(name, opt->name) != 0)
 2182                         continue;
 2183                 opt->seen = 1;
 2184                 if (opt->value == NULL)
 2185                         opt->len = len;
 2186                 else {
 2187                         if (opt->len < len)
 2188                                 return (EINVAL);
 2189                         opt->len = len;
 2190                         bcopy(value, opt->value, len);
 2191                 }
 2192                 return (0);
 2193         }
 2194         return (ENOENT);
 2195 }
 2196 
 2197 int
 2198 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
 2199 {
 2200         struct vfsopt *opt;
 2201 
 2202         TAILQ_FOREACH(opt, opts, link) {
 2203                 if (strcmp(name, opt->name) != 0)
 2204                         continue;
 2205                 opt->seen = 1;
 2206                 if (opt->value == NULL)
 2207                         opt->len = strlen(value) + 1;
 2208                 else if (strlcpy(opt->value, value, opt->len) >= opt->len)
 2209                         return (EINVAL);
 2210                 return (0);
 2211         }
 2212         return (ENOENT);
 2213 }
 2214 
 2215 /*
 2216  * Find and copy a mount option.
 2217  *
 2218  * The size of the buffer has to be specified
 2219  * in len; if it is not the same length as the
 2220  * mount option, EINVAL is returned.
 2221  * Returns ENOENT if the option is not found.
 2222  */
 2223 int
 2224 vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len)
 2225 {
 2226         struct vfsopt *opt;
 2227 
 2228         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 2229 
 2230         TAILQ_FOREACH(opt, opts, link) {
 2231                 if (strcmp(name, opt->name) == 0) {
 2232                         opt->seen = 1;
 2233                         if (len != opt->len)
 2234                                 return (EINVAL);
 2235                         bcopy(opt->value, dest, opt->len);
 2236                         return (0);
 2237                 }
 2238         }
 2239         return (ENOENT);
 2240 }
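
/*
 * Illustrative sketch (not compiled): vfs_copyopt() suits fixed-size
 * binary options because it insists that the destination length match
 * the stored option length exactly.  Copying a struct export_args from
 * the "export" option is shown as an assumed, typical pairing.
 */
#if 0
static int
example_get_export(struct mount *mp, struct export_args *exportp)
{
        return (vfs_copyopt(mp->mnt_optnew, "export", exportp,
            sizeof(*exportp)));
}
#endif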
 2241 
 2242 int
 2243 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 2244 {
 2245 
 2246         /*
 2247          * Filesystems only fill in part of the structure for updates; we
 2248          * have to read the entirety first to get all content.
 2249          */
 2250         if (sbp != &mp->mnt_stat)
 2251                 memcpy(sbp, &mp->mnt_stat, sizeof(*sbp));
 2252 
 2253         /*
 2254          * Set these in case the underlying filesystem fails to do so.
 2255          */
 2256         sbp->f_version = STATFS_VERSION;
 2257         sbp->f_namemax = NAME_MAX;
 2258         sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 2259 
 2260         return (mp->mnt_op->vfs_statfs(mp, sbp));
 2261 }
 2262 
 2263 void
 2264 vfs_mountedfrom(struct mount *mp, const char *from)
 2265 {
 2266 
 2267         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2268         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2269             sizeof mp->mnt_stat.f_mntfromname);
 2270 }
 2271 
 2272 /*
 2273  * ---------------------------------------------------------------------
 2274  * This is the API for building mount args and mounting filesystems from
 2275  * inside the kernel.
 2276  *
 2277  * The API works by accumulation of individual args.  First error is
 2278  * latched.
 2279  *
 2280  * XXX: should be documented in new manpage kernel_mount(9)
 2281  */
 2282 
 2283 /* A memory allocation which must be freed when we are done */
 2284 struct mntaarg {
 2285         SLIST_ENTRY(mntaarg)    next;
 2286 };
 2287 
 2288 /* The header for the mount arguments */
 2289 struct mntarg {
 2290         struct iovec *v;
 2291         int len;
 2292         int error;
 2293         SLIST_HEAD(, mntaarg)   list;
 2294 };
 2295 
 2296 /*
 2297  * Add a boolean argument.
 2298  *
 2299  * flag is the boolean value.
 2300  * name must start with "no".
 2301  */
 2302 struct mntarg *
 2303 mount_argb(struct mntarg *ma, int flag, const char *name)
 2304 {
 2305 
 2306         KASSERT(name[0] == 'n' && name[1] == 'o',
 2307             ("mount_argb(...,%s): name must start with 'no'", name));
 2308 
 2309         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2310 }
 2311 
 2312 /*
 2313  * Add an argument printf style
 2314  */
 2315 struct mntarg *
 2316 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2317 {
 2318         va_list ap;
 2319         struct mntaarg *maa;
 2320         struct sbuf *sb;
 2321         int len;
 2322 
 2323         if (ma == NULL) {
 2324                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2325                 SLIST_INIT(&ma->list);
 2326         }
 2327         if (ma->error)
 2328                 return (ma);
 2329 
 2330         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2331             M_MOUNT, M_WAITOK);
 2332         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2333         ma->v[ma->len].iov_len = strlen(name) + 1;
 2334         ma->len++;
 2335 
 2336         sb = sbuf_new_auto();
 2337         va_start(ap, fmt);
 2338         sbuf_vprintf(sb, fmt, ap);
 2339         va_end(ap);
 2340         sbuf_finish(sb);
 2341         len = sbuf_len(sb) + 1;
 2342         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2343         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2344         bcopy(sbuf_data(sb), maa + 1, len);
 2345         sbuf_delete(sb);
 2346 
 2347         ma->v[ma->len].iov_base = maa + 1;
 2348         ma->v[ma->len].iov_len = len;
 2349         ma->len++;
 2350 
 2351         return (ma);
 2352 }
 2353 
 2354 /*
 2355  * Add an argument which is a userland string.
 2356  */
 2357 struct mntarg *
 2358 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2359 {
 2360         struct mntaarg *maa;
 2361         char *tbuf;
 2362 
 2363         if (val == NULL)
 2364                 return (ma);
 2365         if (ma == NULL) {
 2366                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2367                 SLIST_INIT(&ma->list);
 2368         }
 2369         if (ma->error)
 2370                 return (ma);
 2371         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2372         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2373         tbuf = (void *)(maa + 1);
 2374         ma->error = copyinstr(val, tbuf, len, NULL);
 2375         return (mount_arg(ma, name, tbuf, -1));
 2376 }
 2377 
 2378 /*
 2379  * Plain argument.
 2380  *
 2381  * If length is -1, treat value as a C string.
 2382  */
 2383 struct mntarg *
 2384 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2385 {
 2386 
 2387         if (ma == NULL) {
 2388                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2389                 SLIST_INIT(&ma->list);
 2390         }
 2391         if (ma->error)
 2392                 return (ma);
 2393 
 2394         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2395             M_MOUNT, M_WAITOK);
 2396         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2397         ma->v[ma->len].iov_len = strlen(name) + 1;
 2398         ma->len++;
 2399 
 2400         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2401         if (len < 0)
 2402                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2403         else
 2404                 ma->v[ma->len].iov_len = len;
 2405         ma->len++;
 2406         return (ma);
 2407 }
 2408 
 2409 /*
 2410  * Free a mntarg structure
 2411  */
 2412 static void
 2413 free_mntarg(struct mntarg *ma)
 2414 {
 2415         struct mntaarg *maa;
 2416 
 2417         while (!SLIST_EMPTY(&ma->list)) {
 2418                 maa = SLIST_FIRST(&ma->list);
 2419                 SLIST_REMOVE_HEAD(&ma->list, next);
 2420                 free(maa, M_MOUNT);
 2421         }
 2422         free(ma->v, M_MOUNT);
 2423         free(ma, M_MOUNT);
 2424 }
 2425 
 2426 /*
 2427  * Mount a filesystem
 2428  */
 2429 int
 2430 kernel_mount(struct mntarg *ma, uint64_t flags)
 2431 {
 2432         struct uio auio;
 2433         int error;
 2434 
 2435         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2436         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2437         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2438 
 2439         auio.uio_iov = ma->v;
 2440         auio.uio_iovcnt = ma->len;
 2441         auio.uio_segflg = UIO_SYSSPACE;
 2442 
 2443         error = ma->error;
 2444         if (!error)
 2445                 error = vfs_donmount(curthread, flags, &auio);
 2446         free_mntarg(ma);
 2447         return (error);
 2448 }
 2449 
 2450 /*
 2451  * A printflike function to mount a filesystem.
 2452  */
 2453 int
 2454 kernel_vmount(int flags, ...)
 2455 {
 2456         struct mntarg *ma = NULL;
 2457         va_list ap;
 2458         const char *cp;
 2459         const void *vp;
 2460         int error;
 2461 
 2462         va_start(ap, flags);
 2463         for (;;) {
 2464                 cp = va_arg(ap, const char *);
 2465                 if (cp == NULL)
 2466                         break;
 2467                 vp = va_arg(ap, const void *);
 2468                 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
 2469         }
 2470         va_end(ap);
 2471 
 2472         error = kernel_mount(ma, flags);
 2473         return (error);
 2474 }
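
/*
 * Illustrative sketch (not compiled): building an argument list and
 * mounting from inside the kernel with the API above.  "fstype" and
 * "fspath" name the filesystem type and mount point; the remaining
 * option names and values are hypothetical.  kernel_mount() consumes
 * and frees the mntarg list, and the first error latched while
 * accumulating arguments is the one returned.
 */
#if 0
static int
example_kernel_mount(void)
{
        struct mntarg *ma;

        ma = mount_arg(NULL, "fstype", "tmpfs", -1);
        ma = mount_arg(ma, "fspath", "/mnt/example", -1);
        ma = mount_argf(ma, "size", "%d", 64 * 1024 * 1024);
        ma = mount_argb(ma, 0, "noexec");       /* boolean false -> "noexec" */
        return (kernel_mount(ma, MNT_NOSUID));
}
#endif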
 2475 
 2476 /* Map from mount options to printable formats. */
 2477 static struct mntoptnames optnames[] = {
 2478         MNTOPT_NAMES
 2479 };
 2480 
 2481 static void
 2482 mount_devctl_event_mntopt(struct sbuf *sb, const char *what, struct vfsoptlist *opts)
 2483 {
 2484         struct vfsopt *opt;
 2485 
 2486         if (opts == NULL || TAILQ_EMPTY(opts))
 2487                 return;
 2488         sbuf_printf(sb, " %s=\"", what);
 2489         TAILQ_FOREACH(opt, opts, link) {
 2490                 if (opt->name[0] == '\0' || (opt->len > 0 && *(char *)opt->value == '\0'))
 2491                         continue;
 2492                 devctl_safe_quote_sb(sb, opt->name);
 2493                 if (opt->len > 0) {
 2494                         sbuf_putc(sb, '=');
 2495                         devctl_safe_quote_sb(sb, opt->value);
 2496                 }
 2497                 sbuf_putc(sb, ';');
 2498         }
 2499         sbuf_putc(sb, '"');
 2500 }
 2501 
 2502 #define DEVCTL_LEN 1024
 2503 static void
 2504 mount_devctl_event(const char *type, struct mount *mp, bool donew)
 2505 {
 2506         const uint8_t *cp;
 2507         struct mntoptnames *fp;
 2508         struct sbuf sb;
 2509         struct statfs *sfp = &mp->mnt_stat;
 2510         char *buf;
 2511 
 2512         buf = malloc(DEVCTL_LEN, M_MOUNT, M_NOWAIT);
 2513         if (buf == NULL)
 2514                 return;
 2515         sbuf_new(&sb, buf, DEVCTL_LEN, SBUF_FIXEDLEN);
 2516         sbuf_cpy(&sb, "mount-point=\"");
 2517         devctl_safe_quote_sb(&sb, sfp->f_mntonname);
 2518         sbuf_cat(&sb, "\" mount-dev=\"");
 2519         devctl_safe_quote_sb(&sb, sfp->f_mntfromname);
 2520         sbuf_cat(&sb, "\" mount-type=\"");
 2521         devctl_safe_quote_sb(&sb, sfp->f_fstypename);
 2522         sbuf_cat(&sb, "\" fsid=0x");
 2523         cp = (const uint8_t *)&sfp->f_fsid.val[0];
 2524         for (int i = 0; i < sizeof(sfp->f_fsid); i++)
 2525                 sbuf_printf(&sb, "%02x", cp[i]);
 2526         sbuf_printf(&sb, " owner=%u flags=\"", sfp->f_owner);
 2527         for (fp = optnames; fp->o_opt != 0; fp++) {
 2528                 if ((mp->mnt_flag & fp->o_opt) != 0) {
 2529                         sbuf_cat(&sb, fp->o_name);
 2530                         sbuf_putc(&sb, ';');
 2531                 }
 2532         }
 2533         sbuf_putc(&sb, '"');
 2534         mount_devctl_event_mntopt(&sb, "opt", mp->mnt_opt);
 2535         if (donew)
 2536                 mount_devctl_event_mntopt(&sb, "optnew", mp->mnt_optnew);
 2537         sbuf_finish(&sb);
 2538 
 2539         if (sbuf_error(&sb) == 0)
 2540                 devctl_notify("VFS", "FS", type, sbuf_data(&sb));
 2541         sbuf_delete(&sb);
 2542         free(buf, M_MOUNT);
 2543 }
 2544 
 2545 /*
 2546  * Suspend write operations on all local writeable filesystems.  Does a
 2547  * full sync of them in the process.
 2548  *
 2549  * Iterate over the mount points in reverse order, suspending most
 2550  * recently mounted filesystems first.  It handles a case where a
 2551  * filesystem mounted from a md(4) vnode-backed device should be
 2552  * suspended before the filesystem that owns the vnode.
 2553  */
 2554 void
 2555 suspend_all_fs(void)
 2556 {
 2557         struct mount *mp;
 2558         int error;
 2559 
 2560         mtx_lock(&mountlist_mtx);
 2561         TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 2562                 error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
 2563                 if (error != 0)
 2564                         continue;
 2565                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) != MNT_LOCAL ||
 2566                     (mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 2567                         mtx_lock(&mountlist_mtx);
 2568                         vfs_unbusy(mp);
 2569                         continue;
 2570                 }
 2571                 error = vfs_write_suspend(mp, 0);
 2572                 if (error == 0) {
 2573                         MNT_ILOCK(mp);
 2574                         MPASS((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0);
 2575                         mp->mnt_kern_flag |= MNTK_SUSPEND_ALL;
 2576                         MNT_IUNLOCK(mp);
 2577                         mtx_lock(&mountlist_mtx);
 2578                 } else {
 2579                         printf("suspend of %s failed, error %d\n",
 2580                             mp->mnt_stat.f_mntonname, error);
 2581                         mtx_lock(&mountlist_mtx);
 2582                         vfs_unbusy(mp);
 2583                 }
 2584         }
 2585         mtx_unlock(&mountlist_mtx);
 2586 }
 2587 
 2588 void
 2589 resume_all_fs(void)
 2590 {
 2591         struct mount *mp;
 2592 
 2593         mtx_lock(&mountlist_mtx);
 2594         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 2595                 if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0)
 2596                         continue;
 2597                 mtx_unlock(&mountlist_mtx);
 2598                 MNT_ILOCK(mp);
 2599                 MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0);
 2600                 mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL;
 2601                 MNT_IUNLOCK(mp);
 2602                 vfs_write_resume(mp, 0);
 2603                 mtx_lock(&mountlist_mtx);
 2604                 vfs_unbusy(mp);
 2605         }
 2606         mtx_unlock(&mountlist_mtx);
 2607 }
