FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1999-2004 Poul-Henning Kamp
    5  * Copyright (c) 1999 Michael Smith
    6  * Copyright (c) 1989, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  * (c) UNIX System Laboratories, Inc.
    9  * All or some portions of this file are derived from material licensed
   10  * to the University of California by American Telephone and Telegraph
   11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   12  * the permission of UNIX System Laboratories, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include <sys/param.h>
   43 #include <sys/conf.h>
   44 #include <sys/smp.h>
   45 #include <sys/devctl.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/jail.h>
   49 #include <sys/kernel.h>
   50 #include <sys/ktr.h>
   51 #include <sys/libkern.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/namei.h>
   56 #include <sys/priv.h>
   57 #include <sys/proc.h>
   58 #include <sys/filedesc.h>
   59 #include <sys/reboot.h>
   60 #include <sys/sbuf.h>
   61 #include <sys/syscallsubr.h>
   62 #include <sys/sysproto.h>
   63 #include <sys/sx.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/systm.h>
   66 #include <sys/vnode.h>
   67 #include <vm/uma.h>
   68 
   69 #include <geom/geom.h>
   70 
   71 #include <machine/stdarg.h>
   72 
   73 #include <security/audit/audit.h>
   74 #include <security/mac/mac_framework.h>
   75 
   76 #define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)
   77 
   78 static int      vfs_domount(struct thread *td, const char *fstype, char *fspath,
   79                     uint64_t fsflags, struct vfsoptlist **optlist);
   80 static void     free_mntarg(struct mntarg *ma);
   81 
   82 static int      usermount = 0;
   83 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
   84     "Unprivileged users may mount and unmount file systems");
   85 
   86 static bool     default_autoro = false;
   87 SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
   88     "Retry failed r/w mount as r/o if no explicit ro/rw option is specified");
   89 
   90 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
   91 MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
   92 static uma_zone_t mount_zone;
   93 
   94 /* List of mounted filesystems. */
   95 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
   96 
   97 /* For any iteration/modification of mountlist */
   98 struct mtx_padalign __exclusive_cache_line mountlist_mtx;
   99 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
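      /*
       * Illustrative sketch (added annotation, not part of the original
       * source): the simplest way to inspect the list is to hold
       * mountlist_mtx across the whole walk, e.g. to count mounted
       * filesystems:
       *
       *        struct mount *mp;
       *        int count = 0;
       *
       *        mtx_lock(&mountlist_mtx);
       *        TAILQ_FOREACH(mp, &mountlist, mnt_list)
       *                count++;
       *        mtx_unlock(&mountlist_mtx);
       *
       * Longer-lived use of a mount found this way normally also takes a
       * reference (vfs_ref()) or busies it (vfs_busy()) before the list
       * lock is dropped.
       */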
  100 
  101 EVENTHANDLER_LIST_DEFINE(vfs_mounted);
  102 EVENTHANDLER_LIST_DEFINE(vfs_unmounted);
  103 
  104 static void mount_devctl_event(const char *type, struct mount *mp, bool donew);
  105 
  106 /*
  107  * Global opts, taken by all filesystems
  108  */
  109 static const char *global_opts[] = {
  110         "errmsg",
  111         "fstype",
  112         "fspath",
  113         "ro",
  114         "rw",
  115         "nosuid",
  116         "noexec",
  117         NULL
  118 };
  119 
  120 static int
  121 mount_init(void *mem, int size, int flags)
  122 {
  123         struct mount *mp;
  124 
  125         mp = (struct mount *)mem;
  126         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  127         mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
  128         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  129         mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO);
  130         mp->mnt_ref = 0;
  131         mp->mnt_vfs_ops = 1;
  132         mp->mnt_rootvnode = NULL;
  133         return (0);
  134 }
  135 
  136 static void
  137 mount_fini(void *mem, int size)
  138 {
  139         struct mount *mp;
  140 
  141         mp = (struct mount *)mem;
  142         uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu);
  143         lockdestroy(&mp->mnt_explock);
  144         mtx_destroy(&mp->mnt_listmtx);
  145         mtx_destroy(&mp->mnt_mtx);
  146 }
  147 
  148 static void
  149 vfs_mount_init(void *dummy __unused)
  150 {
  151 
  152         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
  153             NULL, mount_init, mount_fini, UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
  154 }
  155 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
  156 
  157 /*
  158  * ---------------------------------------------------------------------
  159  * Functions for building and sanitizing the mount options
  160  */
  161 
  162 /* Remove one mount option. */
  163 static void
  164 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  165 {
  166 
  167         TAILQ_REMOVE(opts, opt, link);
  168         free(opt->name, M_MOUNT);
  169         if (opt->value != NULL)
  170                 free(opt->value, M_MOUNT);
  171         free(opt, M_MOUNT);
  172 }
  173 
  174 /* Release all resources related to the mount options. */
  175 void
  176 vfs_freeopts(struct vfsoptlist *opts)
  177 {
  178         struct vfsopt *opt;
  179 
  180         while (!TAILQ_EMPTY(opts)) {
  181                 opt = TAILQ_FIRST(opts);
  182                 vfs_freeopt(opts, opt);
  183         }
  184         free(opts, M_MOUNT);
  185 }
  186 
  187 void
  188 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  189 {
  190         struct vfsopt *opt, *temp;
  191 
  192         if (opts == NULL)
  193                 return;
  194         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  195                 if (strcmp(opt->name, name) == 0)
  196                         vfs_freeopt(opts, opt);
  197         }
  198 }
  199 
  200 static int
  201 vfs_isopt_ro(const char *opt)
  202 {
  203 
  204         if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 ||
  205             strcmp(opt, "norw") == 0)
  206                 return (1);
  207         return (0);
  208 }
  209 
  210 static int
  211 vfs_isopt_rw(const char *opt)
  212 {
  213 
  214         if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0)
  215                 return (1);
  216         return (0);
  217 }
  218 
  219 /*
  220  * Check if options are equal (with or without the "no" prefix).
  221  */
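      /*
       * Illustrative examples (added annotation, not in the original source)
       * of option names the matcher below treats as equivalent:
       *
       *        "atime"   vs. "noatime"      (plain "no" prefix)
       *        "foo.bar" vs. "foo.nobar"    ("no" prefix after a '.' component)
       *        "ro", "rdonly", "norw", "rw", "noro"   (all select the same knob)
       */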
  222 static int
  223 vfs_equalopts(const char *opt1, const char *opt2)
  224 {
  225         char *p;
  226 
  227         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  228         if (strcmp(opt1, opt2) == 0)
  229                 return (1);
  230         /* "noopt" vs. "opt" */
  231         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  232                 return (1);
  233         /* "opt" vs. "noopt" */
  234         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  235                 return (1);
  236         while ((p = strchr(opt1, '.')) != NULL &&
  237             !strncmp(opt1, opt2, ++p - opt1)) {
  238                 opt2 += p - opt1;
  239                 opt1 = p;
  240                 /* "foo.noopt" vs. "foo.opt" */
  241                 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  242                         return (1);
  243                 /* "foo.opt" vs. "foo.noopt" */
  244                 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  245                         return (1);
  246         }
  247         /* "ro" / "rdonly" / "norw" / "rw" / "noro" */
  248         if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
  249             (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)))
  250                 return (1);
  251         return (0);
  252 }
  253 
  254 /*
  255  * If a mount option is specified several times,
  256  * (with or without the "no" prefix) only keep
  257  * the last occurrence of it.
  258  */
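      /*
       * Example (added annotation): given the list "noatime", "ro", "atime"
       * in that order, the pass below keeps "ro" and the final "atime" and
       * drops the earlier "noatime", because vfs_equalopts() matches "atime"
       * against "noatime".
       */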
  259 static void
  260 vfs_sanitizeopts(struct vfsoptlist *opts)
  261 {
  262         struct vfsopt *opt, *opt2, *tmp;
  263 
  264         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  265                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  266                 while (opt2 != NULL) {
  267                         if (vfs_equalopts(opt->name, opt2->name)) {
  268                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  269                                 vfs_freeopt(opts, opt2);
  270                                 opt2 = tmp;
  271                         } else {
  272                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  273                         }
  274                 }
  275         }
  276 }
  277 
  278 /*
  279  * Build a linked list of mount options from a struct uio.
  280  */
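      /*
       * Layout sketch (added annotation): the uio is expected to carry
       * name/value pairs, one option per pair of iovecs, with every name
       * NUL-terminated, e.g. as assembled for nmount(2):
       *
       *        iov[0] = "fstype"        iov[1] = "ufs"
       *        iov[2] = "fspath"        iov[3] = "/mnt"
       *        iov[4] = "from"          iov[5] = "/dev/ada0p2"
       *
       * A value may be empty (iov_len == 0), in which case opt->value is
       * left NULL.
       */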
  281 int
  282 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  283 {
  284         struct vfsoptlist *opts;
  285         struct vfsopt *opt;
  286         size_t memused, namelen, optlen;
  287         unsigned int i, iovcnt;
  288         int error;
  289 
  290         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  291         TAILQ_INIT(opts);
  292         memused = 0;
  293         iovcnt = auio->uio_iovcnt;
  294         for (i = 0; i < iovcnt; i += 2) {
  295                 namelen = auio->uio_iov[i].iov_len;
  296                 optlen = auio->uio_iov[i + 1].iov_len;
  297                 memused += sizeof(struct vfsopt) + optlen + namelen;
  298                 /*
  299                  * Avoid consuming too much memory, and attempts to overflow
  300                  * memused.
  301                  */
  302                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  303                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  304                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  305                         error = EINVAL;
  306                         goto bad;
  307                 }
  308 
  309                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  310                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  311                 opt->value = NULL;
  312                 opt->len = 0;
  313                 opt->pos = i / 2;
  314                 opt->seen = 0;
  315 
  316                 /*
  317                  * Do this early, so jumps to "bad" will free the current
  318                  * option.
  319                  */
  320                 TAILQ_INSERT_TAIL(opts, opt, link);
  321 
  322                 if (auio->uio_segflg == UIO_SYSSPACE) {
  323                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  324                 } else {
  325                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  326                             namelen);
  327                         if (error)
  328                                 goto bad;
  329                 }
  330                 /* Ensure names are null-terminated strings. */
  331                 if (namelen == 0 || opt->name[namelen - 1] != '\0') {
  332                         error = EINVAL;
  333                         goto bad;
  334                 }
  335                 if (optlen != 0) {
  336                         opt->len = optlen;
  337                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  338                         if (auio->uio_segflg == UIO_SYSSPACE) {
  339                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  340                                     optlen);
  341                         } else {
  342                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  343                                     opt->value, optlen);
  344                                 if (error)
  345                                         goto bad;
  346                         }
  347                 }
  348         }
  349         vfs_sanitizeopts(opts);
  350         *options = opts;
  351         return (0);
  352 bad:
  353         vfs_freeopts(opts);
  354         return (error);
  355 }
  356 
  357 /*
  358  * Merge the old mount options with the new ones passed
  359  * in the MNT_UPDATE case.
  360  *
  361  * XXX: This function will keep a "nofoo" option in the new
  362  * options.  E.g., if the option's canonical name is "foo",
  363  * "nofoo" ends up in the mount point's active options.
  364  */
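      /*
       * Example (added annotation): for an update where the old options were
       * { "noatime", "ro" } and the new ones are { "rw" }, the old options
       * are prepended below and vfs_sanitizeopts() then drops the old "ro",
       * since the newer "rw" overrides it; "noatime" is carried over.
       */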
  365 static void
  366 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts)
  367 {
  368         struct vfsopt *opt, *new;
  369 
  370         TAILQ_FOREACH(opt, oldopts, link) {
  371                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  372                 new->name = strdup(opt->name, M_MOUNT);
  373                 if (opt->len != 0) {
  374                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  375                         bcopy(opt->value, new->value, opt->len);
  376                 } else
  377                         new->value = NULL;
  378                 new->len = opt->len;
  379                 new->seen = opt->seen;
  380                 TAILQ_INSERT_HEAD(toopts, new, link);
  381         }
  382         vfs_sanitizeopts(toopts);
  383 }
  384 
  385 /*
  386  * Mount a filesystem.
  387  */
  388 #ifndef _SYS_SYSPROTO_H_
  389 struct nmount_args {
  390         struct iovec *iovp;
  391         unsigned int iovcnt;
  392         int flags;
  393 };
  394 #endif
  395 int
  396 sys_nmount(struct thread *td, struct nmount_args *uap)
  397 {
  398         struct uio *auio;
  399         int error;
  400         u_int iovcnt;
  401         uint64_t flags;
  402 
  403         /*
  404          * Mount flags are now 64-bits. On 32-bit architectures only
  405          * 32-bits are passed in, but from here on everything handles
  406          * 64-bit flags correctly.
  407          */
  408         flags = uap->flags;
  409 
  410         AUDIT_ARG_FFLAGS(flags);
  411         CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
  412             uap->iovp, uap->iovcnt, flags);
  413 
  414         /*
  415          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  416          * userspace to set this flag, but we must filter it out if we want
  417          * MNT_UPDATE on the root file system to work.
  418          * MNT_ROOTFS should only be set by the kernel when mounting its
  419          * root file system.
  420          */
  421         flags &= ~MNT_ROOTFS;
  422 
  423         iovcnt = uap->iovcnt;
  424         /*
  425          * Check that we have an even number of iovecs
  426          * and that we have at least two options.
  427          */
  428         if ((iovcnt & 1) || (iovcnt < 4)) {
  429                 CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
  430                     uap->iovcnt);
  431                 return (EINVAL);
  432         }
  433 
  434         error = copyinuio(uap->iovp, iovcnt, &auio);
  435         if (error) {
  436                 CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
  437                     __func__, error);
  438                 return (error);
  439         }
  440         error = vfs_donmount(td, flags, auio);
  441 
  442         free(auio, M_IOV);
  443         return (error);
  444 }
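      /*
       * Userspace sketch (added annotation, not part of the original
       * source): nmount(2) is typically invoked with name/value string
       * pairs matching the layout vfs_buildopts() expects; headers and
       * full error handling are omitted here:
       *
       *        struct iovec iov[6];
       *
       *        iov[0].iov_base = "fstype"; iov[0].iov_len = sizeof("fstype");
       *        iov[1].iov_base = "ufs";    iov[1].iov_len = sizeof("ufs");
       *        iov[2].iov_base = "fspath"; iov[2].iov_len = sizeof("fspath");
       *        iov[3].iov_base = "/mnt";   iov[3].iov_len = sizeof("/mnt");
       *        iov[4].iov_base = "from";   iov[4].iov_len = sizeof("from");
       *        iov[5].iov_base = "/dev/ada0p2";
       *        iov[5].iov_len = sizeof("/dev/ada0p2");
       *
       *        if (nmount(iov, 6, MNT_RDONLY) == -1)
       *                err(1, "nmount");
       */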
  445 
  446 /*
  447  * ---------------------------------------------------------------------
  448  * Various utility functions
  449  */
  450 
  451 /*
  452  * Get a reference on a mount point from a vnode.
  453  *
  454  * The vnode is allowed to be passed unlocked and to race against dooming.
  455  * Note that in such a case there is no guarantee the referenced mount point
  456  * will still be associated with the vnode after the function returns.
  457  */
  458 struct mount *
  459 vfs_ref_from_vp(struct vnode *vp)
  460 {
  461         struct mount *mp;
  462         struct mount_pcpu *mpcpu;
  463 
  464         mp = atomic_load_ptr(&vp->v_mount);
  465         if (__predict_false(mp == NULL)) {
  466                 return (mp);
  467         }
  468         if (vfs_op_thread_enter(mp, mpcpu)) {
  469                 if (__predict_true(mp == vp->v_mount)) {
  470                         vfs_mp_count_add_pcpu(mpcpu, ref, 1);
  471                         vfs_op_thread_exit(mp, mpcpu);
  472                 } else {
  473                         vfs_op_thread_exit(mp, mpcpu);
  474                         mp = NULL;
  475                 }
  476         } else {
  477                 MNT_ILOCK(mp);
  478                 if (mp == vp->v_mount) {
  479                         MNT_REF(mp);
  480                         MNT_IUNLOCK(mp);
  481                 } else {
  482                         MNT_IUNLOCK(mp);
  483                         mp = NULL;
  484                 }
  485         }
  486         return (mp);
  487 }
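      /*
       * Caller sketch (added annotation): because the vnode may be unlocked,
       * the returned mount is only known to have been vp->v_mount at some
       * point; a caller that needs a stable association must re-check it and
       * always pairs the reference with vfs_rel():
       *
       *        mp = vfs_ref_from_vp(vp);
       *        if (mp != NULL) {
       *                ... use mp ...
       *                vfs_rel(mp);
       *        }
       */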
  488 
  489 void
  490 vfs_ref(struct mount *mp)
  491 {
  492         struct mount_pcpu *mpcpu;
  493 
  494         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  495         if (vfs_op_thread_enter(mp, mpcpu)) {
  496                 vfs_mp_count_add_pcpu(mpcpu, ref, 1);
  497                 vfs_op_thread_exit(mp, mpcpu);
  498                 return;
  499         }
  500 
  501         MNT_ILOCK(mp);
  502         MNT_REF(mp);
  503         MNT_IUNLOCK(mp);
  504 }
  505 
  506 void
  507 vfs_rel(struct mount *mp)
  508 {
  509         struct mount_pcpu *mpcpu;
  510 
  511         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  512         if (vfs_op_thread_enter(mp, mpcpu)) {
  513                 vfs_mp_count_sub_pcpu(mpcpu, ref, 1);
  514                 vfs_op_thread_exit(mp, mpcpu);
  515                 return;
  516         }
  517 
  518         MNT_ILOCK(mp);
  519         MNT_REL(mp);
  520         MNT_IUNLOCK(mp);
  521 }
  522 
  523 /*
  524  * Allocate and initialize the mount point struct.
  525  */
  526 struct mount *
  527 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
  528     struct ucred *cred)
  529 {
  530         struct mount *mp;
  531 
  532         mp = uma_zalloc(mount_zone, M_WAITOK);
  533         bzero(&mp->mnt_startzero,
  534             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  535         mp->mnt_kern_flag = 0;
  536         mp->mnt_flag = 0;
  537         mp->mnt_rootvnode = NULL;
  538         mp->mnt_vnodecovered = NULL;
  539         mp->mnt_op = NULL;
  540         mp->mnt_vfc = NULL;
  541         TAILQ_INIT(&mp->mnt_nvnodelist);
  542         mp->mnt_nvnodelistsize = 0;
  543         TAILQ_INIT(&mp->mnt_lazyvnodelist);
  544         mp->mnt_lazyvnodelistsize = 0;
  545         MPPASS(mp->mnt_ref == 0 && mp->mnt_lockref == 0 &&
  546             mp->mnt_writeopcount == 0, mp);
  547         MPASSERT(mp->mnt_vfs_ops == 1, mp,
  548             ("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
  549         (void) vfs_busy(mp, MBF_NOWAIT);
  550         atomic_add_acq_int(&vfsp->vfc_refcount, 1);
  551         mp->mnt_op = vfsp->vfc_vfsops;
  552         mp->mnt_vfc = vfsp;
  553         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  554         mp->mnt_gen++;
  555         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  556         mp->mnt_vnodecovered = vp;
  557         mp->mnt_cred = crdup(cred);
  558         mp->mnt_stat.f_owner = cred->cr_uid;
  559         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  560         mp->mnt_iosize_max = DFLTPHYS;
  561 #ifdef MAC
  562         mac_mount_init(mp);
  563         mac_mount_create(cred, mp);
  564 #endif
  565         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  566         TAILQ_INIT(&mp->mnt_uppers);
  567         return (mp);
  568 }
  569 
  570 /*
  571  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  572  */
  573 void
  574 vfs_mount_destroy(struct mount *mp)
  575 {
  576 
  577         MPPASS(mp->mnt_vfs_ops != 0, mp);
  578 
  579         vfs_assert_mount_counters(mp);
  580 
  581         MNT_ILOCK(mp);
  582         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
  583         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  584                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  585                 wakeup(mp);
  586         }
  587         while (mp->mnt_ref)
  588                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
  589         KASSERT(mp->mnt_ref == 0,
  590             ("%s: invalid refcount in the drain path @ %s:%d", __func__,
  591             __FILE__, __LINE__));
  592         MPPASS(mp->mnt_writeopcount == 0, mp);
  593         MPPASS(mp->mnt_secondary_writes == 0, mp);
  594         atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
  595         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  596                 struct vnode *vp;
  597 
  598                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  599                         vn_printf(vp, "dangling vnode ");
  600                 panic("unmount: dangling vnode");
  601         }
  602         KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
  603         MPPASS(mp->mnt_nvnodelistsize == 0, mp);
  604         MPPASS(mp->mnt_lazyvnodelistsize == 0, mp);
  605         MPPASS(mp->mnt_lockref == 0, mp);
  606         MNT_IUNLOCK(mp);
  607 
  608         MPASSERT(mp->mnt_vfs_ops == 1, mp,
  609             ("vfs_ops should be 1 but %d found", mp->mnt_vfs_ops));
  610 
  611         MPASSERT(mp->mnt_rootvnode == NULL, mp,
  612             ("mount point still has a root vnode %p", mp->mnt_rootvnode));
  613 
  614         if (mp->mnt_vnodecovered != NULL)
  615                 vrele(mp->mnt_vnodecovered);
  616 #ifdef MAC
  617         mac_mount_destroy(mp);
  618 #endif
  619         if (mp->mnt_opt != NULL)
  620                 vfs_freeopts(mp->mnt_opt);
  621         crfree(mp->mnt_cred);
  622         uma_zfree(mount_zone, mp);
  623 }
  624 
  625 static bool
  626 vfs_should_downgrade_to_ro_mount(uint64_t fsflags, int error)
  627 {
  628         /* This is an upgrade of an existing mount. */
  629         if ((fsflags & MNT_UPDATE) != 0)
  630                 return (false);
  631         /* This is already an R/O mount. */
  632         if ((fsflags & MNT_RDONLY) != 0)
  633                 return (false);
  634 
  635         switch (error) {
  636         case ENODEV:    /* generic, geom, ... */
  637         case EACCES:    /* cam/scsi, ... */
  638         case EROFS:     /* md, mmcsd, ... */
  639                 /*
  640                  * These errors can be returned by the storage layer to signal
  641                  * that the media is read-only.  No harm in the R/O mount
  642                  * attempt if the error was returned for some other reason.
  643                  */
  644                 return (true);
  645         default:
  646                 return (false);
  647         }
  648 }
  649 
  650 int
  651 vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
  652 {
  653         struct vfsoptlist *optlist;
  654         struct vfsopt *opt, *tmp_opt;
  655         char *fstype, *fspath, *errmsg;
  656         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  657         bool autoro;
  658 
  659         errmsg = fspath = NULL;
  660         errmsg_len = fspathlen = 0;
  661         errmsg_pos = -1;
  662         autoro = default_autoro;
  663 
  664         error = vfs_buildopts(fsoptions, &optlist);
  665         if (error)
  666                 return (error);
  667 
  668         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  669                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  670 
  671         /*
  672          * We need these two options before the others,
  673          * and they are mandatory for any filesystem.
  674          * Ensure they are NUL terminated as well.
  675          */
  676         fstypelen = 0;
  677         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  678         if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
  679                 error = EINVAL;
  680                 if (errmsg != NULL)
  681                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  682                 goto bail;
  683         }
  684         fspathlen = 0;
  685         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  686         if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
  687                 error = EINVAL;
  688                 if (errmsg != NULL)
  689                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  690                 goto bail;
  691         }
  692 
  693         /*
  694          * We need to see if we have the "update" option
  695          * before we call vfs_domount(), since vfs_domount() has special
  696          * logic based on MNT_UPDATE.  This is very important
  697          * when we want to update the root filesystem.
  698          */
  699         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  700                 int do_freeopt = 0;
  701 
  702                 if (strcmp(opt->name, "update") == 0) {
  703                         fsflags |= MNT_UPDATE;
  704                         do_freeopt = 1;
  705                 }
  706                 else if (strcmp(opt->name, "async") == 0)
  707                         fsflags |= MNT_ASYNC;
  708                 else if (strcmp(opt->name, "force") == 0) {
  709                         fsflags |= MNT_FORCE;
  710                         do_freeopt = 1;
  711                 }
  712                 else if (strcmp(opt->name, "reload") == 0) {
  713                         fsflags |= MNT_RELOAD;
  714                         do_freeopt = 1;
  715                 }
  716                 else if (strcmp(opt->name, "multilabel") == 0)
  717                         fsflags |= MNT_MULTILABEL;
  718                 else if (strcmp(opt->name, "noasync") == 0)
  719                         fsflags &= ~MNT_ASYNC;
  720                 else if (strcmp(opt->name, "noatime") == 0)
  721                         fsflags |= MNT_NOATIME;
  722                 else if (strcmp(opt->name, "atime") == 0) {
  723                         free(opt->name, M_MOUNT);
  724                         opt->name = strdup("nonoatime", M_MOUNT);
  725                 }
  726                 else if (strcmp(opt->name, "noclusterr") == 0)
  727                         fsflags |= MNT_NOCLUSTERR;
  728                 else if (strcmp(opt->name, "clusterr") == 0) {
  729                         free(opt->name, M_MOUNT);
  730                         opt->name = strdup("nonoclusterr", M_MOUNT);
  731                 }
  732                 else if (strcmp(opt->name, "noclusterw") == 0)
  733                         fsflags |= MNT_NOCLUSTERW;
  734                 else if (strcmp(opt->name, "clusterw") == 0) {
  735                         free(opt->name, M_MOUNT);
  736                         opt->name = strdup("nonoclusterw", M_MOUNT);
  737                 }
  738                 else if (strcmp(opt->name, "noexec") == 0)
  739                         fsflags |= MNT_NOEXEC;
  740                 else if (strcmp(opt->name, "exec") == 0) {
  741                         free(opt->name, M_MOUNT);
  742                         opt->name = strdup("nonoexec", M_MOUNT);
  743                 }
  744                 else if (strcmp(opt->name, "nosuid") == 0)
  745                         fsflags |= MNT_NOSUID;
  746                 else if (strcmp(opt->name, "suid") == 0) {
  747                         free(opt->name, M_MOUNT);
  748                         opt->name = strdup("nonosuid", M_MOUNT);
  749                 }
  750                 else if (strcmp(opt->name, "nosymfollow") == 0)
  751                         fsflags |= MNT_NOSYMFOLLOW;
  752                 else if (strcmp(opt->name, "symfollow") == 0) {
  753                         free(opt->name, M_MOUNT);
  754                         opt->name = strdup("nonosymfollow", M_MOUNT);
  755                 }
  756                 else if (strcmp(opt->name, "noro") == 0) {
  757                         fsflags &= ~MNT_RDONLY;
  758                         autoro = false;
  759                 }
  760                 else if (strcmp(opt->name, "rw") == 0) {
  761                         fsflags &= ~MNT_RDONLY;
  762                         autoro = false;
  763                 }
  764                 else if (strcmp(opt->name, "ro") == 0) {
  765                         fsflags |= MNT_RDONLY;
  766                         autoro = false;
  767                 }
  768                 else if (strcmp(opt->name, "rdonly") == 0) {
  769                         free(opt->name, M_MOUNT);
  770                         opt->name = strdup("ro", M_MOUNT);
  771                         fsflags |= MNT_RDONLY;
  772                         autoro = false;
  773                 }
  774                 else if (strcmp(opt->name, "autoro") == 0) {
  775                         do_freeopt = 1;
  776                         autoro = true;
  777                 }
  778                 else if (strcmp(opt->name, "suiddir") == 0)
  779                         fsflags |= MNT_SUIDDIR;
  780                 else if (strcmp(opt->name, "sync") == 0)
  781                         fsflags |= MNT_SYNCHRONOUS;
  782                 else if (strcmp(opt->name, "union") == 0)
  783                         fsflags |= MNT_UNION;
  784                 else if (strcmp(opt->name, "export") == 0)
  785                         fsflags |= MNT_EXPORTED;
  786                 else if (strcmp(opt->name, "automounted") == 0) {
  787                         fsflags |= MNT_AUTOMOUNTED;
  788                         do_freeopt = 1;
  789                 } else if (strcmp(opt->name, "nocover") == 0) {
  790                         fsflags |= MNT_NOCOVER;
  791                         do_freeopt = 1;
  792                 } else if (strcmp(opt->name, "cover") == 0) {
  793                         fsflags &= ~MNT_NOCOVER;
  794                         do_freeopt = 1;
  795                 } else if (strcmp(opt->name, "emptydir") == 0) {
  796                         fsflags |= MNT_EMPTYDIR;
  797                         do_freeopt = 1;
  798                 } else if (strcmp(opt->name, "noemptydir") == 0) {
  799                         fsflags &= ~MNT_EMPTYDIR;
  800                         do_freeopt = 1;
  801                 }
  802                 if (do_freeopt)
  803                         vfs_freeopt(optlist, opt);
  804         }
  805 
  806         /*
  807          * Be ultra-paranoid about making sure the type and fspath
  808          * variables will fit in our mp buffers, including the
  809          * terminating NUL.
  810          */
  811         if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) {
  812                 error = ENAMETOOLONG;
  813                 goto bail;
  814         }
  815 
  816         error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  817         if (error == ENOENT) {
  818                 error = EINVAL;
  819                 if (errmsg != NULL)
  820                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  821                 goto bail;
  822         }
  823 
  824         /*
  825          * See if we can retry the mount read-only if the error code suggests
  826          * that the media may be read-only and the mount options allow for that.
  827          * Never try it if "[no]{ro|rw}" has been explicitly requested and not
  828          * overridden by "autoro".
  829          */
  830         if (autoro && vfs_should_downgrade_to_ro_mount(fsflags, error)) {
  831                 printf("%s: R/W mount failed, possibly R/O media,"
  832                     " trying R/O mount\n", __func__);
  833                 fsflags |= MNT_RDONLY;
  834                 error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  835         }
  836 bail:
  837         /* copyout the errmsg */
  838         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  839             && errmsg_len > 0 && errmsg != NULL) {
  840                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  841                         bcopy(errmsg,
  842                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  843                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  844                 } else {
  845                         copyout(errmsg,
  846                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  847                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  848                 }
  849         }
  850 
  851         if (optlist != NULL)
  852                 vfs_freeopts(optlist);
  853         return (error);
  854 }
  855 
  856 /*
  857  * Old mount API.
  858  */
  859 #ifndef _SYS_SYSPROTO_H_
  860 struct mount_args {
  861         char    *type;
  862         char    *path;
  863         int     flags;
  864         caddr_t data;
  865 };
  866 #endif
  867 /* ARGSUSED */
  868 int
  869 sys_mount(struct thread *td, struct mount_args *uap)
  870 {
  871         char *fstype;
  872         struct vfsconf *vfsp = NULL;
  873         struct mntarg *ma = NULL;
  874         uint64_t flags;
  875         int error;
  876 
  877         /*
  878          * Mount flags are now 64-bits. On 32-bit architectures only
  879          * 32-bits are passed in, but from here on everything handles
  880          * 64-bit flags correctly.
  881          */
  882         flags = uap->flags;
  883 
  884         AUDIT_ARG_FFLAGS(flags);
  885 
  886         /*
  887          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  888          * userspace to set this flag, but we must filter it out if we want
  889          * MNT_UPDATE on the root file system to work.
  890          * MNT_ROOTFS should only be set by the kernel when mounting its
  891          * root file system.
  892          */
  893         flags &= ~MNT_ROOTFS;
  894 
  895         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  896         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  897         if (error) {
  898                 free(fstype, M_TEMP);
  899                 return (error);
  900         }
  901 
  902         AUDIT_ARG_TEXT(fstype);
  903         vfsp = vfs_byname_kld(fstype, td, &error);
  904         free(fstype, M_TEMP);
  905         if (vfsp == NULL)
  906                 return (ENOENT);
  907         if (((vfsp->vfc_flags & VFCF_SBDRY) != 0 &&
  908             vfsp->vfc_vfsops_sd->vfs_cmount == NULL) ||
  909             ((vfsp->vfc_flags & VFCF_SBDRY) == 0 &&
  910             vfsp->vfc_vfsops->vfs_cmount == NULL))
  911                 return (EOPNOTSUPP);
  912 
  913         ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
  914         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  915         ma = mount_argb(ma, flags & MNT_RDONLY, "noro");
  916         ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid");
  917         ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");
  918 
  919         if ((vfsp->vfc_flags & VFCF_SBDRY) != 0)
  920                 return (vfsp->vfc_vfsops_sd->vfs_cmount(ma, uap->data, flags));
  921         return (vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags));
  922 }
  923 
  924 /*
  925  * vfs_domount_first(): first file system mount (not update)
  926  */
  927 static int
  928 vfs_domount_first(
  929         struct thread *td,              /* Calling thread. */
  930         struct vfsconf *vfsp,           /* File system type. */
  931         char *fspath,                   /* Mount path. */
  932         struct vnode *vp,               /* Vnode to be covered. */
  933         uint64_t fsflags,               /* Flags common to all filesystems. */
  934         struct vfsoptlist **optlist     /* Options local to the filesystem. */
  935         )
  936 {
  937         struct vattr va;
  938         struct mount *mp;
  939         struct vnode *newdp, *rootvp;
  940         int error, error1;
  941         bool unmounted;
  942 
  943         ASSERT_VOP_ELOCKED(vp, __func__);
  944         KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
  945 
  946         /*
  947          * If the jail of the calling thread lacks permission for this type of
  948          * file system, or is trying to cover its own root, deny immediately.
  949          */
  950         if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
  951             vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
  952                 vput(vp);
  953                 return (EPERM);
  954         }
  955 
  956         /*
  957          * If the user is not root, ensure that they own the directory
  958          * onto which we are attempting to mount.
  959          */
  960         error = VOP_GETATTR(vp, &va, td->td_ucred);
  961         if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
  962                 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN);
  963         if (error == 0)
  964                 error = vinvalbuf(vp, V_SAVE, 0, 0);
  965         if (vfsp->vfc_flags & VFCF_FILEMOUNT) {
  966                 if (error == 0 && vp->v_type != VDIR && vp->v_type != VREG)
  967                         error = EINVAL;
  968                 /*
  969                  * For file mounts, ensure that there is only one hardlink to the file.
  970                  */
  971                 if (error == 0 && vp->v_type == VREG && va.va_nlink != 1)
  972                         error = EINVAL;
  973         } else {
  974                 if (error == 0 && vp->v_type != VDIR)
  975                         error = ENOTDIR;
  976         }
  977         if (error == 0 && (fsflags & MNT_EMPTYDIR) != 0)
  978                 error = vfs_emptydir(vp);
  979         if (error == 0) {
  980                 VI_LOCK(vp);
  981                 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
  982                         vp->v_iflag |= VI_MOUNT;
  983                 else
  984                         error = EBUSY;
  985                 VI_UNLOCK(vp);
  986         }
  987         if (error != 0) {
  988                 vput(vp);
  989                 return (error);
  990         }
  991         vn_seqc_write_begin(vp);
  992         VOP_UNLOCK(vp);
  993 
  994         /* Allocate and initialize the filesystem. */
  995         mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
  996         /* XXXMAC: pass to vfs_mount_alloc? */
  997         mp->mnt_optnew = *optlist;
  998         /* Set the mount level flags. */
  999         mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));
 1000 
 1001         /*
 1002          * Mount the filesystem.
 1003          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1004          * get.  No freeing of cn_pnbuf.
 1005          */
 1006         error1 = 0;
 1007         unmounted = true;
 1008         if ((error = VFS_MOUNT(mp)) != 0 ||
 1009             (error1 = VFS_STATFS(mp, &mp->mnt_stat)) != 0 ||
 1010             (error1 = VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) != 0) {
 1011                 rootvp = NULL;
 1012                 if (error1 != 0) {
 1013                         MPASS(error == 0);
 1014                         rootvp = vfs_cache_root_clear(mp);
 1015                         if (rootvp != NULL) {
 1016                                 vhold(rootvp);
 1017                                 vrele(rootvp);
 1018                         }
 1019                         (void)vn_start_write(NULL, &mp, V_WAIT);
 1020                         MNT_ILOCK(mp);
 1021                         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_UNMOUNTF;
 1022                         MNT_IUNLOCK(mp);
 1023                         VFS_PURGE(mp);
 1024                         error = VFS_UNMOUNT(mp, 0);
 1025                         vn_finished_write(mp);
 1026                         if (error != 0) {
 1027                                 printf(
 1028                     "failed post-mount (%d): rollback unmount returned %d\n",
 1029                                     error1, error);
 1030                                 unmounted = false;
 1031                         }
 1032                         error = error1;
 1033                 }
 1034                 vfs_unbusy(mp);
 1035                 mp->mnt_vnodecovered = NULL;
 1036                 if (unmounted) {
 1037                         /* XXXKIB wait for mnt_lockref drain? */
 1038                         vfs_mount_destroy(mp);
 1039                 }
 1040                 VI_LOCK(vp);
 1041                 vp->v_iflag &= ~VI_MOUNT;
 1042                 VI_UNLOCK(vp);
 1043                 if (rootvp != NULL) {
 1044                         vn_seqc_write_end(rootvp);
 1045                         vdrop(rootvp);
 1046                 }
 1047                 vn_seqc_write_end(vp);
 1048                 vrele(vp);
 1049                 return (error);
 1050         }
 1051         vn_seqc_write_begin(newdp);
 1052         VOP_UNLOCK(newdp);
 1053 
 1054         if (mp->mnt_opt != NULL)
 1055                 vfs_freeopts(mp->mnt_opt);
 1056         mp->mnt_opt = mp->mnt_optnew;
 1057         *optlist = NULL;
 1058 
 1059         /*
 1060          * Prevent external consumers of mount options from reading mnt_optnew.
 1061          */
 1062         mp->mnt_optnew = NULL;
 1063 
 1064         MNT_ILOCK(mp);
 1065         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1066             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1067                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1068         else
 1069                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1070         MNT_IUNLOCK(mp);
 1071 
 1072         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1073         cache_purge(vp);
 1074         VI_LOCK(vp);
 1075         vp->v_iflag &= ~VI_MOUNT;
 1076         vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
 1077         vp->v_mountedhere = mp;
 1078         VI_UNLOCK(vp);
 1079         /* Place the new filesystem at the end of the mount list. */
 1080         mtx_lock(&mountlist_mtx);
 1081         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1082         mtx_unlock(&mountlist_mtx);
 1083         vfs_event_signal(NULL, VQ_MOUNT, 0);
 1084         vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
 1085         VOP_UNLOCK(vp);
 1086         EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td);
 1087         VOP_UNLOCK(newdp);
 1088         mount_devctl_event("MOUNT", mp, false);
 1089         mountcheckdirs(vp, newdp);
 1090         vn_seqc_write_end(vp);
 1091         vn_seqc_write_end(newdp);
 1092         vrele(newdp);
 1093         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1094                 vfs_allocate_syncvnode(mp);
 1095         vfs_op_exit(mp);
 1096         vfs_unbusy(mp);
 1097         return (0);
 1098 }
 1099 
 1100 /*
 1101  * vfs_domount_update(): update of mounted file system
 1102  */
 1103 static int
 1104 vfs_domount_update(
 1105         struct thread *td,              /* Calling thread. */
 1106         struct vnode *vp,               /* Mount point vnode. */
 1107         uint64_t fsflags,               /* Flags common to all filesystems. */
 1108         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1109         )
 1110 {
 1111         struct export_args export;
 1112         struct o2export_args o2export;
 1113         struct vnode *rootvp;
 1114         void *bufp;
 1115         struct mount *mp;
 1116         int error, export_error, i, len;
 1117         uint64_t flag;
 1118         gid_t *grps;
 1119 
 1120         ASSERT_VOP_ELOCKED(vp, __func__);
 1121         KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
 1122         mp = vp->v_mount;
 1123 
 1124         if ((vp->v_vflag & VV_ROOT) == 0) {
 1125                 if (vfs_copyopt(*optlist, "export", &export, sizeof(export))
 1126                     == 0)
 1127                         error = EXDEV;
 1128                 else
 1129                         error = EINVAL;
 1130                 vput(vp);
 1131                 return (error);
 1132         }
 1133 
 1134         /*
 1135          * We only allow the filesystem to be reloaded if it
 1136          * is currently mounted read-only.
 1137          */
 1138         flag = mp->mnt_flag;
 1139         if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
 1140                 vput(vp);
 1141                 return (EOPNOTSUPP);    /* Needs translation */
 1142         }
 1143         /*
 1144          * Only privileged root, or (if MNT_USER is set) the user that
 1145          * did the original mount is permitted to update it.
 1146          */
 1147         error = vfs_suser(mp, td);
 1148         if (error != 0) {
 1149                 vput(vp);
 1150                 return (error);
 1151         }
 1152         if (vfs_busy(mp, MBF_NOWAIT)) {
 1153                 vput(vp);
 1154                 return (EBUSY);
 1155         }
 1156         VI_LOCK(vp);
 1157         if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 1158                 VI_UNLOCK(vp);
 1159                 vfs_unbusy(mp);
 1160                 vput(vp);
 1161                 return (EBUSY);
 1162         }
 1163         vp->v_iflag |= VI_MOUNT;
 1164         VI_UNLOCK(vp);
 1165         VOP_UNLOCK(vp);
 1166 
 1167         vfs_op_enter(mp);
 1168         vn_seqc_write_begin(vp);
 1169 
 1170         rootvp = NULL;
 1171         MNT_ILOCK(mp);
 1172         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 1173                 MNT_IUNLOCK(mp);
 1174                 error = EBUSY;
 1175                 goto end;
 1176         }
 1177         mp->mnt_flag &= ~MNT_UPDATEMASK;
 1178         mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
 1179             MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
 1180         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1181                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1182         rootvp = vfs_cache_root_clear(mp);
 1183         MNT_IUNLOCK(mp);
 1184         mp->mnt_optnew = *optlist;
 1185         vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
 1186 
 1187         /*
 1188          * Mount the filesystem.
 1189          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1190          * get.  No freeing of cn_pnbuf.
 1191          */
 1192         error = VFS_MOUNT(mp);
 1193 
 1194         export_error = 0;
 1195         /* Process the export option. */
 1196         if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp,
 1197             &len) == 0) {
 1198                 /* Assume that there is only 1 ABI for each length. */
 1199                 switch (len) {
 1200                 case (sizeof(struct oexport_args)):
 1201                         bzero(&o2export, sizeof(o2export));
 1202                         /* FALLTHROUGH */
 1203                 case (sizeof(o2export)):
 1204                         bcopy(bufp, &o2export, len);
 1205                         export.ex_flags = (uint64_t)o2export.ex_flags;
 1206                         export.ex_root = o2export.ex_root;
 1207                         export.ex_uid = o2export.ex_anon.cr_uid;
 1208                         export.ex_groups = NULL;
 1209                         export.ex_ngroups = o2export.ex_anon.cr_ngroups;
 1210                         if (export.ex_ngroups > 0) {
 1211                                 if (export.ex_ngroups <= XU_NGROUPS) {
 1212                                         export.ex_groups = malloc(
 1213                                             export.ex_ngroups * sizeof(gid_t),
 1214                                             M_TEMP, M_WAITOK);
 1215                                         for (i = 0; i < export.ex_ngroups; i++)
 1216                                                 export.ex_groups[i] =
 1217                                                   o2export.ex_anon.cr_groups[i];
 1218                                 } else
 1219                                         export_error = EINVAL;
 1220                         } else if (export.ex_ngroups < 0)
 1221                                 export_error = EINVAL;
 1222                         export.ex_addr = o2export.ex_addr;
 1223                         export.ex_addrlen = o2export.ex_addrlen;
 1224                         export.ex_mask = o2export.ex_mask;
 1225                         export.ex_masklen = o2export.ex_masklen;
 1226                         export.ex_indexfile = o2export.ex_indexfile;
 1227                         export.ex_numsecflavors = o2export.ex_numsecflavors;
 1228                         if (export.ex_numsecflavors < MAXSECFLAVORS) {
 1229                                 for (i = 0; i < export.ex_numsecflavors; i++)
 1230                                         export.ex_secflavors[i] =
 1231                                             o2export.ex_secflavors[i];
 1232                         } else
 1233                                 export_error = EINVAL;
 1234                         if (export_error == 0)
 1235                                 export_error = vfs_export(mp, &export);
 1236                         free(export.ex_groups, M_TEMP);
 1237                         break;
 1238                 case (sizeof(export)):
 1239                         bcopy(bufp, &export, len);
 1240                         grps = NULL;
 1241                         if (export.ex_ngroups > 0) {
 1242                                 if (export.ex_ngroups <= NGROUPS_MAX) {
 1243                                         grps = malloc(export.ex_ngroups *
 1244                                             sizeof(gid_t), M_TEMP, M_WAITOK);
 1245                                         export_error = copyin(export.ex_groups,
 1246                                             grps, export.ex_ngroups *
 1247                                             sizeof(gid_t));
 1248                                         if (export_error == 0)
 1249                                                 export.ex_groups = grps;
 1250                                 } else
 1251                                         export_error = EINVAL;
 1252                         } else if (export.ex_ngroups == 0)
 1253                                 export.ex_groups = NULL;
 1254                         else
 1255                                 export_error = EINVAL;
 1256                         if (export_error == 0)
 1257                                 export_error = vfs_export(mp, &export);
 1258                         free(grps, M_TEMP);
 1259                         break;
 1260                 default:
 1261                         export_error = EINVAL;
 1262                         break;
 1263                 }
 1264         }
 1265 
 1266         MNT_ILOCK(mp);
 1267         if (error == 0) {
 1268                 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
 1269                     MNT_SNAPSHOT);
 1270         } else {
 1271                 /*
 1272                  * If we fail, restore old mount flags. MNT_QUOTA is special,
 1273                  * because it is not part of MNT_UPDATEMASK, but it could have
 1274                  * changed in the meantime if quotactl(2) was called.
 1275                  * All in all we want current value of MNT_QUOTA, not the old
 1276                  * one.
 1277                  */
 1278                 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
 1279         }
 1280         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1281             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1282                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1283         else
 1284                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1285         MNT_IUNLOCK(mp);
 1286 
 1287         if (error != 0)
 1288                 goto end;
 1289 
 1290         mount_devctl_event("REMOUNT", mp, true);
 1291         if (mp->mnt_opt != NULL)
 1292                 vfs_freeopts(mp->mnt_opt);
 1293         mp->mnt_opt = mp->mnt_optnew;
 1294         *optlist = NULL;
 1295         (void)VFS_STATFS(mp, &mp->mnt_stat);
 1296         /*
 1297          * Prevent external consumers of mount options from reading
 1298          * mnt_optnew.
 1299          */
 1300         mp->mnt_optnew = NULL;
 1301 
 1302         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1303                 vfs_allocate_syncvnode(mp);
 1304         else
 1305                 vfs_deallocate_syncvnode(mp);
 1306 end:
 1307         vfs_op_exit(mp);
 1308         if (rootvp != NULL) {
 1309                 vn_seqc_write_end(rootvp);
 1310                 vrele(rootvp);
 1311         }
 1312         vn_seqc_write_end(vp);
 1313         vfs_unbusy(mp);
 1314         VI_LOCK(vp);
 1315         vp->v_iflag &= ~VI_MOUNT;
 1316         VI_UNLOCK(vp);
 1317         vrele(vp);
 1318         return (error != 0 ? error : export_error);
 1319 }
 1320 
 1321 /*
 1322  * vfs_domount(): actually attempt a filesystem mount.
 1323  */
 1324 static int
 1325 vfs_domount(
 1326         struct thread *td,              /* Calling thread. */
 1327         const char *fstype,             /* Filesystem type. */
 1328         char *fspath,                   /* Mount path. */
 1329         uint64_t fsflags,               /* Flags common to all filesystems. */
 1330         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1331         )
 1332 {
 1333         struct vfsconf *vfsp;
 1334         struct nameidata nd;
 1335         struct vnode *vp;
 1336         char *pathbuf;
 1337         int error;
 1338 
 1339         /*
 1340          * Be ultra-paranoid about making sure the type and fspath
 1341          * variables will fit in our mp buffers, including the
 1342          * terminating NUL.
 1343          */
 1344         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 1345                 return (ENAMETOOLONG);
 1346 
 1347         if (jailed(td->td_ucred) || usermount == 0) {
 1348                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
 1349                         return (error);
 1350         }
 1351 
 1352         /*
 1353          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
 1354          */
 1355         if (fsflags & MNT_EXPORTED) {
 1356                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
 1357                 if (error)
 1358                         return (error);
 1359         }
 1360         if (fsflags & MNT_SUIDDIR) {
 1361                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
 1362                 if (error)
 1363                         return (error);
 1364         }
 1365         /*
 1366          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
 1367          */
 1368         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
 1369                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
 1370                         fsflags |= MNT_NOSUID | MNT_USER;
 1371         }
 1372 
 1373         /* Load KLDs before we lock the covered vnode to avoid reversals. */
 1374         vfsp = NULL;
 1375         if ((fsflags & MNT_UPDATE) == 0) {
 1376                 /* Don't try to load KLDs if we're mounting the root. */
 1377                 if (fsflags & MNT_ROOTFS) {
 1378                         if ((vfsp = vfs_byname(fstype)) == NULL)
 1379                                 return (ENODEV);
 1380                 } else {
 1381                         if ((vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
 1382                                 return (error);
 1383                 }
 1384         }
 1385 
 1386         /*
 1387          * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
 1388          */
 1389         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | WANTPARENT,
 1390             UIO_SYSSPACE, fspath, td);
 1391         error = namei(&nd);
 1392         if (error != 0)
 1393                 return (error);
 1394         vp = nd.ni_vp;
 1395         /*
 1396          * Don't allow stacking file mounts to work around problems with the way
 1397          * that namei sets nd.ni_dvp to vp_crossmp for these.
 1398          */
 1399         if (vp->v_type == VREG)
 1400                 fsflags |= MNT_NOCOVER;
 1401         if ((fsflags & MNT_UPDATE) == 0) {
 1402                 if ((vp->v_vflag & VV_ROOT) != 0 &&
 1403                     (fsflags & MNT_NOCOVER) != 0) {
 1404                         vput(vp);
 1405                         error = EBUSY;
 1406                         goto out;
 1407                 }
 1408                 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1409                 strcpy(pathbuf, fspath);
 1410                 /*
 1411                  * Note: we allow any vnode type here. If the path sanity check
 1412                  * succeeds, the type will be validated in vfs_domount_first
 1413                  * above.
 1414                  */
 1415                 if (vp->v_type == VDIR)
 1416                         error = vn_path_to_global_path(td, vp, pathbuf,
 1417                             MNAMELEN);
 1418                 else
 1419                         error = vn_path_to_global_path_hardlink(td, vp,
 1420                             nd.ni_dvp, pathbuf, MNAMELEN,
 1421                             nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen);
 1422                 if (error == 0) {
 1423                         error = vfs_domount_first(td, vfsp, pathbuf, vp,
 1424                             fsflags, optlist);
 1425                 }
 1426                 free(pathbuf, M_TEMP);
 1427         } else
 1428                 error = vfs_domount_update(td, vp, fsflags, optlist);
 1429 
 1430 out:
 1431         NDFREE(&nd, NDF_ONLY_PNBUF);
 1432         vrele(nd.ni_dvp);
 1433 
 1434         return (error);
 1435 }
 1436 
 1437 /*
 1438  * Unmount a filesystem.
 1439  *
 1440  * Note: unmount takes a path to the vnode mounted on as argument, not
 1441  * special file (as before).
 1442  */
 1443 #ifndef _SYS_SYSPROTO_H_
 1444 struct unmount_args {
 1445         char    *path;
 1446         int     flags;
 1447 };
 1448 #endif
 1449 /* ARGSUSED */
 1450 int
 1451 sys_unmount(struct thread *td, struct unmount_args *uap)
 1452 {
 1453 
 1454         return (kern_unmount(td, uap->path, uap->flags));
 1455 }
 1456 
 1457 int
 1458 kern_unmount(struct thread *td, const char *path, int flags)
 1459 {
 1460         struct nameidata nd;
 1461         struct mount *mp;
 1462         char *pathbuf;
 1463         int error, id0, id1;
 1464 
 1465         AUDIT_ARG_VALUE(flags);
 1466         if (jailed(td->td_ucred) || usermount == 0) {
 1467                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1468                 if (error)
 1469                         return (error);
 1470         }
 1471 
 1472         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1473         error = copyinstr(path, pathbuf, MNAMELEN, NULL);
 1474         if (error) {
 1475                 free(pathbuf, M_TEMP);
 1476                 return (error);
 1477         }
 1478         if (flags & MNT_BYFSID) {
 1479                 AUDIT_ARG_TEXT(pathbuf);
 1480                 /* Decode the filesystem ID. */
 1481                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1482                         free(pathbuf, M_TEMP);
 1483                         return (EINVAL);
 1484                 }
 1485 
 1486                 mtx_lock(&mountlist_mtx);
 1487                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1488                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1489                             mp->mnt_stat.f_fsid.val[1] == id1) {
 1490                                 vfs_ref(mp);
 1491                                 break;
 1492                         }
 1493                 }
 1494                 mtx_unlock(&mountlist_mtx);
 1495         } else {
 1496                 /*
 1497                  * Try to find global path for path argument.
 1498                  */
 1499                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 1500                     UIO_SYSSPACE, pathbuf, td);
 1501                 if (namei(&nd) == 0) {
 1502                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1503                         error = vn_path_to_global_path(td, nd.ni_vp, pathbuf,
 1504                             MNAMELEN);
 1505                         if (error == 0)
 1506                                 vput(nd.ni_vp);
 1507                 }
 1508                 mtx_lock(&mountlist_mtx);
 1509                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1510                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) {
 1511                                 vfs_ref(mp);
 1512                                 break;
 1513                         }
 1514                 }
 1515                 mtx_unlock(&mountlist_mtx);
 1516         }
 1517         free(pathbuf, M_TEMP);
 1518         if (mp == NULL) {
 1519                 /*
 1520                  * Previously we returned ENOENT for a nonexistent path and
 1521                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1522                  * now, so in the !MNT_BYFSID case return the more likely
 1523                  * EINVAL for compatibility.
 1524                  */
 1525                 return ((flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1526         }
 1527 
 1528         /*
 1529          * Don't allow unmounting the root filesystem.
 1530          */
 1531         if (mp->mnt_flag & MNT_ROOTFS) {
 1532                 vfs_rel(mp);
 1533                 return (EINVAL);
 1534         }
 1535         error = dounmount(mp, flags, td);
 1536         return (error);
 1537 }
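
/*
 * Illustrative sketch (not part of the original file): how a userland
 * caller would exercise the MNT_BYFSID path above.  The "/mnt/data"
 * path is an assumption; the "FSID:%d:%d" format is the one decoded by
 * kern_unmount().
 *
 *      #include <sys/param.h>
 *      #include <sys/mount.h>
 *      #include <stdio.h>
 *
 *      struct statfs sf;
 *      char fsidbuf[MNAMELEN];
 *
 *      if (statfs("/mnt/data", &sf) == 0) {
 *              snprintf(fsidbuf, sizeof(fsidbuf), "FSID:%d:%d",
 *                  sf.f_fsid.val[0], sf.f_fsid.val[1]);
 *              (void)unmount(fsidbuf, MNT_BYFSID);
 *      }
 */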
 1538 
 1539 /*
 1540  * Return an error if any of the vnodes, ignoring the root vnode
 1541  * and the syncer vnode, has a non-zero usecount.
 1542  *
 1543  * This function is purely advisory - it can return false positives
 1544  * and negatives.
 1545  */
 1546 static int
 1547 vfs_check_usecounts(struct mount *mp)
 1548 {
 1549         struct vnode *vp, *mvp;
 1550 
 1551         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 1552                 if ((vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON &&
 1553                     vp->v_usecount != 0) {
 1554                         VI_UNLOCK(vp);
 1555                         MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 1556                         return (EBUSY);
 1557                 }
 1558                 VI_UNLOCK(vp);
 1559         }
 1560 
 1561         return (0);
 1562 }
 1563 
 1564 static void
 1565 dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags)
 1566 {
 1567 
 1568         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1569         mp->mnt_kern_flag &= ~mntkflags;
 1570         if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) {
 1571                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1572                 wakeup(mp);
 1573         }
 1574         vfs_op_exit_locked(mp);
 1575         MNT_IUNLOCK(mp);
 1576         if (coveredvp != NULL) {
 1577                 VOP_UNLOCK(coveredvp);
 1578                 vdrop(coveredvp);
 1579         }
 1580         vn_finished_write(mp);
 1581 }
 1582 
 1583 /*
 1584  * There are various reference counters associated with the mount point.
 1585  * Normally it is permitted to modify them without taking the mnt ilock,
 1586  * but this behavior can be temporarily disabled if a stable value is needed
 1587  * or callers are expected to block (e.g. to not allow new users during
 1588  * forced unmount).
 1589  */
 1590 void
 1591 vfs_op_enter(struct mount *mp)
 1592 {
 1593         struct mount_pcpu *mpcpu;
 1594         int cpu;
 1595 
 1596         MNT_ILOCK(mp);
 1597         mp->mnt_vfs_ops++;
 1598         if (mp->mnt_vfs_ops > 1) {
 1599                 MNT_IUNLOCK(mp);
 1600                 return;
 1601         }
 1602         vfs_op_barrier_wait(mp);
 1603         CPU_FOREACH(cpu) {
 1604                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1605 
 1606                 mp->mnt_ref += mpcpu->mntp_ref;
 1607                 mpcpu->mntp_ref = 0;
 1608 
 1609                 mp->mnt_lockref += mpcpu->mntp_lockref;
 1610                 mpcpu->mntp_lockref = 0;
 1611 
 1612                 mp->mnt_writeopcount += mpcpu->mntp_writeopcount;
 1613                 mpcpu->mntp_writeopcount = 0;
 1614         }
 1615         MPASSERT(mp->mnt_ref > 0 && mp->mnt_lockref >= 0 &&
 1616             mp->mnt_writeopcount >= 0, mp,
 1617             ("invalid count(s): ref %d lockref %d writeopcount %d",
 1618             mp->mnt_ref, mp->mnt_lockref, mp->mnt_writeopcount));
 1619         MNT_IUNLOCK(mp);
 1620         vfs_assert_mount_counters(mp);
 1621 }
 1622 
 1623 void
 1624 vfs_op_exit_locked(struct mount *mp)
 1625 {
 1626 
 1627         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1628 
 1629         MPASSERT(mp->mnt_vfs_ops > 0, mp,
 1630             ("invalid vfs_ops count %d", mp->mnt_vfs_ops));
 1631         MPASSERT(mp->mnt_vfs_ops > 1 ||
 1632             (mp->mnt_kern_flag & (MNTK_UNMOUNT | MNTK_SUSPEND)) == 0, mp,
 1633             ("vfs_ops too low %d in unmount or suspend", mp->mnt_vfs_ops));
 1634         mp->mnt_vfs_ops--;
 1635 }
 1636 
 1637 void
 1638 vfs_op_exit(struct mount *mp)
 1639 {
 1640 
 1641         MNT_ILOCK(mp);
 1642         vfs_op_exit_locked(mp);
 1643         MNT_IUNLOCK(mp);
 1644 }
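
/*
 * Usage sketch (illustrative, not part of the original file): a caller
 * that needs a stable view of the mount counters brackets its work with
 * the pair above, which forces per-CPU updates back onto the interlocked
 * slow path for the duration:
 *
 *      vfs_op_enter(mp);
 *      ... mp->mnt_ref, mp->mnt_lockref and mp->mnt_writeopcount are now
 *          authoritative and change only under the mount interlock ...
 *      vfs_op_exit(mp);
 */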
 1645 
 1646 struct vfs_op_barrier_ipi {
 1647         struct mount *mp;
 1648         struct smp_rendezvous_cpus_retry_arg srcra;
 1649 };
 1650 
 1651 static void
 1652 vfs_op_action_func(void *arg)
 1653 {
 1654         struct vfs_op_barrier_ipi *vfsopipi;
 1655         struct mount *mp;
 1656 
 1657         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1658         mp = vfsopipi->mp;
 1659 
 1660         if (!vfs_op_thread_entered(mp))
 1661                 smp_rendezvous_cpus_done(arg);
 1662 }
 1663 
 1664 static void
 1665 vfs_op_wait_func(void *arg, int cpu)
 1666 {
 1667         struct vfs_op_barrier_ipi *vfsopipi;
 1668         struct mount *mp;
 1669         struct mount_pcpu *mpcpu;
 1670 
 1671         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1672         mp = vfsopipi->mp;
 1673 
 1674         mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1675         while (atomic_load_int(&mpcpu->mntp_thread_in_ops))
 1676                 cpu_spinwait();
 1677 }
 1678 
 1679 void
 1680 vfs_op_barrier_wait(struct mount *mp)
 1681 {
 1682         struct vfs_op_barrier_ipi vfsopipi;
 1683 
 1684         vfsopipi.mp = mp;
 1685 
 1686         smp_rendezvous_cpus_retry(all_cpus,
 1687             smp_no_rendezvous_barrier,
 1688             vfs_op_action_func,
 1689             smp_no_rendezvous_barrier,
 1690             vfs_op_wait_func,
 1691             &vfsopipi.srcra);
 1692 }
 1693 
 1694 #ifdef DIAGNOSTIC
 1695 void
 1696 vfs_assert_mount_counters(struct mount *mp)
 1697 {
 1698         struct mount_pcpu *mpcpu;
 1699         int cpu;
 1700 
 1701         if (mp->mnt_vfs_ops == 0)
 1702                 return;
 1703 
 1704         CPU_FOREACH(cpu) {
 1705                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1706                 if (mpcpu->mntp_ref != 0 ||
 1707                     mpcpu->mntp_lockref != 0 ||
 1708                     mpcpu->mntp_writeopcount != 0)
 1709                         vfs_dump_mount_counters(mp);
 1710         }
 1711 }
 1712 
 1713 void
 1714 vfs_dump_mount_counters(struct mount *mp)
 1715 {
 1716         struct mount_pcpu *mpcpu;
 1717         int ref, lockref, writeopcount;
 1718         int cpu;
 1719 
 1720         printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);
 1721 
 1722         printf("        ref : ");
 1723         ref = mp->mnt_ref;
 1724         CPU_FOREACH(cpu) {
 1725                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1726                 printf("%d ", mpcpu->mntp_ref);
 1727                 ref += mpcpu->mntp_ref;
 1728         }
 1729         printf("\n");
 1730         printf("    lockref : ");
 1731         lockref = mp->mnt_lockref;
 1732         CPU_FOREACH(cpu) {
 1733                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1734                 printf("%d ", mpcpu->mntp_lockref);
 1735                 lockref += mpcpu->mntp_lockref;
 1736         }
 1737         printf("\n");
 1738         printf("writeopcount: ");
 1739         writeopcount = mp->mnt_writeopcount;
 1740         CPU_FOREACH(cpu) {
 1741                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1742                 printf("%d ", mpcpu->mntp_writeopcount);
 1743                 writeopcount += mpcpu->mntp_writeopcount;
 1744         }
 1745         printf("\n");
 1746 
 1747         printf("counter       struct total\n");
 1748         printf("ref             %-5d  %-5d\n", mp->mnt_ref, ref);
 1749         printf("lockref         %-5d  %-5d\n", mp->mnt_lockref, lockref);
 1750         printf("writeopcount    %-5d  %-5d\n", mp->mnt_writeopcount, writeopcount);
 1751 
 1752         panic("invalid counts on struct mount");
 1753 }
 1754 #endif
 1755 
 1756 int
 1757 vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
 1758 {
 1759         struct mount_pcpu *mpcpu;
 1760         int cpu, sum;
 1761 
 1762         switch (which) {
 1763         case MNT_COUNT_REF:
 1764                 sum = mp->mnt_ref;
 1765                 break;
 1766         case MNT_COUNT_LOCKREF:
 1767                 sum = mp->mnt_lockref;
 1768                 break;
 1769         case MNT_COUNT_WRITEOPCOUNT:
 1770                 sum = mp->mnt_writeopcount;
 1771                 break;
 1772         }
 1773 
 1774         CPU_FOREACH(cpu) {
 1775                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1776                 switch (which) {
 1777                 case MNT_COUNT_REF:
 1778                         sum += mpcpu->mntp_ref;
 1779                         break;
 1780                 case MNT_COUNT_LOCKREF:
 1781                         sum += mpcpu->mntp_lockref;
 1782                         break;
 1783                 case MNT_COUNT_WRITEOPCOUNT:
 1784                         sum += mpcpu->mntp_writeopcount;
 1785                         break;
 1786                 }
 1787         }
 1788         return (sum);
 1789 }
 1790 
 1791 /*
 1792  * Do the actual filesystem unmount.
 1793  */
 1794 int
 1795 dounmount(struct mount *mp, int flags, struct thread *td)
 1796 {
 1797         struct vnode *coveredvp, *rootvp;
 1798         int error;
 1799         uint64_t async_flag;
 1800         int mnt_gen_r;
 1801 
 1802         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
 1803                 mnt_gen_r = mp->mnt_gen;
 1804                 VI_LOCK(coveredvp);
 1805                 vholdl(coveredvp);
 1806                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 1807                 /*
 1808                  * Check for mp being unmounted while waiting for the
 1809                  * covered vnode lock.
 1810                  */
 1811                 if (coveredvp->v_mountedhere != mp ||
 1812                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1813                         VOP_UNLOCK(coveredvp);
 1814                         vdrop(coveredvp);
 1815                         vfs_rel(mp);
 1816                         return (EBUSY);
 1817                 }
 1818         }
 1819 
 1820         /*
 1821          * Only privileged root, or (if MNT_USER is set) the user that did the
 1822          * original mount, is permitted to unmount this filesystem.
 1823          */
 1824         error = vfs_suser(mp, td);
 1825         if (error != 0) {
 1826                 if (coveredvp != NULL) {
 1827                         VOP_UNLOCK(coveredvp);
 1828                         vdrop(coveredvp);
 1829                 }
 1830                 vfs_rel(mp);
 1831                 return (error);
 1832         }
 1833 
 1834         vfs_op_enter(mp);
 1835 
 1836         vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
 1837         MNT_ILOCK(mp);
 1838         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
 1839             (mp->mnt_flag & MNT_UPDATE) != 0 ||
 1840             !TAILQ_EMPTY(&mp->mnt_uppers)) {
 1841                 dounmount_cleanup(mp, coveredvp, 0);
 1842                 return (EBUSY);
 1843         }
 1844         mp->mnt_kern_flag |= MNTK_UNMOUNT;
 1845         rootvp = vfs_cache_root_clear(mp);
 1846         if (coveredvp != NULL)
 1847                 vn_seqc_write_begin(coveredvp);
 1848         if (flags & MNT_NONBUSY) {
 1849                 MNT_IUNLOCK(mp);
 1850                 error = vfs_check_usecounts(mp);
 1851                 MNT_ILOCK(mp);
 1852                 if (error != 0) {
 1853                         vn_seqc_write_end(coveredvp);
 1854                         dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT);
 1855                         if (rootvp != NULL) {
 1856                                 vn_seqc_write_end(rootvp);
 1857                                 vrele(rootvp);
 1858                         }
 1859                         return (error);
 1860                 }
 1861         }
 1862         /* Allow filesystems to detect that a forced unmount is in progress. */
 1863         if (flags & MNT_FORCE) {
 1864                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 1865                 MNT_IUNLOCK(mp);
 1866                 /*
 1867                  * Must be done after setting MNTK_UNMOUNTF and before
 1868                  * waiting for mnt_lockref to become 0.
 1869                  */
 1870                 VFS_PURGE(mp);
 1871                 MNT_ILOCK(mp);
 1872         }
 1873         error = 0;
 1874         if (mp->mnt_lockref) {
 1875                 mp->mnt_kern_flag |= MNTK_DRAINING;
 1876                 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
 1877                     "mount drain", 0);
 1878         }
 1879         MNT_IUNLOCK(mp);
 1880         KASSERT(mp->mnt_lockref == 0,
 1881             ("%s: invalid lock refcount in the drain path @ %s:%d",
 1882             __func__, __FILE__, __LINE__));
 1883         KASSERT(error == 0,
 1884             ("%s: invalid return value for msleep in the drain path @ %s:%d",
 1885             __func__, __FILE__, __LINE__));
 1886 
 1887         /*
 1888          * We want to keep the vnode around so that we can vn_seqc_write_end
 1889          * after we are done with unmount. Downgrade our reference to a mere
 1890          * hold count so that we don't interfere with anything.
 1891          */
 1892         if (rootvp != NULL) {
 1893                 vhold(rootvp);
 1894                 vrele(rootvp);
 1895         }
 1896 
 1897         if (mp->mnt_flag & MNT_EXPUBLIC)
 1898                 vfs_setpublicfs(NULL, NULL, NULL);
 1899 
 1900         vfs_periodic(mp, MNT_WAIT);
 1901         MNT_ILOCK(mp);
 1902         async_flag = mp->mnt_flag & MNT_ASYNC;
 1903         mp->mnt_flag &= ~MNT_ASYNC;
 1904         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1905         MNT_IUNLOCK(mp);
 1906         vfs_deallocate_syncvnode(mp);
 1907         error = VFS_UNMOUNT(mp, flags);
 1908         vn_finished_write(mp);
 1909         /*
 1910          * If we failed to flush the dirty blocks for this mount point,
 1911          * undo all the cdir/rdir and rootvnode changes we made above.
 1912          * Unless we failed to do so because the device is reporting that
 1913          * it doesn't exist anymore.
 1914          */
 1915         if (error && error != ENXIO) {
 1916                 MNT_ILOCK(mp);
 1917                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1918                         MNT_IUNLOCK(mp);
 1919                         vfs_allocate_syncvnode(mp);
 1920                         MNT_ILOCK(mp);
 1921                 }
 1922                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1923                 mp->mnt_flag |= async_flag;
 1924                 if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1925                     (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1926                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1927                 if (mp->mnt_kern_flag & MNTK_MWAIT) {
 1928                         mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1929                         wakeup(mp);
 1930                 }
 1931                 vfs_op_exit_locked(mp);
 1932                 MNT_IUNLOCK(mp);
 1933                 if (coveredvp) {
 1934                         vn_seqc_write_end(coveredvp);
 1935                         VOP_UNLOCK(coveredvp);
 1936                         vdrop(coveredvp);
 1937                 }
 1938                 if (rootvp != NULL) {
 1939                         vn_seqc_write_end(rootvp);
 1940                         vdrop(rootvp);
 1941                 }
 1942                 return (error);
 1943         }
 1944         mtx_lock(&mountlist_mtx);
 1945         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1946         mtx_unlock(&mountlist_mtx);
 1947         EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td);
 1948         if (coveredvp != NULL) {
 1949                 VI_LOCK(coveredvp);
 1950                 vn_irflag_unset_locked(coveredvp, VIRF_MOUNTPOINT);
 1951                 coveredvp->v_mountedhere = NULL;
 1952                 vn_seqc_write_end_locked(coveredvp);
 1953                 VI_UNLOCK(coveredvp);
 1954                 VOP_UNLOCK(coveredvp);
 1955                 vdrop(coveredvp);
 1956         }
 1957         mount_devctl_event("UNMOUNT", mp, false);
 1958         if (rootvp != NULL) {
 1959                 vn_seqc_write_end(rootvp);
 1960                 vdrop(rootvp);
 1961         }
 1962         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1963         if (rootvnode != NULL && mp == rootvnode->v_mount) {
 1964                 vrele(rootvnode);
 1965                 rootvnode = NULL;
 1966         }
 1967         if (mp == rootdevmp)
 1968                 rootdevmp = NULL;
 1969         vfs_mount_destroy(mp);
 1970         return (0);
 1971 }
 1972 
 1973 /*
 1974  * Report errors during filesystem mounting.
 1975  */
 1976 void
 1977 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1978 {
 1979         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1980         va_list ap;
 1981         int error, len;
 1982         char *errmsg;
 1983 
 1984         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1985         if (error || errmsg == NULL || len <= 0)
 1986                 return;
 1987 
 1988         va_start(ap, fmt);
 1989         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1990         va_end(ap);
 1991 }
 1992 
 1993 void
 1994 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
 1995 {
 1996         va_list ap;
 1997         int error, len;
 1998         char *errmsg;
 1999 
 2000         error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len);
 2001         if (error || errmsg == NULL || len <= 0)
 2002                 return;
 2003 
 2004         va_start(ap, fmt);
 2005         vsnprintf(errmsg, (size_t)len, fmt, ap);
 2006         va_end(ap);
 2007 }
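
/*
 * Illustrative sketch (not part of the original file): a filesystem mount
 * routine reporting a descriptive error through the "errmsg" option.
 * "examplefs" and the "from" option name are hypothetical.
 *
 *      char *from;
 *      int error;
 *
 *      error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, NULL);
 *      if (error != 0) {
 *              vfs_mount_error(mp, "examplefs: missing \"from\" option");
 *              return (EINVAL);
 *      }
 *
 * The message reaches userland only if the nmount(2) caller supplied an
 * "errmsg" buffer, as mount(8) does.
 */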
 2008 
 2009 /*
 2010  * ---------------------------------------------------------------------
 2011  * Functions for querying mount options/arguments from filesystems.
 2012  */
 2013 
 2014 /*
 2015  * Check that no unknown options are given
 2016  */
 2017 int
 2018 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 2019 {
 2020         struct vfsopt *opt;
 2021         char errmsg[255];
 2022         const char **t, *p, *q;
 2023         int ret = 0;
 2024 
 2025         TAILQ_FOREACH(opt, opts, link) {
 2026                 p = opt->name;
 2027                 q = NULL;
 2028                 if (p[0] == 'n' && p[1] == 'o')
 2029                         q = p + 2;
 2030                 for (t = global_opts; *t != NULL; t++) {
 2031                         if (strcmp(*t, p) == 0)
 2032                                 break;
 2033                         if (q != NULL) {
 2034                                 if (strcmp(*t, q) == 0)
 2035                                         break;
 2036                         }
 2037                 }
 2038                 if (*t != NULL)
 2039                         continue;
 2040                 for (t = legal; *t != NULL; t++) {
 2041                         if (strcmp(*t, p) == 0)
 2042                                 break;
 2043                         if (q != NULL) {
 2044                                 if (strcmp(*t, q) == 0)
 2045                                         break;
 2046                         }
 2047                 }
 2048                 if (*t != NULL)
 2049                         continue;
 2050                 snprintf(errmsg, sizeof(errmsg),
 2051                     "mount option <%s> is unknown", p);
 2052                 ret = EINVAL;
 2053         }
 2054         if (ret != 0) {
 2055                 TAILQ_FOREACH(opt, opts, link) {
 2056                         if (strcmp(opt->name, "errmsg") == 0) {
 2057                                 strncpy((char *)opt->value, errmsg, opt->len);
 2058                                 break;
 2059                         }
 2060                 }
 2061                 if (opt == NULL)
 2062                         printf("%s\n", errmsg);
 2063         }
 2064         return (ret);
 2065 }
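
/*
 * Illustrative sketch (not part of the original file): a filesystem
 * validating its private option list.  "examplefs_opts" and the option
 * names in it are hypothetical.
 *
 *      static const char *examplefs_opts[] = {
 *              "from", "export", "maxfilesize", NULL
 *      };
 *      ...
 *      error = vfs_filteropt(mp->mnt_optnew, examplefs_opts);
 *      if (error != 0)
 *              return (error);
 *
 * Negated forms are accepted automatically: a "nomaxfilesize" option
 * matches the "maxfilesize" entry because the leading "no" is stripped
 * before comparison.
 */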
 2066 
 2067 /*
 2068  * Get a mount option by its name.
 2069  *
 2070  * Return 0 if the option was found, ENOENT otherwise.
 2071  * If len is non-NULL it will be filled with the length
 2072  * of the option. If buf is non-NULL, it will be filled
 2073  * with the address of the option.
 2074  */
 2075 int
 2076 vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len)
 2077 {
 2078         struct vfsopt *opt;
 2079 
 2080         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 2081 
 2082         TAILQ_FOREACH(opt, opts, link) {
 2083                 if (strcmp(name, opt->name) == 0) {
 2084                         opt->seen = 1;
 2085                         if (len != NULL)
 2086                                 *len = opt->len;
 2087                         if (buf != NULL)
 2088                                 *buf = opt->value;
 2089                         return (0);
 2090                 }
 2091         }
 2092         return (ENOENT);
 2093 }
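
/*
 * Illustrative sketch (not part of the original file): fetching a string
 * option and checking that it is NUL-terminated.  The "from" option name
 * is hypothetical.
 *
 *      char *from;
 *      int error, len;
 *
 *      error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
 *      if (error == 0 && (len <= 0 || from[len - 1] != '\0'))
 *              error = EINVAL;
 */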
 2094 
 2095 int
 2096 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 2097 {
 2098         struct vfsopt *opt;
 2099 
 2100         if (opts == NULL)
 2101                 return (-1);
 2102 
 2103         TAILQ_FOREACH(opt, opts, link) {
 2104                 if (strcmp(name, opt->name) == 0) {
 2105                         opt->seen = 1;
 2106                         return (opt->pos);
 2107                 }
 2108         }
 2109         return (-1);
 2110 }
 2111 
 2112 int
 2113 vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value)
 2114 {
 2115         char *opt_value, *vtp;
 2116         quad_t iv;
 2117         int error, opt_len;
 2118 
 2119         error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len);
 2120         if (error != 0)
 2121                 return (error);
 2122         if (opt_len == 0 || opt_value == NULL)
 2123                 return (EINVAL);
 2124         if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0')
 2125                 return (EINVAL);
 2126         iv = strtoq(opt_value, &vtp, 0);
 2127         if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0'))
 2128                 return (EINVAL);
 2129         if (iv < 0)
 2130                 return (EINVAL);
 2131         switch (vtp[0]) {
 2132         case 't': case 'T':
 2133                 iv *= 1024;
 2134                 /* FALLTHROUGH */
 2135         case 'g': case 'G':
 2136                 iv *= 1024;
 2137                 /* FALLTHROUGH */
 2138         case 'm': case 'M':
 2139                 iv *= 1024;
 2140                 /* FALLTHROUGH */
 2141         case 'k': case 'K':
 2142                 iv *= 1024;     /* FALLTHROUGH */
 2143         case '\0':
 2144                 break;
 2145         default:
 2146                 return (EINVAL);
 2147         }
 2148         *value = iv;
 2149 
 2150         return (0);
 2151 }
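
/*
 * Illustrative sketch (not part of the original file): a "maxsize" option
 * (hypothetical name) given as the string "64m" yields 64 * 1024 * 1024
 * here, since each recognized suffix falls through to the next multiplier.
 *
 *      off_t maxsize;
 *
 *      if (vfs_getopt_size(mp->mnt_optnew, "maxsize", &maxsize) != 0)
 *              maxsize = 0;
 */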
 2152 
 2153 char *
 2154 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 2155 {
 2156         struct vfsopt *opt;
 2157 
 2158         *error = 0;
 2159         TAILQ_FOREACH(opt, opts, link) {
 2160                 if (strcmp(name, opt->name) != 0)
 2161                         continue;
 2162                 opt->seen = 1;
 2163                 if (opt->len == 0 ||
 2164                     ((char *)opt->value)[opt->len - 1] != '\0') {
 2165                         *error = EINVAL;
 2166                         return (NULL);
 2167                 }
 2168                 return (opt->value);
 2169         }
 2170         *error = ENOENT;
 2171         return (NULL);
 2172 }
 2173 
 2174 int
 2175 vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 2176         uint64_t val)
 2177 {
 2178         struct vfsopt *opt;
 2179 
 2180         TAILQ_FOREACH(opt, opts, link) {
 2181                 if (strcmp(name, opt->name) == 0) {
 2182                         opt->seen = 1;
 2183                         if (w != NULL)
 2184                                 *w |= val;
 2185                         return (1);
 2186                 }
 2187         }
 2188         if (w != NULL)
 2189                 *w &= ~val;
 2190         return (0);
 2191 }
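
/*
 * Illustrative sketch (not part of the original file): turning the mere
 * presence of an option into a flag bit.  The "compress" option name and
 * EXAMPLEFS_MNT_COMPRESS are hypothetical.
 *
 *      uint64_t ffl;
 *
 *      ffl = 0;
 *      (void)vfs_flagopt(mp->mnt_optnew, "compress", &ffl,
 *          EXAMPLEFS_MNT_COMPRESS);
 */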
 2192 
 2193 int
 2194 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 2195 {
 2196         va_list ap;
 2197         struct vfsopt *opt;
 2198         int ret;
 2199 
 2200         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 2201 
 2202         TAILQ_FOREACH(opt, opts, link) {
 2203                 if (strcmp(name, opt->name) != 0)
 2204                         continue;
 2205                 opt->seen = 1;
 2206                 if (opt->len == 0 || opt->value == NULL)
 2207                         return (0);
 2208                 if (((char *)opt->value)[opt->len - 1] != '\0')
 2209                         return (0);
 2210                 va_start(ap, fmt);
 2211                 ret = vsscanf(opt->value, fmt, ap);
 2212                 va_end(ap);
 2213                 return (ret);
 2214         }
 2215         return (0);
 2216 }
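
/*
 * Illustrative sketch (not part of the original file): parsing a numeric
 * option scanf-style.  The "timeout" option name is hypothetical; the
 * return value is the vsscanf() conversion count, so 1 means success here.
 *
 *      int timeout;
 *
 *      if (vfs_scanopt(mp->mnt_optnew, "timeout", "%d", &timeout) != 1)
 *              timeout = 30;
 */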
 2217 
 2218 int
 2219 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
 2220 {
 2221         struct vfsopt *opt;
 2222 
 2223         TAILQ_FOREACH(opt, opts, link) {
 2224                 if (strcmp(name, opt->name) != 0)
 2225                         continue;
 2226                 opt->seen = 1;
 2227                 if (opt->value == NULL)
 2228                         opt->len = len;
 2229                 else {
 2230                         if (opt->len != len)
 2231                                 return (EINVAL);
 2232                         bcopy(value, opt->value, len);
 2233                 }
 2234                 return (0);
 2235         }
 2236         return (ENOENT);
 2237 }
 2238 
 2239 int
 2240 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
 2241 {
 2242         struct vfsopt *opt;
 2243 
 2244         TAILQ_FOREACH(opt, opts, link) {
 2245                 if (strcmp(name, opt->name) != 0)
 2246                         continue;
 2247                 opt->seen = 1;
 2248                 if (opt->value == NULL)
 2249                         opt->len = len;
 2250                 else {
 2251                         if (opt->len < len)
 2252                                 return (EINVAL);
 2253                         opt->len = len;
 2254                         bcopy(value, opt->value, len);
 2255                 }
 2256                 return (0);
 2257         }
 2258         return (ENOENT);
 2259 }
 2260 
 2261 int
 2262 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
 2263 {
 2264         struct vfsopt *opt;
 2265 
 2266         TAILQ_FOREACH(opt, opts, link) {
 2267                 if (strcmp(name, opt->name) != 0)
 2268                         continue;
 2269                 opt->seen = 1;
 2270                 if (opt->value == NULL)
 2271                         opt->len = strlen(value) + 1;
 2272                 else if (strlcpy(opt->value, value, opt->len) >= opt->len)
 2273                         return (EINVAL);
 2274                 return (0);
 2275         }
 2276         return (ENOENT);
 2277 }
 2278 
 2279 /*
 2280  * Find and copy a mount option.
 2281  *
 2282  * The size of the destination buffer has to be
 2283  * specified in len; if it does not match the length
 2284  * of the mount option, EINVAL is returned.
 2285  * Returns ENOENT if the option is not found.
 2286  */
 2287 int
 2288 vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len)
 2289 {
 2290         struct vfsopt *opt;
 2291 
 2292         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 2293 
 2294         TAILQ_FOREACH(opt, opts, link) {
 2295                 if (strcmp(name, opt->name) == 0) {
 2296                         opt->seen = 1;
 2297                         if (len != opt->len)
 2298                                 return (EINVAL);
 2299                         bcopy(opt->value, dest, opt->len);
 2300                         return (0);
 2301                 }
 2302         }
 2303         return (ENOENT);
 2304 }
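
/*
 * Illustrative sketch (not part of the original file): copying a
 * fixed-size binary option into a local structure.  The pairing of the
 * "export" option with struct export_args is used only as an example.
 *
 *      struct export_args export;
 *      int error;
 *
 *      error = vfs_copyopt(mp->mnt_optnew, "export", &export,
 *          sizeof(export));
 */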
 2305 
 2306 int
 2307 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 2308 {
 2309 
 2310         /*
 2311          * Filesystems only fill in part of the structure for updates; we
 2312          * have to read the entirety first to get all content.
 2313          */
 2314         if (sbp != &mp->mnt_stat)
 2315                 memcpy(sbp, &mp->mnt_stat, sizeof(*sbp));
 2316 
 2317         /*
 2318          * Set these in case the underlying filesystem fails to do so.
 2319          */
 2320         sbp->f_version = STATFS_VERSION;
 2321         sbp->f_namemax = NAME_MAX;
 2322         sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 2323 
 2324         return (mp->mnt_op->vfs_statfs(mp, sbp));
 2325 }
 2326 
 2327 void
 2328 vfs_mountedfrom(struct mount *mp, const char *from)
 2329 {
 2330 
 2331         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2332         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2333             sizeof mp->mnt_stat.f_mntfromname);
 2334 }
 2335 
 2336 /*
 2337  * ---------------------------------------------------------------------
 2338  * This is the api for building mount args and mounting filesystems from
 2339  * inside the kernel.
 2340  *
 2341  * The API works by accumulating individual args.  The first error is
 2342  * latched and eventually returned by kernel_mount().
 2343  *
 2344  * XXX: should be documented in new manpage kernel_mount(9)
 2345  */
 2346 
 2347 /* A memory allocation which must be freed when we are done */
 2348 struct mntaarg {
 2349         SLIST_ENTRY(mntaarg)    next;
 2350 };
 2351 
 2352 /* The header for the mount arguments */
 2353 struct mntarg {
 2354         struct iovec *v;
 2355         int len;
 2356         int error;
 2357         SLIST_HEAD(, mntaarg)   list;
 2358 };
 2359 
 2360 /*
 2361  * Add a boolean argument.
 2362  *
 2363  * flag is the boolean value.
 2364  * name must start with "no".
 2365  */
 2366 struct mntarg *
 2367 mount_argb(struct mntarg *ma, int flag, const char *name)
 2368 {
 2369 
 2370         KASSERT(name[0] == 'n' && name[1] == 'o',
 2371             ("mount_argb(...,%s): name must start with 'no'", name));
 2372 
 2373         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2374 }
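
/*
 * Illustrative sketch (not part of the original file): because the "no"
 * prefix is stripped when flag is nonzero, a single call covers both
 * polarities of an option; "noatime" is just an example name.
 *
 *      ma = mount_argb(ma, want_atime, "noatime");
 *
 * This adds "atime" when want_atime is nonzero and "noatime" otherwise.
 */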
 2375 
 2376 /*
 2377  * Add an argument printf style
 2378  */
 2379 struct mntarg *
 2380 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2381 {
 2382         va_list ap;
 2383         struct mntaarg *maa;
 2384         struct sbuf *sb;
 2385         int len;
 2386 
 2387         if (ma == NULL) {
 2388                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2389                 SLIST_INIT(&ma->list);
 2390         }
 2391         if (ma->error)
 2392                 return (ma);
 2393 
 2394         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2395             M_MOUNT, M_WAITOK);
 2396         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2397         ma->v[ma->len].iov_len = strlen(name) + 1;
 2398         ma->len++;
 2399 
 2400         sb = sbuf_new_auto();
 2401         va_start(ap, fmt);
 2402         sbuf_vprintf(sb, fmt, ap);
 2403         va_end(ap);
 2404         sbuf_finish(sb);
 2405         len = sbuf_len(sb) + 1;
 2406         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2407         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2408         bcopy(sbuf_data(sb), maa + 1, len);
 2409         sbuf_delete(sb);
 2410 
 2411         ma->v[ma->len].iov_base = maa + 1;
 2412         ma->v[ma->len].iov_len = len;
 2413         ma->len++;
 2414 
 2415         return (ma);
 2416 }
 2417 
 2418 /*
 2419  * Add an argument which is a userland string.
 2420  */
 2421 struct mntarg *
 2422 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2423 {
 2424         struct mntaarg *maa;
 2425         char *tbuf;
 2426 
 2427         if (val == NULL)
 2428                 return (ma);
 2429         if (ma == NULL) {
 2430                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2431                 SLIST_INIT(&ma->list);
 2432         }
 2433         if (ma->error)
 2434                 return (ma);
 2435         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2436         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2437         tbuf = (void *)(maa + 1);
 2438         ma->error = copyinstr(val, tbuf, len, NULL);
 2439         return (mount_arg(ma, name, tbuf, -1));
 2440 }
 2441 
 2442 /*
 2443  * Plain argument.
 2444  *
 2445  * If length is -1, treat value as a C string.
 2446  */
 2447 struct mntarg *
 2448 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2449 {
 2450 
 2451         if (ma == NULL) {
 2452                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2453                 SLIST_INIT(&ma->list);
 2454         }
 2455         if (ma->error)
 2456                 return (ma);
 2457 
 2458         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2459             M_MOUNT, M_WAITOK);
 2460         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2461         ma->v[ma->len].iov_len = strlen(name) + 1;
 2462         ma->len++;
 2463 
 2464         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2465         if (len < 0)
 2466                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2467         else
 2468                 ma->v[ma->len].iov_len = len;
 2469         ma->len++;
 2470         return (ma);
 2471 }
 2472 
 2473 /*
 2474  * Free a mntarg structure
 2475  */
 2476 static void
 2477 free_mntarg(struct mntarg *ma)
 2478 {
 2479         struct mntaarg *maa;
 2480 
 2481         while (!SLIST_EMPTY(&ma->list)) {
 2482                 maa = SLIST_FIRST(&ma->list);
 2483                 SLIST_REMOVE_HEAD(&ma->list, next);
 2484                 free(maa, M_MOUNT);
 2485         }
 2486         free(ma->v, M_MOUNT);
 2487         free(ma, M_MOUNT);
 2488 }
 2489 
 2490 /*
 2491  * Mount a filesystem
 2492  */
 2493 int
 2494 kernel_mount(struct mntarg *ma, uint64_t flags)
 2495 {
 2496         struct uio auio;
 2497         int error;
 2498 
 2499         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2500         KASSERT(ma->error != 0 || ma->v != NULL, ("kernel_mount NULL ma->v"));
 2501         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2502 
 2503         error = ma->error;
 2504         if (error == 0) {
 2505                 auio.uio_iov = ma->v;
 2506                 auio.uio_iovcnt = ma->len;
 2507                 auio.uio_segflg = UIO_SYSSPACE;
 2508                 error = vfs_donmount(curthread, flags, &auio);
 2509         }
 2510         free_mntarg(ma);
 2511         return (error);
 2512 }
 2513 
 2514 /*
 2515  * A printflike function to mount a filesystem.
 2516  */
 2517 int
 2518 kernel_vmount(int flags, ...)
 2519 {
 2520         struct mntarg *ma = NULL;
 2521         va_list ap;
 2522         const char *cp;
 2523         const void *vp;
 2524         int error;
 2525 
 2526         va_start(ap, flags);
 2527         for (;;) {
 2528                 cp = va_arg(ap, const char *);
 2529                 if (cp == NULL)
 2530                         break;
 2531                 vp = va_arg(ap, const void *);
 2532                 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
 2533         }
 2534         va_end(ap);
 2535 
 2536         error = kernel_mount(ma, flags);
 2537         return (error);
 2538 }
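
/*
 * Illustrative sketch (not part of the original file): mounting a devfs
 * instance from inside the kernel with the printf-like wrapper.  The
 * "/example/dev" path is hypothetical; "fstype" and "fspath" are the
 * standard nmount(2) option names.
 *
 *      int error;
 *
 *      error = kernel_vmount(MNT_NOSUID,
 *          "fstype", "devfs",
 *          "fspath", "/example/dev",
 *          NULL);
 */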
 2539 
 2540 /* Map from mount options to printable formats. */
 2541 static struct mntoptnames optnames[] = {
 2542         MNTOPT_NAMES
 2543 };
 2544 
 2545 #define DEVCTL_LEN 1024
 2546 static void
 2547 mount_devctl_event(const char *type, struct mount *mp, bool donew)
 2548 {
 2549         const uint8_t *cp;
 2550         struct mntoptnames *fp;
 2551         struct sbuf sb;
 2552         struct statfs *sfp = &mp->mnt_stat;
 2553         char *buf;
 2554 
 2555         buf = malloc(DEVCTL_LEN, M_MOUNT, M_NOWAIT);
 2556         if (buf == NULL)
 2557                 return;
 2558         sbuf_new(&sb, buf, DEVCTL_LEN, SBUF_FIXEDLEN);
 2559         sbuf_cpy(&sb, "mount-point=\"");
 2560         devctl_safe_quote_sb(&sb, sfp->f_mntonname);
 2561         sbuf_cat(&sb, "\" mount-dev=\"");
 2562         devctl_safe_quote_sb(&sb, sfp->f_mntfromname);
 2563         sbuf_cat(&sb, "\" mount-type=\"");
 2564         devctl_safe_quote_sb(&sb, sfp->f_fstypename);
 2565         sbuf_cat(&sb, "\" fsid=0x");
 2566         cp = (const uint8_t *)&sfp->f_fsid.val[0];
 2567         for (int i = 0; i < sizeof(sfp->f_fsid); i++)
 2568                 sbuf_printf(&sb, "%02x", cp[i]);
 2569         sbuf_printf(&sb, " owner=%u flags=\"", sfp->f_owner);
 2570         for (fp = optnames; fp->o_opt != 0; fp++) {
 2571                 if ((mp->mnt_flag & fp->o_opt) != 0) {
 2572                         sbuf_cat(&sb, fp->o_name);
 2573                         sbuf_putc(&sb, ';');
 2574                 }
 2575         }
 2576         sbuf_putc(&sb, '"');
 2577         sbuf_finish(&sb);
 2578 
 2579         /*
 2580          * Options are not published because the form of the options depends on
 2581          * the file system and may include binary data. In addition, they don't
 2582          * necessarily provide enough useful information to be actionable when
 2583          * devd processes them.
 2584          */
 2585 
 2586         if (sbuf_error(&sb) == 0)
 2587                 devctl_notify("VFS", "FS", type, sbuf_data(&sb));
 2588         sbuf_delete(&sb);
 2589         free(buf, M_MOUNT);
 2590 }
 2591 
 2592 /*
 2593  * Force remount specified mount point to read-only.  The argument
 2594  * must be busied to avoid parallel unmount attempts.
 2595  *
 2596  * Intended use is to prevent further writes if some metadata
 2597  * inconsistency is detected.  Note that the function still flushes
 2598  * all cached metadata and data for the mount point, which might not
 2599  * always be suitable.
 2600  */
 2601 int
 2602 vfs_remount_ro(struct mount *mp)
 2603 {
 2604         struct vfsoptlist *opts;
 2605         struct vfsopt *opt;
 2606         struct vnode *vp_covered, *rootvp;
 2607         int error;
 2608 
 2609         KASSERT(mp->mnt_lockref > 0,
 2610             ("vfs_remount_ro: mp %p is not busied", mp));
 2611         KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0,
 2612             ("vfs_remount_ro: mp %p is being unmounted (and busy?)", mp));
 2613 
 2614         rootvp = NULL;
 2615         vp_covered = mp->mnt_vnodecovered;
 2616         error = vget(vp_covered, LK_EXCLUSIVE | LK_NOWAIT);
 2617         if (error != 0)
 2618                 return (error);
 2619         VI_LOCK(vp_covered);
 2620         if ((vp_covered->v_iflag & VI_MOUNT) != 0) {
 2621                 VI_UNLOCK(vp_covered);
 2622                 vput(vp_covered);
 2623                 return (EBUSY);
 2624         }
 2625         vp_covered->v_iflag |= VI_MOUNT;
 2626         VI_UNLOCK(vp_covered);
 2627         vfs_op_enter(mp);
 2628         vn_seqc_write_begin(vp_covered);
 2629 
 2630         MNT_ILOCK(mp);
 2631         if ((mp->mnt_flag & MNT_RDONLY) != 0) {
 2632                 MNT_IUNLOCK(mp);
 2633                 error = EBUSY;
 2634                 goto out;
 2635         }
 2636         mp->mnt_flag |= MNT_UPDATE | MNT_FORCE | MNT_RDONLY;
 2637         rootvp = vfs_cache_root_clear(mp);
 2638         MNT_IUNLOCK(mp);
 2639 
 2640         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK | M_ZERO);
 2641         TAILQ_INIT(opts);
 2642         opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK | M_ZERO);
 2643         opt->name = strdup("ro", M_MOUNT);
 2644         opt->value = NULL;
 2645         TAILQ_INSERT_TAIL(opts, opt, link);
 2646         vfs_mergeopts(opts, mp->mnt_opt);
 2647         mp->mnt_optnew = opts;
 2648 
 2649         error = VFS_MOUNT(mp);
 2650 
 2651         if (error == 0) {
 2652                 MNT_ILOCK(mp);
 2653                 mp->mnt_flag &= ~(MNT_UPDATE | MNT_FORCE);
 2654                 MNT_IUNLOCK(mp);
 2655                 vfs_deallocate_syncvnode(mp);
 2656                 if (mp->mnt_opt != NULL)
 2657                         vfs_freeopts(mp->mnt_opt);
 2658                 mp->mnt_opt = mp->mnt_optnew;
 2659         } else {
 2660                 MNT_ILOCK(mp);
 2661                 mp->mnt_flag &= ~(MNT_UPDATE | MNT_FORCE | MNT_RDONLY);
 2662                 MNT_IUNLOCK(mp);
 2663                 vfs_freeopts(mp->mnt_optnew);
 2664         }
 2665         mp->mnt_optnew = NULL;
 2666 
 2667 out:
 2668         vfs_op_exit(mp);
 2669         VI_LOCK(vp_covered);
 2670         vp_covered->v_iflag &= ~VI_MOUNT;
 2671         VI_UNLOCK(vp_covered);
 2672         vput(vp_covered);
 2673         vn_seqc_write_end(vp_covered);
 2674         if (rootvp != NULL) {
 2675                 vn_seqc_write_end(rootvp);
 2676                 vrele(rootvp);
 2677         }
 2678         return (error);
 2679 }
 2680 
 2681 /*
 2682  * Suspend write operations on all local writeable filesystems.  Does
 2683  * full sync of them in the process.
 2684  *
 2685  * Iterate over the mount points in reverse order, suspending most
 2686  * recently mounted filesystems first.  This handles the case where a
 2687  * filesystem mounted from a md(4) vnode-backed device must be
 2688  * suspended before the filesystem that owns the backing vnode.
 2689  */
 2690 void
 2691 suspend_all_fs(void)
 2692 {
 2693         struct mount *mp;
 2694         int error;
 2695 
 2696         mtx_lock(&mountlist_mtx);
 2697         TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 2698                 error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
 2699                 if (error != 0)
 2700                         continue;
 2701                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) != MNT_LOCAL ||
 2702                     (mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 2703                         mtx_lock(&mountlist_mtx);
 2704                         vfs_unbusy(mp);
 2705                         continue;
 2706                 }
 2707                 error = vfs_write_suspend(mp, 0);
 2708                 if (error == 0) {
 2709                         MNT_ILOCK(mp);
 2710                         MPASS((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0);
 2711                         mp->mnt_kern_flag |= MNTK_SUSPEND_ALL;
 2712                         MNT_IUNLOCK(mp);
 2713                         mtx_lock(&mountlist_mtx);
 2714                 } else {
 2715                         printf("suspend of %s failed, error %d\n",
 2716                             mp->mnt_stat.f_mntonname, error);
 2717                         mtx_lock(&mountlist_mtx);
 2718                         vfs_unbusy(mp);
 2719                 }
 2720         }
 2721         mtx_unlock(&mountlist_mtx);
 2722 }
 2723 
 2724 void
 2725 resume_all_fs(void)
 2726 {
 2727         struct mount *mp;
 2728 
 2729         mtx_lock(&mountlist_mtx);
 2730         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 2731                 if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0)
 2732                         continue;
 2733                 mtx_unlock(&mountlist_mtx);
 2734                 MNT_ILOCK(mp);
 2735                 MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0);
 2736                 mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL;
 2737                 MNT_IUNLOCK(mp);
 2738                 vfs_write_resume(mp, 0);
 2739                 mtx_lock(&mountlist_mtx);
 2740                 vfs_unbusy(mp);
 2741         }
 2742         mtx_unlock(&mountlist_mtx);
 2743 }
