vfs_mount.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1999-2004 Poul-Henning Kamp
    5  * Copyright (c) 1999 Michael Smith
    6  * Copyright (c) 1989, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  * (c) UNIX System Laboratories, Inc.
    9  * All or some portions of this file are derived from material licensed
   10  * to the University of California by American Telephone and Telegraph
   11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   12  * the permission of UNIX System Laboratories, Inc.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include <sys/param.h>
   43 #include <sys/conf.h>
   44 #include <sys/smp.h>
   45 #include <sys/devctl.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/jail.h>
   49 #include <sys/kernel.h>
   50 #include <sys/ktr.h>
   51 #include <sys/libkern.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/namei.h>
   56 #include <sys/priv.h>
   57 #include <sys/proc.h>
   58 #include <sys/filedesc.h>
   59 #include <sys/reboot.h>
   60 #include <sys/sbuf.h>
   61 #include <sys/syscallsubr.h>
   62 #include <sys/sysproto.h>
   63 #include <sys/sx.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/sysent.h>
   66 #include <sys/systm.h>
   67 #include <sys/vnode.h>
   68 #include <vm/uma.h>
   69 
   70 #include <geom/geom.h>
   71 
   72 #include <machine/stdarg.h>
   73 
   74 #include <security/audit/audit.h>
   75 #include <security/mac/mac_framework.h>
   76 
/*
 * Upper bound, in bytes, that vfs_buildopts() applies to each option name,
 * each option value, and the running total consumed by one option list.
 */
#define VFS_MOUNTARG_SIZE_MAX   (1024 * 64)

static int      vfs_domount(struct thread *td, const char *fstype, char *fspath,
                    uint64_t fsflags, struct vfsoptlist **optlist);
static void     free_mntarg(struct mntarg *ma);

/* Backing variable for the read/write vfs.usermount sysctl. */
static int      usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/*
 * Backing variable for the read/write vfs.default_autoro sysctl; it is the
 * initial value of the "autoro" behaviour in vfs_donmount().
 */
static bool     default_autoro = false;
SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
    "Retry failed r/w mount as r/o if no explicit ro/rw option is specified");

MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
/* UMA zone that struct mount is allocated from; created in vfs_mount_init(). */
static uma_zone_t mount_zone;

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx_padalign __exclusive_cache_line mountlist_mtx;
MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);

EVENTHANDLER_LIST_DEFINE(vfs_mounted);
EVENTHANDLER_LIST_DEFINE(vfs_unmounted);

static void mount_devctl_event(const char *type, struct mount *mp, bool donew);

/*
 * Global opts, taken by all filesystems
 *
 * The array is terminated by a NULL entry.
 */
static const char *global_opts[] = {
        "errmsg",
        "fstype",
        "fspath",
        "ro",
        "rw",
        "nosuid",
        "noexec",
        NULL
};
  120 
  121 static int
  122 mount_init(void *mem, int size, int flags)
  123 {
  124         struct mount *mp;
  125 
  126         mp = (struct mount *)mem;
  127         mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
  128         mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
  129         lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
  130         mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO);
  131         mp->mnt_ref = 0;
  132         mp->mnt_vfs_ops = 1;
  133         mp->mnt_rootvnode = NULL;
  134         return (0);
  135 }
  136 
  137 static void
  138 mount_fini(void *mem, int size)
  139 {
  140         struct mount *mp;
  141 
  142         mp = (struct mount *)mem;
  143         uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu);
  144         lockdestroy(&mp->mnt_explock);
  145         mtx_destroy(&mp->mnt_listmtx);
  146         mtx_destroy(&mp->mnt_mtx);
  147 }
  148 
  149 static void
  150 vfs_mount_init(void *dummy __unused)
  151 {
  152 
  153         mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL,
  154             NULL, mount_init, mount_fini, UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
  155 }
  156 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
  157 
  158 /*
  159  * ---------------------------------------------------------------------
  160  * Functions for building and sanitizing the mount options
  161  */
  162 
  163 /* Remove one mount option. */
  164 static void
  165 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
  166 {
  167 
  168         TAILQ_REMOVE(opts, opt, link);
  169         free(opt->name, M_MOUNT);
  170         if (opt->value != NULL)
  171                 free(opt->value, M_MOUNT);
  172         free(opt, M_MOUNT);
  173 }
  174 
  175 /* Release all resources related to the mount options. */
  176 void
  177 vfs_freeopts(struct vfsoptlist *opts)
  178 {
  179         struct vfsopt *opt;
  180 
  181         while (!TAILQ_EMPTY(opts)) {
  182                 opt = TAILQ_FIRST(opts);
  183                 vfs_freeopt(opts, opt);
  184         }
  185         free(opts, M_MOUNT);
  186 }
  187 
  188 void
  189 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
  190 {
  191         struct vfsopt *opt, *temp;
  192 
  193         if (opts == NULL)
  194                 return;
  195         TAILQ_FOREACH_SAFE(opt, opts, link, temp)  {
  196                 if (strcmp(opt->name, name) == 0)
  197                         vfs_freeopt(opts, opt);
  198         }
  199 }
  200 
  201 static int
  202 vfs_isopt_ro(const char *opt)
  203 {
  204 
  205         if (strcmp(opt, "ro") == 0 || strcmp(opt, "rdonly") == 0 ||
  206             strcmp(opt, "norw") == 0)
  207                 return (1);
  208         return (0);
  209 }
  210 
  211 static int
  212 vfs_isopt_rw(const char *opt)
  213 {
  214 
  215         if (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0)
  216                 return (1);
  217         return (0);
  218 }
  219 
  220 /*
  221  * Check if options are equal (with or without the "no" prefix).
  222  */
  223 static int
  224 vfs_equalopts(const char *opt1, const char *opt2)
  225 {
  226         char *p;
  227 
  228         /* "opt" vs. "opt" or "noopt" vs. "noopt" */
  229         if (strcmp(opt1, opt2) == 0)
  230                 return (1);
  231         /* "noopt" vs. "opt" */
  232         if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  233                 return (1);
  234         /* "opt" vs. "noopt" */
  235         if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  236                 return (1);
  237         while ((p = strchr(opt1, '.')) != NULL &&
  238             !strncmp(opt1, opt2, ++p - opt1)) {
  239                 opt2 += p - opt1;
  240                 opt1 = p;
  241                 /* "foo.noopt" vs. "foo.opt" */
  242                 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
  243                         return (1);
  244                 /* "foo.opt" vs. "foo.noopt" */
  245                 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
  246                         return (1);
  247         }
  248         /* "ro" / "rdonly" / "norw" / "rw" / "noro" */
  249         if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
  250             (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)))
  251                 return (1);
  252         return (0);
  253 }
  254 
  255 /*
  256  * If a mount option is specified several times,
  257  * (with or without the "no" prefix) only keep
  258  * the last occurrence of it.
  259  */
  260 static void
  261 vfs_sanitizeopts(struct vfsoptlist *opts)
  262 {
  263         struct vfsopt *opt, *opt2, *tmp;
  264 
  265         TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
  266                 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
  267                 while (opt2 != NULL) {
  268                         if (vfs_equalopts(opt->name, opt2->name)) {
  269                                 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
  270                                 vfs_freeopt(opts, opt2);
  271                                 opt2 = tmp;
  272                         } else {
  273                                 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
  274                         }
  275                 }
  276         }
  277 }
  278 
  279 /*
  280  * Build a linked list of mount options from a struct uio.
  281  */
  282 int
  283 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
  284 {
  285         struct vfsoptlist *opts;
  286         struct vfsopt *opt;
  287         size_t memused, namelen, optlen;
  288         unsigned int i, iovcnt;
  289         int error;
  290 
  291         opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
  292         TAILQ_INIT(opts);
  293         memused = 0;
  294         iovcnt = auio->uio_iovcnt;
  295         for (i = 0; i < iovcnt; i += 2) {
  296                 namelen = auio->uio_iov[i].iov_len;
  297                 optlen = auio->uio_iov[i + 1].iov_len;
  298                 memused += sizeof(struct vfsopt) + optlen + namelen;
  299                 /*
  300                  * Avoid consuming too much memory, and attempts to overflow
  301                  * memused.
  302                  */
  303                 if (memused > VFS_MOUNTARG_SIZE_MAX ||
  304                     optlen > VFS_MOUNTARG_SIZE_MAX ||
  305                     namelen > VFS_MOUNTARG_SIZE_MAX) {
  306                         error = EINVAL;
  307                         goto bad;
  308                 }
  309 
  310                 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  311                 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
  312                 opt->value = NULL;
  313                 opt->len = 0;
  314                 opt->pos = i / 2;
  315                 opt->seen = 0;
  316 
  317                 /*
  318                  * Do this early, so jumps to "bad" will free the current
  319                  * option.
  320                  */
  321                 TAILQ_INSERT_TAIL(opts, opt, link);
  322 
  323                 if (auio->uio_segflg == UIO_SYSSPACE) {
  324                         bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
  325                 } else {
  326                         error = copyin(auio->uio_iov[i].iov_base, opt->name,
  327                             namelen);
  328                         if (error)
  329                                 goto bad;
  330                 }
  331                 /* Ensure names are null-terminated strings. */
  332                 if (namelen == 0 || opt->name[namelen - 1] != '\0') {
  333                         error = EINVAL;
  334                         goto bad;
  335                 }
  336                 if (optlen != 0) {
  337                         opt->len = optlen;
  338                         opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
  339                         if (auio->uio_segflg == UIO_SYSSPACE) {
  340                                 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
  341                                     optlen);
  342                         } else {
  343                                 error = copyin(auio->uio_iov[i + 1].iov_base,
  344                                     opt->value, optlen);
  345                                 if (error)
  346                                         goto bad;
  347                         }
  348                 }
  349         }
  350         vfs_sanitizeopts(opts);
  351         *options = opts;
  352         return (0);
  353 bad:
  354         vfs_freeopts(opts);
  355         return (error);
  356 }
  357 
  358 /*
  359  * Merge the old mount options with the new ones passed
  360  * in the MNT_UPDATE case.
  361  *
  362  * XXX: This function will keep a "nofoo" option in the new
  363  * options.  E.g, if the option's canonical name is "foo",
  364  * "nofoo" ends up in the mount point's active options.
  365  */
  366 static void
  367 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts)
  368 {
  369         struct vfsopt *opt, *new;
  370 
  371         TAILQ_FOREACH(opt, oldopts, link) {
  372                 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
  373                 new->name = strdup(opt->name, M_MOUNT);
  374                 if (opt->len != 0) {
  375                         new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  376                         bcopy(opt->value, new->value, opt->len);
  377                 } else
  378                         new->value = NULL;
  379                 new->len = opt->len;
  380                 new->seen = opt->seen;
  381                 TAILQ_INSERT_HEAD(toopts, new, link);
  382         }
  383         vfs_sanitizeopts(toopts);
  384 }
  385 
  386 /*
  387  * Mount a filesystem.
  388  */
  389 #ifndef _SYS_SYSPROTO_H_
  390 struct nmount_args {
  391         struct iovec *iovp;
  392         unsigned int iovcnt;
  393         int flags;
  394 };
  395 #endif
  396 int
  397 sys_nmount(struct thread *td, struct nmount_args *uap)
  398 {
  399         struct uio *auio;
  400         int error;
  401         u_int iovcnt;
  402         uint64_t flags;
  403 
  404         /*
  405          * Mount flags are now 64-bits. On 32-bit archtectures only
  406          * 32-bits are passed in, but from here on everything handles
  407          * 64-bit flags correctly.
  408          */
  409         flags = uap->flags;
  410 
  411         AUDIT_ARG_FFLAGS(flags);
  412         CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
  413             uap->iovp, uap->iovcnt, flags);
  414 
  415         /*
  416          * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
  417          * userspace to set this flag, but we must filter it out if we want
  418          * MNT_UPDATE on the root file system to work.
  419          * MNT_ROOTFS should only be set by the kernel when mounting its
  420          * root file system.
  421          */
  422         flags &= ~MNT_ROOTFS;
  423 
  424         iovcnt = uap->iovcnt;
  425         /*
  426          * Check that we have an even number of iovec's
  427          * and that we have at least two options.
  428          */
  429         if ((iovcnt & 1) || (iovcnt < 4)) {
  430                 CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
  431                     uap->iovcnt);
  432                 return (EINVAL);
  433         }
  434 
  435         error = copyinuio(uap->iovp, iovcnt, &auio);
  436         if (error) {
  437                 CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
  438                     __func__, error);
  439                 return (error);
  440         }
  441         error = vfs_donmount(td, flags, auio);
  442 
  443         free(auio, M_IOV);
  444         return (error);
  445 }
  446 
  447 /*
  448  * ---------------------------------------------------------------------
  449  * Various utility functions
  450  */
  451 
  452 /*
  453  * Get a reference on a mount point from a vnode.
  454  *
  455  * The vnode is allowed to be passed unlocked and race against dooming. Note in
  456  * such case there are no guarantees the referenced mount point will still be
  457  * associated with it after the function returns.
  458  */
  459 struct mount *
  460 vfs_ref_from_vp(struct vnode *vp)
  461 {
  462         struct mount *mp;
  463         struct mount_pcpu *mpcpu;
  464 
  465         mp = atomic_load_ptr(&vp->v_mount);
  466         if (__predict_false(mp == NULL)) {
  467                 return (mp);
  468         }
  469         if (vfs_op_thread_enter(mp, mpcpu)) {
  470                 if (__predict_true(mp == vp->v_mount)) {
  471                         vfs_mp_count_add_pcpu(mpcpu, ref, 1);
  472                         vfs_op_thread_exit(mp, mpcpu);
  473                 } else {
  474                         vfs_op_thread_exit(mp, mpcpu);
  475                         mp = NULL;
  476                 }
  477         } else {
  478                 MNT_ILOCK(mp);
  479                 if (mp == vp->v_mount) {
  480                         MNT_REF(mp);
  481                         MNT_IUNLOCK(mp);
  482                 } else {
  483                         MNT_IUNLOCK(mp);
  484                         mp = NULL;
  485                 }
  486         }
  487         return (mp);
  488 }
  489 
  490 void
  491 vfs_ref(struct mount *mp)
  492 {
  493         struct mount_pcpu *mpcpu;
  494 
  495         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  496         if (vfs_op_thread_enter(mp, mpcpu)) {
  497                 vfs_mp_count_add_pcpu(mpcpu, ref, 1);
  498                 vfs_op_thread_exit(mp, mpcpu);
  499                 return;
  500         }
  501 
  502         MNT_ILOCK(mp);
  503         MNT_REF(mp);
  504         MNT_IUNLOCK(mp);
  505 }
  506 
  507 void
  508 vfs_rel(struct mount *mp)
  509 {
  510         struct mount_pcpu *mpcpu;
  511 
  512         CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
  513         if (vfs_op_thread_enter(mp, mpcpu)) {
  514                 vfs_mp_count_sub_pcpu(mpcpu, ref, 1);
  515                 vfs_op_thread_exit(mp, mpcpu);
  516                 return;
  517         }
  518 
  519         MNT_ILOCK(mp);
  520         MNT_REL(mp);
  521         MNT_IUNLOCK(mp);
  522 }
  523 
  524 /*
  525  * Allocate and initialize the mount point struct.
  526  */
  527 struct mount *
  528 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
  529     struct ucred *cred)
  530 {
  531         struct mount *mp;
  532 
  533         mp = uma_zalloc(mount_zone, M_WAITOK);
  534         bzero(&mp->mnt_startzero,
  535             __rangeof(struct mount, mnt_startzero, mnt_endzero));
  536         mp->mnt_kern_flag = 0;
  537         mp->mnt_flag = 0;
  538         mp->mnt_rootvnode = NULL;
  539         mp->mnt_vnodecovered = NULL;
  540         mp->mnt_op = NULL;
  541         mp->mnt_vfc = NULL;
  542         TAILQ_INIT(&mp->mnt_nvnodelist);
  543         mp->mnt_nvnodelistsize = 0;
  544         TAILQ_INIT(&mp->mnt_lazyvnodelist);
  545         mp->mnt_lazyvnodelistsize = 0;
  546         if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
  547             mp->mnt_writeopcount != 0)
  548                 panic("%s: non-zero counters on new mp %p\n", __func__, mp);
  549         if (mp->mnt_vfs_ops != 1)
  550                 panic("%s: vfs_ops should be 1 but %d found\n", __func__,
  551                     mp->mnt_vfs_ops);
  552         (void) vfs_busy(mp, MBF_NOWAIT);
  553         atomic_add_acq_int(&vfsp->vfc_refcount, 1);
  554         mp->mnt_op = vfsp->vfc_vfsops;
  555         mp->mnt_vfc = vfsp;
  556         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  557         mp->mnt_gen++;
  558         strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  559         mp->mnt_vnodecovered = vp;
  560         mp->mnt_cred = crdup(cred);
  561         mp->mnt_stat.f_owner = cred->cr_uid;
  562         strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
  563         mp->mnt_iosize_max = DFLTPHYS;
  564 #ifdef MAC
  565         mac_mount_init(mp);
  566         mac_mount_create(cred, mp);
  567 #endif
  568         arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
  569         TAILQ_INIT(&mp->mnt_uppers);
  570         return (mp);
  571 }
  572 
  573 /*
  574  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  575  */
  576 void
  577 vfs_mount_destroy(struct mount *mp)
  578 {
  579 
  580         if (mp->mnt_vfs_ops == 0)
  581                 panic("%s: entered with zero vfs_ops\n", __func__);
  582 
  583         vfs_assert_mount_counters(mp);
  584 
  585         MNT_ILOCK(mp);
  586         mp->mnt_kern_flag |= MNTK_REFEXPIRE;
  587         if (mp->mnt_kern_flag & MNTK_MWAIT) {
  588                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
  589                 wakeup(mp);
  590         }
  591         while (mp->mnt_ref)
  592                 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
  593         KASSERT(mp->mnt_ref == 0,
  594             ("%s: invalid refcount in the drain path @ %s:%d", __func__,
  595             __FILE__, __LINE__));
  596         if (mp->mnt_writeopcount != 0)
  597                 panic("vfs_mount_destroy: nonzero writeopcount");
  598         if (mp->mnt_secondary_writes != 0)
  599                 panic("vfs_mount_destroy: nonzero secondary_writes");
  600         atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
  601         if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
  602                 struct vnode *vp;
  603 
  604                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
  605                         vn_printf(vp, "dangling vnode ");
  606                 panic("unmount: dangling vnode");
  607         }
  608         KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
  609         if (mp->mnt_nvnodelistsize != 0)
  610                 panic("vfs_mount_destroy: nonzero nvnodelistsize");
  611         if (mp->mnt_lazyvnodelistsize != 0)
  612                 panic("vfs_mount_destroy: nonzero lazyvnodelistsize");
  613         if (mp->mnt_lockref != 0)
  614                 panic("vfs_mount_destroy: nonzero lock refcount");
  615         MNT_IUNLOCK(mp);
  616 
  617         if (mp->mnt_vfs_ops != 1)
  618                 panic("%s: vfs_ops should be 1 but %d found\n", __func__,
  619                     mp->mnt_vfs_ops);
  620 
  621         if (mp->mnt_rootvnode != NULL)
  622                 panic("%s: mount point still has a root vnode %p\n", __func__,
  623                     mp->mnt_rootvnode);
  624 
  625         if (mp->mnt_vnodecovered != NULL)
  626                 vrele(mp->mnt_vnodecovered);
  627 #ifdef MAC
  628         mac_mount_destroy(mp);
  629 #endif
  630         if (mp->mnt_opt != NULL)
  631                 vfs_freeopts(mp->mnt_opt);
  632         crfree(mp->mnt_cred);
  633         uma_zfree(mount_zone, mp);
  634 }
  635 
  636 static bool
  637 vfs_should_downgrade_to_ro_mount(uint64_t fsflags, int error)
  638 {
  639         /* This is an upgrade of an exisiting mount. */
  640         if ((fsflags & MNT_UPDATE) != 0)
  641                 return (false);
  642         /* This is already an R/O mount. */
  643         if ((fsflags & MNT_RDONLY) != 0)
  644                 return (false);
  645 
  646         switch (error) {
  647         case ENODEV:    /* generic, geom, ... */
  648         case EACCES:    /* cam/scsi, ... */
  649         case EROFS:     /* md, mmcsd, ... */
  650                 /*
  651                  * These errors can be returned by the storage layer to signal
  652                  * that the media is read-only.  No harm in the R/O mount
  653                  * attempt if the error was returned for some other reason.
  654                  */
  655                 return (true);
  656         default:
  657                 return (false);
  658         }
  659 }
  660 
  661 int
  662 vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
  663 {
  664         struct vfsoptlist *optlist;
  665         struct vfsopt *opt, *tmp_opt;
  666         char *fstype, *fspath, *errmsg;
  667         int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
  668         bool autoro;
  669 
  670         errmsg = fspath = NULL;
  671         errmsg_len = fspathlen = 0;
  672         errmsg_pos = -1;
  673         autoro = default_autoro;
  674 
  675         error = vfs_buildopts(fsoptions, &optlist);
  676         if (error)
  677                 return (error);
  678 
  679         if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
  680                 errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
  681 
  682         /*
  683          * We need these two options before the others,
  684          * and they are mandatory for any filesystem.
  685          * Ensure they are NUL terminated as well.
  686          */
  687         fstypelen = 0;
  688         error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
  689         if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
  690                 error = EINVAL;
  691                 if (errmsg != NULL)
  692                         strncpy(errmsg, "Invalid fstype", errmsg_len);
  693                 goto bail;
  694         }
  695         fspathlen = 0;
  696         error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
  697         if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
  698                 error = EINVAL;
  699                 if (errmsg != NULL)
  700                         strncpy(errmsg, "Invalid fspath", errmsg_len);
  701                 goto bail;
  702         }
  703 
  704         /*
  705          * We need to see if we have the "update" option
  706          * before we call vfs_domount(), since vfs_domount() has special
  707          * logic based on MNT_UPDATE.  This is very important
  708          * when we want to update the root filesystem.
  709          */
  710         TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
  711                 int do_freeopt = 0;
  712 
  713                 if (strcmp(opt->name, "update") == 0) {
  714                         fsflags |= MNT_UPDATE;
  715                         do_freeopt = 1;
  716                 }
  717                 else if (strcmp(opt->name, "async") == 0)
  718                         fsflags |= MNT_ASYNC;
  719                 else if (strcmp(opt->name, "force") == 0) {
  720                         fsflags |= MNT_FORCE;
  721                         do_freeopt = 1;
  722                 }
  723                 else if (strcmp(opt->name, "reload") == 0) {
  724                         fsflags |= MNT_RELOAD;
  725                         do_freeopt = 1;
  726                 }
  727                 else if (strcmp(opt->name, "multilabel") == 0)
  728                         fsflags |= MNT_MULTILABEL;
  729                 else if (strcmp(opt->name, "noasync") == 0)
  730                         fsflags &= ~MNT_ASYNC;
  731                 else if (strcmp(opt->name, "noatime") == 0)
  732                         fsflags |= MNT_NOATIME;
  733                 else if (strcmp(opt->name, "atime") == 0) {
  734                         free(opt->name, M_MOUNT);
  735                         opt->name = strdup("nonoatime", M_MOUNT);
  736                 }
  737                 else if (strcmp(opt->name, "noclusterr") == 0)
  738                         fsflags |= MNT_NOCLUSTERR;
  739                 else if (strcmp(opt->name, "clusterr") == 0) {
  740                         free(opt->name, M_MOUNT);
  741                         opt->name = strdup("nonoclusterr", M_MOUNT);
  742                 }
  743                 else if (strcmp(opt->name, "noclusterw") == 0)
  744                         fsflags |= MNT_NOCLUSTERW;
  745                 else if (strcmp(opt->name, "clusterw") == 0) {
  746                         free(opt->name, M_MOUNT);
  747                         opt->name = strdup("nonoclusterw", M_MOUNT);
  748                 }
  749                 else if (strcmp(opt->name, "noexec") == 0)
  750                         fsflags |= MNT_NOEXEC;
  751                 else if (strcmp(opt->name, "exec") == 0) {
  752                         free(opt->name, M_MOUNT);
  753                         opt->name = strdup("nonoexec", M_MOUNT);
  754                 }
  755                 else if (strcmp(opt->name, "nosuid") == 0)
  756                         fsflags |= MNT_NOSUID;
  757                 else if (strcmp(opt->name, "suid") == 0) {
  758                         free(opt->name, M_MOUNT);
  759                         opt->name = strdup("nonosuid", M_MOUNT);
  760                 }
  761                 else if (strcmp(opt->name, "nosymfollow") == 0)
  762                         fsflags |= MNT_NOSYMFOLLOW;
  763                 else if (strcmp(opt->name, "symfollow") == 0) {
  764                         free(opt->name, M_MOUNT);
  765                         opt->name = strdup("nonosymfollow", M_MOUNT);
  766                 }
  767                 else if (strcmp(opt->name, "noro") == 0) {
  768                         fsflags &= ~MNT_RDONLY;
  769                         autoro = false;
  770                 }
  771                 else if (strcmp(opt->name, "rw") == 0) {
  772                         fsflags &= ~MNT_RDONLY;
  773                         autoro = false;
  774                 }
  775                 else if (strcmp(opt->name, "ro") == 0) {
  776                         fsflags |= MNT_RDONLY;
  777                         autoro = false;
  778                 }
  779                 else if (strcmp(opt->name, "rdonly") == 0) {
  780                         free(opt->name, M_MOUNT);
  781                         opt->name = strdup("ro", M_MOUNT);
  782                         fsflags |= MNT_RDONLY;
  783                         autoro = false;
  784                 }
  785                 else if (strcmp(opt->name, "autoro") == 0) {
  786                         do_freeopt = 1;
  787                         autoro = true;
  788                 }
  789                 else if (strcmp(opt->name, "suiddir") == 0)
  790                         fsflags |= MNT_SUIDDIR;
  791                 else if (strcmp(opt->name, "sync") == 0)
  792                         fsflags |= MNT_SYNCHRONOUS;
  793                 else if (strcmp(opt->name, "union") == 0)
  794                         fsflags |= MNT_UNION;
  795                 else if (strcmp(opt->name, "automounted") == 0) {
  796                         fsflags |= MNT_AUTOMOUNTED;
  797                         do_freeopt = 1;
  798                 } else if (strcmp(opt->name, "nocover") == 0) {
  799                         fsflags |= MNT_NOCOVER;
  800                         do_freeopt = 1;
  801                 } else if (strcmp(opt->name, "cover") == 0) {
  802                         fsflags &= ~MNT_NOCOVER;
  803                         do_freeopt = 1;
  804                 } else if (strcmp(opt->name, "emptydir") == 0) {
  805                         fsflags |= MNT_EMPTYDIR;
  806                         do_freeopt = 1;
  807                 } else if (strcmp(opt->name, "noemptydir") == 0) {
  808                         fsflags &= ~MNT_EMPTYDIR;
  809                         do_freeopt = 1;
  810                 }
  811                 if (do_freeopt)
  812                         vfs_freeopt(optlist, opt);
  813         }
  814 
  815         /*
  816          * Be ultra-paranoid about making sure the type and fspath
  817          * variables will fit in our mp buffers, including the
  818          * terminating NUL.
  819          */
  820         if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) {
  821                 error = ENAMETOOLONG;
  822                 goto bail;
  823         }
  824 
  825         error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  826 
  827         /*
  828          * See if we can mount in the read-only mode if the error code suggests
  829          * that it could be possible and the mount options allow for that.
  830          * Never try it if "[no]{ro|rw}" has been explicitly requested and not
  831          * overridden by "autoro".
  832          */
  833         if (autoro && vfs_should_downgrade_to_ro_mount(fsflags, error)) {
  834                 printf("%s: R/W mount failed, possibly R/O media,"
  835                     " trying R/O mount\n", __func__);
  836                 fsflags |= MNT_RDONLY;
  837                 error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
  838         }
  839 bail:
  840         /* copyout the errmsg */
  841         if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
  842             && errmsg_len > 0 && errmsg != NULL) {
  843                 if (fsoptions->uio_segflg == UIO_SYSSPACE) {
  844                         bcopy(errmsg,
  845                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  846                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  847                 } else {
  848                         copyout(errmsg,
  849                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
  850                             fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
  851                 }
  852         }
  853 
  854         if (optlist != NULL)
  855                 vfs_freeopts(optlist);
  856         return (error);
  857 }
  858 
  859 /*
  860  * Old mount API.
  861  */
  862 #ifndef _SYS_SYSPROTO_H_
  863 struct mount_args {
  864         char    *type;
  865         char    *path;
  866         int     flags;
  867         caddr_t data;
  868 };
  869 #endif
  870 /* ARGSUSED */
  871 int
  872 sys_mount(struct thread *td, struct mount_args *uap)
  873 {
  874         char *fstype;
  875         struct vfsconf *vfsp = NULL;
  876         struct mntarg *ma = NULL;
  877         uint64_t flags;
  878         int error;
  879 
  880         /*
  881          * Mount flags are now 64-bits. On 32-bit architectures only
  882          * 32-bits are passed in, but from here on everything handles
  883          * 64-bit flags correctly.
  884          */
  885         flags = uap->flags;
  886 
  887         AUDIT_ARG_FFLAGS(flags);
  888 
  889         /*
  890          * Filter out MNT_ROOTFS.  We do not want clients of mount() in
  891          * userspace to set this flag, but we must filter it out if we want
  892          * MNT_UPDATE on the root file system to work.
  893          * MNT_ROOTFS should only be set by the kernel when mounting its
  894          * root file system.
  895          */
  896         flags &= ~MNT_ROOTFS;
  897 
  898         fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
  899         error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
  900         if (error) {
  901                 free(fstype, M_TEMP);
  902                 return (error);
  903         }
  904 
  905         AUDIT_ARG_TEXT(fstype);
  906         vfsp = vfs_byname_kld(fstype, td, &error);
  907         free(fstype, M_TEMP);
  908         if (vfsp == NULL)
  909                 return (ENOENT);
  910         if (((vfsp->vfc_flags & VFCF_SBDRY) != 0 &&
  911             vfsp->vfc_vfsops_sd->vfs_cmount == NULL) ||
  912             ((vfsp->vfc_flags & VFCF_SBDRY) == 0 &&
  913             vfsp->vfc_vfsops->vfs_cmount == NULL))
  914                 return (EOPNOTSUPP);
  915 
  916         ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
  917         ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
  918         ma = mount_argb(ma, flags & MNT_RDONLY, "noro");
  919         ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid");
  920         ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");
  921 
  922         if ((vfsp->vfc_flags & VFCF_SBDRY) != 0)
  923                 return (vfsp->vfc_vfsops_sd->vfs_cmount(ma, uap->data, flags));
  924         return (vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags));
  925 }
  926 
  927 /*
  928  * vfs_domount_first(): first file system mount (not update)
       *
       * The caller passes vp exclusively locked; fsflags must not contain
       * MNT_UPDATE.  Every failure return below disposes of vp: vput() while
       * it is still locked, vrele() once it has been unlocked.  On success the
       * function returns 0 with vp unlocked, *optlist set to NULL (the option
       * list is then referenced by mp->mnt_opt) and the new mount appended to
       * the mount list.
  929  */
  930 static int
  931 vfs_domount_first(
  932         struct thread *td,              /* Calling thread. */
  933         struct vfsconf *vfsp,           /* File system type. */
  934         char *fspath,                   /* Mount path. */
  935         struct vnode *vp,               /* Vnode to be covered. */
  936         uint64_t fsflags,               /* Flags common to all filesystems. */
  937         struct vfsoptlist **optlist     /* Options local to the filesystem. */
  938         )
  939 {
  940         struct vattr va;
  941         struct mount *mp;
  942         struct vnode *newdp, *rootvp;
  943         int error, error1;
  944         bool unmounted;
  945
  946         ASSERT_VOP_ELOCKED(vp, __func__);
  947         KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
  948
  949         /*
  950          * If the jail of the calling thread lacks permission for this type of
  951          * file system, or is trying to cover its own root, deny immediately.
  952          */
  953         if (jailed(td->td_ucred) && (!prison_allow(td->td_ucred,
  954             vfsp->vfc_prison_flag) || vp == td->td_ucred->cr_prison->pr_root)) {
  955                 vput(vp);
  956                 return (EPERM);
  957         }
  958
  959         /*
  960          * If the user is not root, ensure that they own the directory
  961          * onto which we are attempting to mount.
  962          */
              /*
               * The checks below form a chain: each one runs only while error
               * is still 0, so the first failure is the one reported.
               */
  963         error = VOP_GETATTR(vp, &va, td->td_ucred);
  964         if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
  965                 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN);
  966         if (error == 0)
  967                 error = vinvalbuf(vp, V_SAVE, 0, 0);
  968         if (error == 0 && vp->v_type != VDIR)
  969                 error = ENOTDIR;
  970         if (error == 0 && (fsflags & MNT_EMPTYDIR) != 0)
  971                 error = vfs_emptydir(vp);
  972         if (error == 0) {
                      /*
                       * Claim the vnode by setting VI_MOUNT under the vnode
                       * interlock.  EBUSY if the flag is already set or if
                       * something is already mounted here.
                       */
  973                 VI_LOCK(vp);
  974                 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
  975                         vp->v_iflag |= VI_MOUNT;
  976                 else
  977                         error = EBUSY;
  978                 VI_UNLOCK(vp);
  979         }
  980         if (error != 0) {
  981                 vput(vp);
  982                 return (error);
  983         }
              /*
               * From here on vp is unlocked (but VI_MOUNT is set); failure
               * paths must use vrele() rather than vput().
               */
  984         vn_seqc_write_begin(vp);
  985         VOP_UNLOCK(vp);
  986
  987         /* Allocate and initialize the filesystem. */
  988         mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
  989         /* XXXMAC: pass to vfs_mount_alloc? */
  990         mp->mnt_optnew = *optlist;
  991         /* Set the mount level flags. */
  992         mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));
  993
  994         /*
  995          * Mount the filesystem.
  996          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
  997          * get.  No freeing of cn_pnbuf.
  998          */
              /*
               * error records a VFS_MOUNT() failure.  error1 records a failure
               * of VFS_STATFS() or VFS_ROOT(), which can only happen after
               * VFS_MOUNT() succeeded and therefore needs a rollback unmount.
               */
  999         error1 = 0;
 1000         unmounted = true;
 1001         if ((error = VFS_MOUNT(mp)) != 0 ||
 1002             (error1 = VFS_STATFS(mp, &mp->mnt_stat)) != 0 ||
 1003             (error1 = VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) != 0) {
 1004                 rootvp = NULL;
 1005                 if (error1 != 0) {
 1006                         MPASS(error == 0);
 1007                         rootvp = vfs_cache_root_clear(mp);
 1008                         if (rootvp != NULL) {
                                      /*
                                       * Take a hold and give up the use
                                       * reference; the hold is dropped with
                                       * vdrop() further down.
                                       */
 1009                                 vhold(rootvp);
 1010                                 vrele(rootvp);
 1011                         }
 1012                         (void)vn_start_write(NULL, &mp, V_WAIT);
 1013                         MNT_ILOCK(mp);
 1014                         mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_UNMOUNTF;
 1015                         MNT_IUNLOCK(mp);
 1016                         VFS_PURGE(mp);
 1017                         error = VFS_UNMOUNT(mp, 0);
 1018                         vn_finished_write(mp);
 1019                         if (error != 0) {
 1020                                 printf(
 1021                     "failed post-mount (%d): rollback unmount returned %d\n",
 1022                                     error1, error);
                                      /* Leave mp alone: it is not destroyed below. */
 1023                                 unmounted = false;
 1024                         }
                              /* Report the post-mount failure, not the rollback result. */
 1025                         error = error1;
 1026                 }
 1027                 vfs_unbusy(mp);
 1028                 mp->mnt_vnodecovered = NULL;
 1029                 if (unmounted) {
 1030                         /* XXXKIB wait for mnt_lockref drain? */
 1031                         vfs_mount_destroy(mp);
 1032                 }
                      /* Release the claim taken on vp above. */
 1033                 VI_LOCK(vp);
 1034                 vp->v_iflag &= ~VI_MOUNT;
 1035                 VI_UNLOCK(vp);
 1036                 if (rootvp != NULL) {
 1037                         vn_seqc_write_end(rootvp);
 1038                         vdrop(rootvp);
 1039                 }
 1040                 vn_seqc_write_end(vp);
 1041                 vrele(vp);
 1042                 return (error);
 1043         }
 1044         vn_seqc_write_begin(newdp);
 1045         VOP_UNLOCK(newdp);
 1046
              /*
               * The mount succeeded: the new options become the mount's
               * options and the caller's list pointer is cleared.
               */
 1047         if (mp->mnt_opt != NULL)
 1048                 vfs_freeopts(mp->mnt_opt);
 1049         mp->mnt_opt = mp->mnt_optnew;
 1050         *optlist = NULL;
 1051
 1052         /*
 1053          * Prevent external consumers of mount options from reading mnt_optnew.
 1054          */
 1055         mp->mnt_optnew = NULL;
 1056
              /* MNTK_ASYNC is set only if MNT_ASYNC is set and MNTK_NOASYNC is clear. */
 1057         MNT_ILOCK(mp);
 1058         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1059             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1060                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1061         else
 1062                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1063         MNT_IUNLOCK(mp);
 1064
              /*
               * Re-lock the covered vnode, then, under its interlock, swap
               * VI_MOUNT for the finished state: VIRF_MOUNTPOINT set and
               * v_mountedhere pointing at the new mount.
               */
 1065         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1066         cache_purge(vp);
 1067         VI_LOCK(vp);
 1068         vp->v_iflag &= ~VI_MOUNT;
 1069         vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
 1070         vp->v_mountedhere = mp;
 1071         VI_UNLOCK(vp);
 1072         /* Place the new filesystem at the end of the mount list. */
 1073         mtx_lock(&mountlist_mtx);
 1074         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 1075         mtx_unlock(&mountlist_mtx);
 1076         vfs_event_signal(NULL, VQ_MOUNT, 0);
              /* newdp is locked before vp is unlocked. */
 1077         vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
 1078         VOP_UNLOCK(vp);
 1079         EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td);
 1080         VOP_UNLOCK(newdp);
 1081         mount_devctl_event("MOUNT", mp, false);
 1082         mountcheckdirs(vp, newdp);
 1083         vn_seqc_write_end(vp);
 1084         vn_seqc_write_end(newdp);
              /* Only newdp's reference is dropped here; vp is not released on success. */
 1085         vrele(newdp);
 1086         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1087                 vfs_allocate_syncvnode(mp);
 1088         vfs_op_exit(mp);
 1089         vfs_unbusy(mp);
 1090         return (0);
 1091 }
 1092 
 1093 /*
 1094  * vfs_domount_update(): update of mounted file system
       *
       * The caller passes vp exclusively locked; fsflags must contain
       * MNT_UPDATE.  The mount being updated is vp->v_mount.  Failures detected
       * before the vnode is unlocked release it with vput(); every later exit
       * goes through the "end" label, which clears VI_MOUNT and drops the
       * reference with vrele().  Returns the first error from the checks or
       * from VFS_MOUNT(); if those succeed, returns the result of processing
       * the "export" option (0 when there is none).
 1095  */
 1096 static int
 1097 vfs_domount_update(
 1098         struct thread *td,              /* Calling thread. */
 1099         struct vnode *vp,               /* Mount point vnode. */
 1100         uint64_t fsflags,               /* Flags common to all filesystems. */
 1101         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1102         )
 1103 {
 1104         struct export_args export;
 1105         struct o2export_args o2export;
 1106         struct vnode *rootvp;
 1107         void *bufp;
 1108         struct mount *mp;
 1109         int error, export_error, i, len;
 1110         uint64_t flag;
 1111         gid_t *grps;
 1112
 1113         ASSERT_VOP_ELOCKED(vp, __func__);
 1114         KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
 1115         mp = vp->v_mount;
 1116
              /*
               * vp must carry VV_ROOT.  If it does not, the error is EXDEV when
               * vfs_copyopt() succeeds for an "export" option, EINVAL otherwise.
               */
 1117         if ((vp->v_vflag & VV_ROOT) == 0) {
 1118                 if (vfs_copyopt(*optlist, "export", &export, sizeof(export))
 1119                     == 0)
 1120                         error = EXDEV;
 1121                 else
 1122                         error = EINVAL;
 1123                 vput(vp);
 1124                 return (error);
 1125         }
 1126
 1127         /*
 1128          * We only allow the filesystem to be reloaded if it
 1129          * is currently mounted read-only.
 1130          */
              /* flag keeps the pre-update mnt_flag so it can be restored on failure. */
 1131         flag = mp->mnt_flag;
 1132         if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
 1133                 vput(vp);
 1134                 return (EOPNOTSUPP);    /* Needs translation */
 1135         }
 1136         /*
 1137          * Only privileged root, or (if MNT_USER is set) the user that
 1138          * did the original mount is permitted to update it.
 1139          */
 1140         error = vfs_suser(mp, td);
 1141         if (error != 0) {
 1142                 vput(vp);
 1143                 return (error);
 1144         }
 1145         if (vfs_busy(mp, MBF_NOWAIT)) {
 1146                 vput(vp);
 1147                 return (EBUSY);
 1148         }
              /*
               * Claim the vnode with VI_MOUNT under the vnode interlock; EBUSY
               * if the flag is already set or something is mounted on vp.
               */
 1149         VI_LOCK(vp);
 1150         if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 1151                 VI_UNLOCK(vp);
 1152                 vfs_unbusy(mp);
 1153                 vput(vp);
 1154                 return (EBUSY);
 1155         }
 1156         vp->v_iflag |= VI_MOUNT;
 1157         VI_UNLOCK(vp);
 1158         VOP_UNLOCK(vp);
 1159
              /* Everything from here on is undone at the "end" label. */
 1160         vfs_op_enter(mp);
 1161         vn_seqc_write_begin(vp);
 1162
 1163         rootvp = NULL;
 1164         MNT_ILOCK(mp);
 1165         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 1166                 MNT_IUNLOCK(mp);
 1167                 error = EBUSY;
 1168                 goto end;
 1169         }
              /*
               * Replace the updatable flags with those requested and add the
               * request-only flags (MNT_RELOAD, MNT_FORCE, MNT_UPDATE,
               * MNT_SNAPSHOT) for the duration of VFS_MOUNT().
               */
 1170         mp->mnt_flag &= ~MNT_UPDATEMASK;
 1171         mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
 1172             MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
 1173         if ((mp->mnt_flag & MNT_ASYNC) == 0)
 1174                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1175         rootvp = vfs_cache_root_clear(mp);
 1176         MNT_IUNLOCK(mp);
 1177         mp->mnt_optnew = *optlist;
 1178         vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
 1179
 1180         /*
 1181          * Mount the filesystem.
 1182          * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 1183          * get.  No freeing of cn_pnbuf.
 1184          */
 1185         error = VFS_MOUNT(mp);
 1186
 1187         export_error = 0;
 1188         /* Process the export option. */
 1189         if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp,
 1190             &len) == 0) {
 1191                 /* Assume that there is only 1 ABI for each length. */
 1192                 switch (len) {
 1193                 case (sizeof(struct oexport_args)):
                              /*
                               * Clear o2export first: only len bytes are copied
                               * into it by the shared code below.
                               */
 1194                         bzero(&o2export, sizeof(o2export));
 1195                         /* FALLTHROUGH */
 1196                 case (sizeof(o2export)):
                              /* Convert the o2export layout field by field. */
 1197                         bcopy(bufp, &o2export, len);
 1198                         export.ex_flags = (uint64_t)o2export.ex_flags;
 1199                         export.ex_root = o2export.ex_root;
 1200                         export.ex_uid = o2export.ex_anon.cr_uid;
 1201                         export.ex_groups = NULL;
 1202                         export.ex_ngroups = o2export.ex_anon.cr_ngroups;
                              /* Group count must lie in [0, XU_NGROUPS]. */
 1203                         if (export.ex_ngroups > 0) {
 1204                                 if (export.ex_ngroups <= XU_NGROUPS) {
 1205                                         export.ex_groups = malloc(
 1206                                             export.ex_ngroups * sizeof(gid_t),
 1207                                             M_TEMP, M_WAITOK);
 1208                                         for (i = 0; i < export.ex_ngroups; i++)
 1209                                                 export.ex_groups[i] =
 1210                                                   o2export.ex_anon.cr_groups[i];
 1211                                 } else
 1212                                         export_error = EINVAL;
 1213                         } else if (export.ex_ngroups < 0)
 1214                                 export_error = EINVAL;
 1215                         export.ex_addr = o2export.ex_addr;
 1216                         export.ex_addrlen = o2export.ex_addrlen;
 1217                         export.ex_mask = o2export.ex_mask;
 1218                         export.ex_masklen = o2export.ex_masklen;
 1219                         export.ex_indexfile = o2export.ex_indexfile;
 1220                         export.ex_numsecflavors = o2export.ex_numsecflavors;
                              /* The flavor count must be strictly below MAXSECFLAVORS. */
 1221                         if (export.ex_numsecflavors < MAXSECFLAVORS) {
 1222                                 for (i = 0; i < export.ex_numsecflavors; i++)
 1223                                         export.ex_secflavors[i] =
 1224                                             o2export.ex_secflavors[i];
 1225                         } else
 1226                                 export_error = EINVAL;
 1227                         if (export_error == 0)
 1228                                 export_error = vfs_export(mp, &export);
                              /* ex_groups is NULL or the temporary array allocated above. */
 1229                         free(export.ex_groups, M_TEMP);
 1230                         break;
 1231                 case (sizeof(export)):
                              /*
                               * Current layout: the group list is fetched with
                               * copyin() into a temporary kernel array.
                               */
 1232                         bcopy(bufp, &export, len);
 1233                         grps = NULL;
 1234                         if (export.ex_ngroups > 0) {
 1235                                 if (export.ex_ngroups <= NGROUPS_MAX) {
 1236                                         grps = malloc(export.ex_ngroups *
 1237                                             sizeof(gid_t), M_TEMP, M_WAITOK);
 1238                                         export_error = copyin(export.ex_groups,
 1239                                             grps, export.ex_ngroups *
 1240                                             sizeof(gid_t));
 1241                                         if (export_error == 0)
 1242                                                 export.ex_groups = grps;
 1243                                 } else
 1244                                         export_error = EINVAL;
 1245                         } else if (export.ex_ngroups == 0)
 1246                                 export.ex_groups = NULL;
 1247                         else
 1248                                 export_error = EINVAL;
 1249                         if (export_error == 0)
 1250                                 export_error = vfs_export(mp, &export);
 1251                         free(grps, M_TEMP);
 1252                         break;
 1253                 default:
                              /* No known layout has this length. */
 1254                         export_error = EINVAL;
 1255                         break;
 1256                 }
 1257         }
 1258
 1259         MNT_ILOCK(mp);
 1260         if (error == 0) {
                      /* Success: drop the request-only flags again. */
 1261                 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
 1262                     MNT_SNAPSHOT);
 1263         } else {
 1264                 /*
 1265                  * If we fail, restore old mount flags. MNT_QUOTA is special,
 1266                  * because it is not part of MNT_UPDATEMASK, but it could have
 1267                  * changed in the meantime if quotactl(2) was called.
 1268                  * All in all we want current value of MNT_QUOTA, not the old
 1269                  * one.
 1270                  */
 1271                 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
 1272         }
              /* Recompute MNTK_ASYNC from the flags that are now in effect. */
 1273         if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1274             (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1275                 mp->mnt_kern_flag |= MNTK_ASYNC;
 1276         else
 1277                 mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1278         MNT_IUNLOCK(mp);
 1279
 1280         if (error != 0)
 1281                 goto end;
 1282
              /*
               * VFS_MOUNT() succeeded: the merged options become the mount's
               * options and the caller's list pointer is cleared.
               */
 1283         mount_devctl_event("REMOUNT", mp, true);
 1284         if (mp->mnt_opt != NULL)
 1285                 vfs_freeopts(mp->mnt_opt);
 1286         mp->mnt_opt = mp->mnt_optnew;
 1287         *optlist = NULL;
 1288         (void)VFS_STATFS(mp, &mp->mnt_stat);
 1289         /*
 1290          * Prevent external consumers of mount options from reading
 1291          * mnt_optnew.
 1292          */
 1293         mp->mnt_optnew = NULL;
 1294
              /* A syncer vnode exists only while the mount is read-write. */
 1295         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 1296                 vfs_allocate_syncvnode(mp);
 1297         else
 1298                 vfs_deallocate_syncvnode(mp);
 1299 end:
              /* Common exit: undo vfs_op_enter(), the seqc writes, the busy and VI_MOUNT. */
 1300         vfs_op_exit(mp);
 1301         if (rootvp != NULL) {
 1302                 vn_seqc_write_end(rootvp);
 1303                 vrele(rootvp);
 1304         }
 1305         vn_seqc_write_end(vp);
 1306         vfs_unbusy(mp);
 1307         VI_LOCK(vp);
 1308         vp->v_iflag &= ~VI_MOUNT;
 1309         VI_UNLOCK(vp);
 1310         vrele(vp);
              /* export_error is only consulted when error is 0. */
 1311         return (error != 0 ? error : export_error);
 1312 }
 1313 
 1314 /*
 1315  * vfs_domount(): actually attempt a filesystem mount.
 1316  */
 1317 static int
 1318 vfs_domount(
 1319         struct thread *td,              /* Calling thread. */
 1320         const char *fstype,             /* Filesystem type. */
 1321         char *fspath,                   /* Mount path. */
 1322         uint64_t fsflags,               /* Flags common to all filesystems. */
 1323         struct vfsoptlist **optlist     /* Options local to the filesystem. */
 1324         )
 1325 {
 1326         struct vfsconf *vfsp;
 1327         struct nameidata nd;
 1328         struct vnode *vp;
 1329         char *pathbuf;
 1330         int error;
 1331 
 1332         /*
 1333          * Be ultra-paranoid about making sure the type and fspath
 1334          * variables will fit in our mp buffers, including the
 1335          * terminating NUL.
 1336          */
 1337         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 1338                 return (ENAMETOOLONG);
 1339 
 1340         if (jailed(td->td_ucred) || usermount == 0) {
 1341                 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
 1342                         return (error);
 1343         }
 1344 
 1345         /*
 1346          * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
 1347          */
 1348         if (fsflags & MNT_EXPORTED) {
 1349                 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
 1350                 if (error)
 1351                         return (error);
 1352         }
 1353         if (fsflags & MNT_SUIDDIR) {
 1354                 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
 1355                 if (error)
 1356                         return (error);
 1357         }
 1358         /*
 1359          * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
 1360          */
 1361         if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
 1362                 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
 1363                         fsflags |= MNT_NOSUID | MNT_USER;
 1364         }
 1365 
 1366         /* Load KLDs before we lock the covered vnode to avoid reversals. */
 1367         vfsp = NULL;
 1368         if ((fsflags & MNT_UPDATE) == 0) {
 1369                 /* Don't try to load KLDs if we're mounting the root. */
 1370                 if (fsflags & MNT_ROOTFS)
 1371                         vfsp = vfs_byname(fstype);
 1372                 else
 1373                         vfsp = vfs_byname_kld(fstype, td, &error);
 1374                 if (vfsp == NULL)
 1375                         return (ENODEV);
 1376         }
 1377 
 1378         /*
 1379          * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
 1380          */
 1381         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 1382             UIO_SYSSPACE, fspath, td);
 1383         error = namei(&nd);
 1384         if (error != 0)
 1385                 return (error);
 1386         NDFREE(&nd, NDF_ONLY_PNBUF);
 1387         vp = nd.ni_vp;
 1388         if ((fsflags & MNT_UPDATE) == 0) {
 1389                 if ((vp->v_vflag & VV_ROOT) != 0 &&
 1390                     (fsflags & MNT_NOCOVER) != 0) {
 1391                         vput(vp);
 1392                         return (EBUSY);
 1393                 }
 1394                 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1395                 strcpy(pathbuf, fspath);
 1396                 error = vn_path_to_global_path(td, vp, pathbuf, MNAMELEN);
 1397                 if (error == 0) {
 1398                         error = vfs_domount_first(td, vfsp, pathbuf, vp,
 1399                             fsflags, optlist);
 1400                 }
 1401                 free(pathbuf, M_TEMP);
 1402         } else
 1403                 error = vfs_domount_update(td, vp, fsflags, optlist);
 1404 
 1405         return (error);
 1406 }
 1407 
 1408 /*
 1409  * Unmount a filesystem.
 1410  *
 1411  * Note: unmount takes a path to the vnode mounted on as argument, not
 1412  * special file (as before).
 1413  */
 1414 #ifndef _SYS_SYSPROTO_H_
 1415 struct unmount_args {
 1416         char    *path;
 1417         int     flags;
 1418 };
 1419 #endif
 1420 /* ARGSUSED */
 1421 int
 1422 sys_unmount(struct thread *td, struct unmount_args *uap)
 1423 {
 1424 
 1425         return (kern_unmount(td, uap->path, uap->flags));
 1426 }
 1427 
 1428 int
 1429 kern_unmount(struct thread *td, const char *path, int flags)
 1430 {
 1431         struct nameidata nd;
 1432         struct mount *mp;
 1433         char *pathbuf;
 1434         int error, id0, id1;
 1435 
 1436         AUDIT_ARG_VALUE(flags);
 1437         if (jailed(td->td_ucred) || usermount == 0) {
 1438                 error = priv_check(td, PRIV_VFS_UNMOUNT);
 1439                 if (error)
 1440                         return (error);
 1441         }
 1442 
 1443         pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 1444         error = copyinstr(path, pathbuf, MNAMELEN, NULL);
 1445         if (error) {
 1446                 free(pathbuf, M_TEMP);
 1447                 return (error);
 1448         }
 1449         if (flags & MNT_BYFSID) {
 1450                 AUDIT_ARG_TEXT(pathbuf);
 1451                 /* Decode the filesystem ID. */
 1452                 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 1453                         free(pathbuf, M_TEMP);
 1454                         return (EINVAL);
 1455                 }
 1456 
 1457                 mtx_lock(&mountlist_mtx);
 1458                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1459                         if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 1460                             mp->mnt_stat.f_fsid.val[1] == id1) {
 1461                                 vfs_ref(mp);
 1462                                 break;
 1463                         }
 1464                 }
 1465                 mtx_unlock(&mountlist_mtx);
 1466         } else {
 1467                 /*
 1468                  * Try to find global path for path argument.
 1469                  */
 1470                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 1471                     UIO_SYSSPACE, pathbuf, td);
 1472                 if (namei(&nd) == 0) {
 1473                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1474                         error = vn_path_to_global_path(td, nd.ni_vp, pathbuf,
 1475                             MNAMELEN);
 1476                         if (error == 0)
 1477                                 vput(nd.ni_vp);
 1478                 }
 1479                 mtx_lock(&mountlist_mtx);
 1480                 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 1481                         if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) {
 1482                                 vfs_ref(mp);
 1483                                 break;
 1484                         }
 1485                 }
 1486                 mtx_unlock(&mountlist_mtx);
 1487         }
 1488         free(pathbuf, M_TEMP);
 1489         if (mp == NULL) {
 1490                 /*
 1491                  * Previously we returned ENOENT for a nonexistent path and
 1492                  * EINVAL for a non-mountpoint.  We cannot tell these apart
 1493                  * now, so in the !MNT_BYFSID case return the more likely
 1494                  * EINVAL for compatibility.
 1495                  */
 1496                 return ((flags & MNT_BYFSID) ? ENOENT : EINVAL);
 1497         }
 1498 
 1499         /*
 1500          * Don't allow unmounting the root filesystem.
 1501          */
 1502         if (mp->mnt_flag & MNT_ROOTFS) {
 1503                 vfs_rel(mp);
 1504                 return (EINVAL);
 1505         }
 1506         error = dounmount(mp, flags, td);
 1507         return (error);
 1508 }
 1509 
 1510 /*
 1511  * Return error if any of the vnodes, ignoring the root vnode
 1512  * and the syncer vnode, have non-zero usecount.
 1513  *
 1514  * This function is purely advisory - it can return false positives
 1515  * and negatives.
 1516  */
 1517 static int
 1518 vfs_check_usecounts(struct mount *mp)
 1519 {
 1520         struct vnode *vp, *mvp;
 1521 
 1522         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 1523                 if ((vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON &&
 1524                     vp->v_usecount != 0) {
 1525                         VI_UNLOCK(vp);
 1526                         MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 1527                         return (EBUSY);
 1528                 }
 1529                 VI_UNLOCK(vp);
 1530         }
 1531 
 1532         return (0);
 1533 }
 1534 
 1535 static void
 1536 dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags)
 1537 {
 1538 
 1539         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1540         mp->mnt_kern_flag &= ~mntkflags;
 1541         if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) {
 1542                 mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1543                 wakeup(mp);
 1544         }
 1545         vfs_op_exit_locked(mp);
 1546         MNT_IUNLOCK(mp);
 1547         if (coveredvp != NULL) {
 1548                 VOP_UNLOCK(coveredvp);
 1549                 vdrop(coveredvp);
 1550         }
 1551         vn_finished_write(mp);
 1552 }
 1553 
 1554 /*
 1555  * There are various reference counters associated with the mount point.
 1556  * Normally it is permitted to modify them without taking the mnt ilock,
 1557  * but this behavior can be temporarily disabled if stable value is needed
 1558  * or callers are expected to block (e.g. to not allow new users during
 1559  * forced unmount).
 1560  */
 1561 void
 1562 vfs_op_enter(struct mount *mp)
 1563 {
 1564         struct mount_pcpu *mpcpu;
 1565         int cpu;
 1566 
 1567         MNT_ILOCK(mp);
 1568         mp->mnt_vfs_ops++;
 1569         if (mp->mnt_vfs_ops > 1) {
 1570                 MNT_IUNLOCK(mp);
 1571                 return;
 1572         }
 1573         vfs_op_barrier_wait(mp);
 1574         CPU_FOREACH(cpu) {
 1575                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1576 
 1577                 mp->mnt_ref += mpcpu->mntp_ref;
 1578                 mpcpu->mntp_ref = 0;
 1579 
 1580                 mp->mnt_lockref += mpcpu->mntp_lockref;
 1581                 mpcpu->mntp_lockref = 0;
 1582 
 1583                 mp->mnt_writeopcount += mpcpu->mntp_writeopcount;
 1584                 mpcpu->mntp_writeopcount = 0;
 1585         }
 1586         if (mp->mnt_ref <= 0 || mp->mnt_lockref < 0 || mp->mnt_writeopcount < 0)
 1587                 panic("%s: invalid count(s) on mp %p: ref %d lockref %d writeopcount %d\n",
 1588                     __func__, mp, mp->mnt_ref, mp->mnt_lockref, mp->mnt_writeopcount);
 1589         MNT_IUNLOCK(mp);
 1590         vfs_assert_mount_counters(mp);
 1591 }
 1592 
 1593 void
 1594 vfs_op_exit_locked(struct mount *mp)
 1595 {
 1596 
 1597         mtx_assert(MNT_MTX(mp), MA_OWNED);
 1598 
 1599         if (mp->mnt_vfs_ops <= 0)
 1600                 panic("%s: invalid vfs_ops count %d for mp %p\n",
 1601                     __func__, mp->mnt_vfs_ops, mp);
 1602         mp->mnt_vfs_ops--;
 1603 }
 1604 
 1605 void
 1606 vfs_op_exit(struct mount *mp)
 1607 {
 1608 
 1609         MNT_ILOCK(mp);
 1610         vfs_op_exit_locked(mp);
 1611         MNT_IUNLOCK(mp);
 1612 }
 1613 
 1614 struct vfs_op_barrier_ipi {
 1615         struct mount *mp;
 1616         struct smp_rendezvous_cpus_retry_arg srcra;
 1617 };
 1618 
 1619 static void
 1620 vfs_op_action_func(void *arg)
 1621 {
 1622         struct vfs_op_barrier_ipi *vfsopipi;
 1623         struct mount *mp;
 1624 
 1625         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1626         mp = vfsopipi->mp;
 1627 
 1628         if (!vfs_op_thread_entered(mp))
 1629                 smp_rendezvous_cpus_done(arg);
 1630 }
 1631 
 1632 static void
 1633 vfs_op_wait_func(void *arg, int cpu)
 1634 {
 1635         struct vfs_op_barrier_ipi *vfsopipi;
 1636         struct mount *mp;
 1637         struct mount_pcpu *mpcpu;
 1638 
 1639         vfsopipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 1640         mp = vfsopipi->mp;
 1641 
 1642         mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1643         while (atomic_load_int(&mpcpu->mntp_thread_in_ops))
 1644                 cpu_spinwait();
 1645 }
 1646 
 1647 void
 1648 vfs_op_barrier_wait(struct mount *mp)
 1649 {
 1650         struct vfs_op_barrier_ipi vfsopipi;
 1651 
 1652         vfsopipi.mp = mp;
 1653 
 1654         smp_rendezvous_cpus_retry(all_cpus,
 1655             smp_no_rendezvous_barrier,
 1656             vfs_op_action_func,
 1657             smp_no_rendezvous_barrier,
 1658             vfs_op_wait_func,
 1659             &vfsopipi.srcra);
 1660 }
 1661 
 1662 #ifdef DIAGNOSTIC
 1663 void
 1664 vfs_assert_mount_counters(struct mount *mp)
 1665 {
 1666         struct mount_pcpu *mpcpu;
 1667         int cpu;
 1668 
 1669         if (mp->mnt_vfs_ops == 0)
 1670                 return;
 1671 
 1672         CPU_FOREACH(cpu) {
 1673                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1674                 if (mpcpu->mntp_ref != 0 ||
 1675                     mpcpu->mntp_lockref != 0 ||
 1676                     mpcpu->mntp_writeopcount != 0)
 1677                         vfs_dump_mount_counters(mp);
 1678         }
 1679 }
 1680 
 1681 void
 1682 vfs_dump_mount_counters(struct mount *mp)
 1683 {
 1684         struct mount_pcpu *mpcpu;
 1685         int ref, lockref, writeopcount;
 1686         int cpu;
 1687 
 1688         printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);
 1689 
 1690         printf("        ref : ");
 1691         ref = mp->mnt_ref;
 1692         CPU_FOREACH(cpu) {
 1693                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1694                 printf("%d ", mpcpu->mntp_ref);
 1695                 ref += mpcpu->mntp_ref;
 1696         }
 1697         printf("\n");
 1698         printf("    lockref : ");
 1699         lockref = mp->mnt_lockref;
 1700         CPU_FOREACH(cpu) {
 1701                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1702                 printf("%d ", mpcpu->mntp_lockref);
 1703                 lockref += mpcpu->mntp_lockref;
 1704         }
 1705         printf("\n");
 1706         printf("writeopcount: ");
 1707         writeopcount = mp->mnt_writeopcount;
 1708         CPU_FOREACH(cpu) {
 1709                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1710                 printf("%d ", mpcpu->mntp_writeopcount);
 1711                 writeopcount += mpcpu->mntp_writeopcount;
 1712         }
 1713         printf("\n");
 1714 
 1715         printf("counter       struct total\n");
 1716         printf("ref             %-5d  %-5d\n", mp->mnt_ref, ref);
 1717         printf("lockref         %-5d  %-5d\n", mp->mnt_lockref, lockref);
 1718         printf("writeopcount    %-5d  %-5d\n", mp->mnt_writeopcount, writeopcount);
 1719 
 1720         panic("invalid counts on struct mount");
 1721 }
 1722 #endif
 1723 
 1724 int
 1725 vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
 1726 {
 1727         struct mount_pcpu *mpcpu;
 1728         int cpu, sum;
 1729 
 1730         switch (which) {
 1731         case MNT_COUNT_REF:
 1732                 sum = mp->mnt_ref;
 1733                 break;
 1734         case MNT_COUNT_LOCKREF:
 1735                 sum = mp->mnt_lockref;
 1736                 break;
 1737         case MNT_COUNT_WRITEOPCOUNT:
 1738                 sum = mp->mnt_writeopcount;
 1739                 break;
 1740         }
 1741 
 1742         CPU_FOREACH(cpu) {
 1743                 mpcpu = vfs_mount_pcpu_remote(mp, cpu);
 1744                 switch (which) {
 1745                 case MNT_COUNT_REF:
 1746                         sum += mpcpu->mntp_ref;
 1747                         break;
 1748                 case MNT_COUNT_LOCKREF:
 1749                         sum += mpcpu->mntp_lockref;
 1750                         break;
 1751                 case MNT_COUNT_WRITEOPCOUNT:
 1752                         sum += mpcpu->mntp_writeopcount;
 1753                         break;
 1754                 }
 1755         }
 1756         return (sum);
 1757 }
 1758 
 1759 /*
 1760  * Do the actual filesystem unmount.
 1761  */
 1762 int
 1763 dounmount(struct mount *mp, int flags, struct thread *td)
 1764 {
 1765         struct vnode *coveredvp, *rootvp;
 1766         int error;
 1767         uint64_t async_flag;
 1768         int mnt_gen_r;
 1769 
 1770         if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
 1771                 mnt_gen_r = mp->mnt_gen;
 1772                 VI_LOCK(coveredvp);
 1773                 vholdl(coveredvp);
 1774                 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 1775                 /*
 1776                  * Check for mp being unmounted while waiting for the
 1777                  * covered vnode lock.
 1778                  */
 1779                 if (coveredvp->v_mountedhere != mp ||
 1780                     coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 1781                         VOP_UNLOCK(coveredvp);
 1782                         vdrop(coveredvp);
 1783                         vfs_rel(mp);
 1784                         return (EBUSY);
 1785                 }
 1786         }
 1787 
 1788         /*
 1789          * Only privileged root, or (if MNT_USER is set) the user that did the
 1790          * original mount is permitted to unmount this filesystem.
 1791          */
 1792         error = vfs_suser(mp, td);
 1793         if (error != 0) {
 1794                 if (coveredvp != NULL) {
 1795                         VOP_UNLOCK(coveredvp);
 1796                         vdrop(coveredvp);
 1797                 }
 1798                 vfs_rel(mp);
 1799                 return (error);
 1800         }
 1801 
 1802         vfs_op_enter(mp);
 1803 
 1804         vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
 1805         MNT_ILOCK(mp);
 1806         if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
 1807             (mp->mnt_flag & MNT_UPDATE) != 0 ||
 1808             !TAILQ_EMPTY(&mp->mnt_uppers)) {
 1809                 dounmount_cleanup(mp, coveredvp, 0);
 1810                 return (EBUSY);
 1811         }
 1812         mp->mnt_kern_flag |= MNTK_UNMOUNT;
 1813         rootvp = vfs_cache_root_clear(mp);
 1814         if (coveredvp != NULL)
 1815                 vn_seqc_write_begin(coveredvp);
 1816         if (flags & MNT_NONBUSY) {
 1817                 MNT_IUNLOCK(mp);
 1818                 error = vfs_check_usecounts(mp);
 1819                 MNT_ILOCK(mp);
 1820                 if (error != 0) {
 1821                         vn_seqc_write_end(coveredvp);
 1822                         dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT);
 1823                         if (rootvp != NULL) {
 1824                                 vn_seqc_write_end(rootvp);
 1825                                 vrele(rootvp);
 1826                         }
 1827                         return (error);
 1828                 }
 1829         }
 1830         /* Allow filesystems to detect that a forced unmount is in progress. */
 1831         if (flags & MNT_FORCE) {
 1832                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 1833                 MNT_IUNLOCK(mp);
 1834                 /*
 1835                  * Must be done after setting MNTK_UNMOUNTF and before
 1836                  * waiting for mnt_lockref to become 0.
 1837                  */
 1838                 VFS_PURGE(mp);
 1839                 MNT_ILOCK(mp);
 1840         }
 1841         error = 0;
 1842         if (mp->mnt_lockref) {
 1843                 mp->mnt_kern_flag |= MNTK_DRAINING;
 1844                 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
 1845                     "mount drain", 0);
 1846         }
 1847         MNT_IUNLOCK(mp);
 1848         KASSERT(mp->mnt_lockref == 0,
 1849             ("%s: invalid lock refcount in the drain path @ %s:%d",
 1850             __func__, __FILE__, __LINE__));
 1851         KASSERT(error == 0,
 1852             ("%s: invalid return value for msleep in the drain path @ %s:%d",
 1853             __func__, __FILE__, __LINE__));
 1854 
 1855         /*
 1856          * We want to keep the vnode around so that we can vn_seqc_write_end
 1857          * after we are done with unmount. Downgrade our reference to a mere
 1858          * hold count so that we don't interefere with anything.
 1859          */
 1860         if (rootvp != NULL) {
 1861                 vhold(rootvp);
 1862                 vrele(rootvp);
 1863         }
 1864 
 1865         if (mp->mnt_flag & MNT_EXPUBLIC)
 1866                 vfs_setpublicfs(NULL, NULL, NULL);
 1867 
 1868         vfs_periodic(mp, MNT_WAIT);
 1869         MNT_ILOCK(mp);
 1870         async_flag = mp->mnt_flag & MNT_ASYNC;
 1871         mp->mnt_flag &= ~MNT_ASYNC;
 1872         mp->mnt_kern_flag &= ~MNTK_ASYNC;
 1873         MNT_IUNLOCK(mp);
 1874         vfs_deallocate_syncvnode(mp);
 1875         error = VFS_UNMOUNT(mp, flags);
 1876         vn_finished_write(mp);
 1877         /*
 1878          * If we failed to flush the dirty blocks for this mount point,
 1879          * undo all the cdir/rdir and rootvnode changes we made above.
 1880          * Unless we failed to do so because the device is reporting that
 1881          * it doesn't exist anymore.
 1882          */
 1883         if (error && error != ENXIO) {
 1884                 MNT_ILOCK(mp);
 1885                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 1886                         MNT_IUNLOCK(mp);
 1887                         vfs_allocate_syncvnode(mp);
 1888                         MNT_ILOCK(mp);
 1889                 }
 1890                 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 1891                 mp->mnt_flag |= async_flag;
 1892                 if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 1893                     (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 1894                         mp->mnt_kern_flag |= MNTK_ASYNC;
 1895                 if (mp->mnt_kern_flag & MNTK_MWAIT) {
 1896                         mp->mnt_kern_flag &= ~MNTK_MWAIT;
 1897                         wakeup(mp);
 1898                 }
 1899                 vfs_op_exit_locked(mp);
 1900                 MNT_IUNLOCK(mp);
 1901                 if (coveredvp) {
 1902                         vn_seqc_write_end(coveredvp);
 1903                         VOP_UNLOCK(coveredvp);
 1904                         vdrop(coveredvp);
 1905                 }
 1906                 if (rootvp != NULL) {
 1907                         vn_seqc_write_end(rootvp);
 1908                         vdrop(rootvp);
 1909                 }
 1910                 return (error);
 1911         }
 1912         mtx_lock(&mountlist_mtx);
 1913         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 1914         mtx_unlock(&mountlist_mtx);
 1915         EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td);
 1916         if (coveredvp != NULL) {
 1917                 VI_LOCK(coveredvp);
 1918                 vn_irflag_unset_locked(coveredvp, VIRF_MOUNTPOINT);
 1919                 coveredvp->v_mountedhere = NULL;
 1920                 vn_seqc_write_end_locked(coveredvp);
 1921                 VI_UNLOCK(coveredvp);
 1922                 VOP_UNLOCK(coveredvp);
 1923                 vdrop(coveredvp);
 1924         }
 1925         mount_devctl_event("UNMOUNT", mp, false);
 1926         if (rootvp != NULL) {
 1927                 vn_seqc_write_end(rootvp);
 1928                 vdrop(rootvp);
 1929         }
 1930         vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 1931         if (rootvnode != NULL && mp == rootvnode->v_mount) {
 1932                 vrele(rootvnode);
 1933                 rootvnode = NULL;
 1934         }
 1935         if (mp == rootdevmp)
 1936                 rootdevmp = NULL;
 1937         vfs_mount_destroy(mp);
 1938         return (0);
 1939 }
 1940 
 1941 /*
 1942  * Report errors during filesystem mounting.
 1943  */
 1944 void
 1945 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 1946 {
 1947         struct vfsoptlist *moptlist = mp->mnt_optnew;
 1948         va_list ap;
 1949         int error, len;
 1950         char *errmsg;
 1951 
 1952         error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
 1953         if (error || errmsg == NULL || len <= 0)
 1954                 return;
 1955 
 1956         va_start(ap, fmt);
 1957         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1958         va_end(ap);
 1959 }
 1960 
 1961 void
 1962 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
 1963 {
 1964         va_list ap;
 1965         int error, len;
 1966         char *errmsg;
 1967 
 1968         error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len);
 1969         if (error || errmsg == NULL || len <= 0)
 1970                 return;
 1971 
 1972         va_start(ap, fmt);
 1973         vsnprintf(errmsg, (size_t)len, fmt, ap);
 1974         va_end(ap);
 1975 }
 1976 
 1977 /*
 1978  * ---------------------------------------------------------------------
 1979  * Functions for querying mount options/arguments from filesystems.
 1980  */
 1981 
 1982 /*
 1983  * Check that no unknown options are given
 1984  */
 1985 int
 1986 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 1987 {
 1988         struct vfsopt *opt;
 1989         char errmsg[255];
 1990         const char **t, *p, *q;
 1991         int ret = 0;
 1992 
 1993         TAILQ_FOREACH(opt, opts, link) {
 1994                 p = opt->name;
 1995                 q = NULL;
 1996                 if (p[0] == 'n' && p[1] == 'o')
 1997                         q = p + 2;
 1998                 for(t = global_opts; *t != NULL; t++) {
 1999                         if (strcmp(*t, p) == 0)
 2000                                 break;
 2001                         if (q != NULL) {
 2002                                 if (strcmp(*t, q) == 0)
 2003                                         break;
 2004                         }
 2005                 }
 2006                 if (*t != NULL)
 2007                         continue;
 2008                 for(t = legal; *t != NULL; t++) {
 2009                         if (strcmp(*t, p) == 0)
 2010                                 break;
 2011                         if (q != NULL) {
 2012                                 if (strcmp(*t, q) == 0)
 2013                                         break;
 2014                         }
 2015                 }
 2016                 if (*t != NULL)
 2017                         continue;
 2018                 snprintf(errmsg, sizeof(errmsg),
 2019                     "mount option <%s> is unknown", p);
 2020                 ret = EINVAL;
 2021         }
 2022         if (ret != 0) {
 2023                 TAILQ_FOREACH(opt, opts, link) {
 2024                         if (strcmp(opt->name, "errmsg") == 0) {
 2025                                 strncpy((char *)opt->value, errmsg, opt->len);
 2026                                 break;
 2027                         }
 2028                 }
 2029                 if (opt == NULL)
 2030                         printf("%s\n", errmsg);
 2031         }
 2032         return (ret);
 2033 }
 2034 
 2035 /*
 2036  * Get a mount option by its name.
 2037  *
 2038  * Return 0 if the option was found, ENOENT otherwise.
 2039  * If len is non-NULL it will be filled with the length
 2040  * of the option. If buf is non-NULL, it will be filled
 2041  * with the address of the option.
 2042  */
 2043 int
 2044 vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len)
 2045 {
 2046         struct vfsopt *opt;
 2047 
 2048         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 2049 
 2050         TAILQ_FOREACH(opt, opts, link) {
 2051                 if (strcmp(name, opt->name) == 0) {
 2052                         opt->seen = 1;
 2053                         if (len != NULL)
 2054                                 *len = opt->len;
 2055                         if (buf != NULL)
 2056                                 *buf = opt->value;
 2057                         return (0);
 2058                 }
 2059         }
 2060         return (ENOENT);
 2061 }
 2062 
 2063 int
 2064 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 2065 {
 2066         struct vfsopt *opt;
 2067 
 2068         if (opts == NULL)
 2069                 return (-1);
 2070 
 2071         TAILQ_FOREACH(opt, opts, link) {
 2072                 if (strcmp(name, opt->name) == 0) {
 2073                         opt->seen = 1;
 2074                         return (opt->pos);
 2075                 }
 2076         }
 2077         return (-1);
 2078 }
 2079 
 2080 int
 2081 vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value)
 2082 {
 2083         char *opt_value, *vtp;
 2084         quad_t iv;
 2085         int error, opt_len;
 2086 
 2087         error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len);
 2088         if (error != 0)
 2089                 return (error);
 2090         if (opt_len == 0 || opt_value == NULL)
 2091                 return (EINVAL);
 2092         if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0')
 2093                 return (EINVAL);
 2094         iv = strtoq(opt_value, &vtp, 0);
 2095         if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0'))
 2096                 return (EINVAL);
 2097         if (iv < 0)
 2098                 return (EINVAL);
 2099         switch (vtp[0]) {
 2100         case 't': case 'T':
 2101                 iv *= 1024;
 2102                 /* FALLTHROUGH */
 2103         case 'g': case 'G':
 2104                 iv *= 1024;
 2105                 /* FALLTHROUGH */
 2106         case 'm': case 'M':
 2107                 iv *= 1024;
 2108                 /* FALLTHROUGH */
 2109         case 'k': case 'K':
 2110                 iv *= 1024;
 2111         case '\0':
 2112                 break;
 2113         default:
 2114                 return (EINVAL);
 2115         }
 2116         *value = iv;
 2117 
 2118         return (0);
 2119 }
 2120 
 2121 char *
 2122 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 2123 {
 2124         struct vfsopt *opt;
 2125 
 2126         *error = 0;
 2127         TAILQ_FOREACH(opt, opts, link) {
 2128                 if (strcmp(name, opt->name) != 0)
 2129                         continue;
 2130                 opt->seen = 1;
 2131                 if (opt->len == 0 ||
 2132                     ((char *)opt->value)[opt->len - 1] != '\0') {
 2133                         *error = EINVAL;
 2134                         return (NULL);
 2135                 }
 2136                 return (opt->value);
 2137         }
 2138         *error = ENOENT;
 2139         return (NULL);
 2140 }
 2141 
 2142 int
 2143 vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 2144         uint64_t val)
 2145 {
 2146         struct vfsopt *opt;
 2147 
 2148         TAILQ_FOREACH(opt, opts, link) {
 2149                 if (strcmp(name, opt->name) == 0) {
 2150                         opt->seen = 1;
 2151                         if (w != NULL)
 2152                                 *w |= val;
 2153                         return (1);
 2154                 }
 2155         }
 2156         if (w != NULL)
 2157                 *w &= ~val;
 2158         return (0);
 2159 }
 2160 
 2161 int
 2162 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 2163 {
 2164         va_list ap;
 2165         struct vfsopt *opt;
 2166         int ret;
 2167 
 2168         KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 2169 
 2170         TAILQ_FOREACH(opt, opts, link) {
 2171                 if (strcmp(name, opt->name) != 0)
 2172                         continue;
 2173                 opt->seen = 1;
 2174                 if (opt->len == 0 || opt->value == NULL)
 2175                         return (0);
 2176                 if (((char *)opt->value)[opt->len - 1] != '\0')
 2177                         return (0);
 2178                 va_start(ap, fmt);
 2179                 ret = vsscanf(opt->value, fmt, ap);
 2180                 va_end(ap);
 2181                 return (ret);
 2182         }
 2183         return (0);
 2184 }
 2185 
 2186 int
 2187 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
 2188 {
 2189         struct vfsopt *opt;
 2190 
 2191         TAILQ_FOREACH(opt, opts, link) {
 2192                 if (strcmp(name, opt->name) != 0)
 2193                         continue;
 2194                 opt->seen = 1;
 2195                 if (opt->value == NULL)
 2196                         opt->len = len;
 2197                 else {
 2198                         if (opt->len != len)
 2199                                 return (EINVAL);
 2200                         bcopy(value, opt->value, len);
 2201                 }
 2202                 return (0);
 2203         }
 2204         return (ENOENT);
 2205 }
 2206 
 2207 int
 2208 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
 2209 {
 2210         struct vfsopt *opt;
 2211 
 2212         TAILQ_FOREACH(opt, opts, link) {
 2213                 if (strcmp(name, opt->name) != 0)
 2214                         continue;
 2215                 opt->seen = 1;
 2216                 if (opt->value == NULL)
 2217                         opt->len = len;
 2218                 else {
 2219                         if (opt->len < len)
 2220                                 return (EINVAL);
 2221                         opt->len = len;
 2222                         bcopy(value, opt->value, len);
 2223                 }
 2224                 return (0);
 2225         }
 2226         return (ENOENT);
 2227 }
 2228 
 2229 int
 2230 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
 2231 {
 2232         struct vfsopt *opt;
 2233 
 2234         TAILQ_FOREACH(opt, opts, link) {
 2235                 if (strcmp(name, opt->name) != 0)
 2236                         continue;
 2237                 opt->seen = 1;
 2238                 if (opt->value == NULL)
 2239                         opt->len = strlen(value) + 1;
 2240                 else if (strlcpy(opt->value, value, opt->len) >= opt->len)
 2241                         return (EINVAL);
 2242                 return (0);
 2243         }
 2244         return (ENOENT);
 2245 }
 2246 
 2247 /*
 2248  * Find and copy a mount option.
 2249  *
 2250  * The size of the buffer has to be specified
 2251  * in len, if it is not the same length as the
 2252  * mount option, EINVAL is returned.
 2253  * Returns ENOENT if the option is not found.
 2254  */
 2255 int
 2256 vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len)
 2257 {
 2258         struct vfsopt *opt;
 2259 
 2260         KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 2261 
 2262         TAILQ_FOREACH(opt, opts, link) {
 2263                 if (strcmp(name, opt->name) == 0) {
 2264                         opt->seen = 1;
 2265                         if (len != opt->len)
 2266                                 return (EINVAL);
 2267                         bcopy(opt->value, dest, opt->len);
 2268                         return (0);
 2269                 }
 2270         }
 2271         return (ENOENT);
 2272 }
 2273 
 2274 int
 2275 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 2276 {
 2277 
 2278         /*
 2279          * Filesystems only fill in part of the structure for updates, we
 2280          * have to read the entirety first to get all content.
 2281          */
 2282         if (sbp != &mp->mnt_stat)
 2283                 memcpy(sbp, &mp->mnt_stat, sizeof(*sbp));
 2284 
 2285         /*
 2286          * Set these in case the underlying filesystem fails to do so.
 2287          */
 2288         sbp->f_version = STATFS_VERSION;
 2289         sbp->f_namemax = NAME_MAX;
 2290         sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 2291 
 2292         return (mp->mnt_op->vfs_statfs(mp, sbp));
 2293 }
 2294 
 2295 void
 2296 vfs_mountedfrom(struct mount *mp, const char *from)
 2297 {
 2298 
 2299         bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
 2300         strlcpy(mp->mnt_stat.f_mntfromname, from,
 2301             sizeof mp->mnt_stat.f_mntfromname);
 2302 }
 2303 
 2304 /*
 2305  * ---------------------------------------------------------------------
 2306  * This is the api for building mount args and mounting filesystems from
 2307  * inside the kernel.
 2308  *
 2309  * The API works by accumulation of individual args.  First error is
 2310  * latched.
 2311  *
 2312  * XXX: should be documented in new manpage kernel_mount(9)
 2313  */
 2314 
 2315 /* A memory allocation which must be freed when we are done */
 2316 struct mntaarg {
 2317         SLIST_ENTRY(mntaarg)    next;
 2318 };
 2319 
 2320 /* The header for the mount arguments */
 2321 struct mntarg {
 2322         struct iovec *v;
 2323         int len;
 2324         int error;
 2325         SLIST_HEAD(, mntaarg)   list;
 2326 };
 2327 
 2328 /*
 2329  * Add a boolean argument.
 2330  *
 2331  * flag is the boolean value.
 2332  * name must start with "no".
 2333  */
 2334 struct mntarg *
 2335 mount_argb(struct mntarg *ma, int flag, const char *name)
 2336 {
 2337 
 2338         KASSERT(name[0] == 'n' && name[1] == 'o',
 2339             ("mount_argb(...,%s): name must start with 'no'", name));
 2340 
 2341         return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0));
 2342 }
 2343 
 2344 /*
 2345  * Add an argument printf style
 2346  */
 2347 struct mntarg *
 2348 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 2349 {
 2350         va_list ap;
 2351         struct mntaarg *maa;
 2352         struct sbuf *sb;
 2353         int len;
 2354 
 2355         if (ma == NULL) {
 2356                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2357                 SLIST_INIT(&ma->list);
 2358         }
 2359         if (ma->error)
 2360                 return (ma);
 2361 
 2362         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2363             M_MOUNT, M_WAITOK);
 2364         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2365         ma->v[ma->len].iov_len = strlen(name) + 1;
 2366         ma->len++;
 2367 
 2368         sb = sbuf_new_auto();
 2369         va_start(ap, fmt);
 2370         sbuf_vprintf(sb, fmt, ap);
 2371         va_end(ap);
 2372         sbuf_finish(sb);
 2373         len = sbuf_len(sb) + 1;
 2374         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2375         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2376         bcopy(sbuf_data(sb), maa + 1, len);
 2377         sbuf_delete(sb);
 2378 
 2379         ma->v[ma->len].iov_base = maa + 1;
 2380         ma->v[ma->len].iov_len = len;
 2381         ma->len++;
 2382 
 2383         return (ma);
 2384 }
 2385 
 2386 /*
 2387  * Add an argument which is a userland string.
 2388  */
 2389 struct mntarg *
 2390 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 2391 {
 2392         struct mntaarg *maa;
 2393         char *tbuf;
 2394 
 2395         if (val == NULL)
 2396                 return (ma);
 2397         if (ma == NULL) {
 2398                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2399                 SLIST_INIT(&ma->list);
 2400         }
 2401         if (ma->error)
 2402                 return (ma);
 2403         maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 2404         SLIST_INSERT_HEAD(&ma->list, maa, next);
 2405         tbuf = (void *)(maa + 1);
 2406         ma->error = copyinstr(val, tbuf, len, NULL);
 2407         return (mount_arg(ma, name, tbuf, -1));
 2408 }
 2409 
 2410 /*
 2411  * Plain argument.
 2412  *
 2413  * If length is -1, treat value as a C string.
 2414  */
 2415 struct mntarg *
 2416 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 2417 {
 2418 
 2419         if (ma == NULL) {
 2420                 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 2421                 SLIST_INIT(&ma->list);
 2422         }
 2423         if (ma->error)
 2424                 return (ma);
 2425 
 2426         ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 2427             M_MOUNT, M_WAITOK);
 2428         ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 2429         ma->v[ma->len].iov_len = strlen(name) + 1;
 2430         ma->len++;
 2431 
 2432         ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 2433         if (len < 0)
 2434                 ma->v[ma->len].iov_len = strlen(val) + 1;
 2435         else
 2436                 ma->v[ma->len].iov_len = len;
 2437         ma->len++;
 2438         return (ma);
 2439 }
 2440 
 2441 /*
 2442  * Free a mntarg structure
 2443  */
 2444 static void
 2445 free_mntarg(struct mntarg *ma)
 2446 {
 2447         struct mntaarg *maa;
 2448 
 2449         while (!SLIST_EMPTY(&ma->list)) {
 2450                 maa = SLIST_FIRST(&ma->list);
 2451                 SLIST_REMOVE_HEAD(&ma->list, next);
 2452                 free(maa, M_MOUNT);
 2453         }
 2454         free(ma->v, M_MOUNT);
 2455         free(ma, M_MOUNT);
 2456 }
 2457 
 2458 /*
 2459  * Mount a filesystem
 2460  */
 2461 int
 2462 kernel_mount(struct mntarg *ma, uint64_t flags)
 2463 {
 2464         struct uio auio;
 2465         int error;
 2466 
 2467         KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 2468         KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 2469         KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 2470 
 2471         auio.uio_iov = ma->v;
 2472         auio.uio_iovcnt = ma->len;
 2473         auio.uio_segflg = UIO_SYSSPACE;
 2474 
 2475         error = ma->error;
 2476         if (!error)
 2477                 error = vfs_donmount(curthread, flags, &auio);
 2478         free_mntarg(ma);
 2479         return (error);
 2480 }
 2481 
 2482 /*
 2483  * A printflike function to mount a filesystem.
 2484  */
 2485 int
 2486 kernel_vmount(int flags, ...)
 2487 {
 2488         struct mntarg *ma = NULL;
 2489         va_list ap;
 2490         const char *cp;
 2491         const void *vp;
 2492         int error;
 2493 
 2494         va_start(ap, flags);
 2495         for (;;) {
 2496                 cp = va_arg(ap, const char *);
 2497                 if (cp == NULL)
 2498                         break;
 2499                 vp = va_arg(ap, const void *);
 2500                 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
 2501         }
 2502         va_end(ap);
 2503 
 2504         error = kernel_mount(ma, flags);
 2505         return (error);
 2506 }
 2507 
 2508 /* Map from mount options to printable formats. */
 2509 static struct mntoptnames optnames[] = {
 2510         MNTOPT_NAMES
 2511 };
 2512 
 2513 #define DEVCTL_LEN 1024
 2514 static void
 2515 mount_devctl_event(const char *type, struct mount *mp, bool donew)
 2516 {
 2517         const uint8_t *cp;
 2518         struct mntoptnames *fp;
 2519         struct sbuf sb;
 2520         struct statfs *sfp = &mp->mnt_stat;
 2521         char *buf;
 2522 
 2523         buf = malloc(DEVCTL_LEN, M_MOUNT, M_NOWAIT);
 2524         if (buf == NULL)
 2525                 return;
 2526         sbuf_new(&sb, buf, DEVCTL_LEN, SBUF_FIXEDLEN);
 2527         sbuf_cpy(&sb, "mount-point=\"");
 2528         devctl_safe_quote_sb(&sb, sfp->f_mntonname);
 2529         sbuf_cat(&sb, "\" mount-dev=\"");
 2530         devctl_safe_quote_sb(&sb, sfp->f_mntfromname);
 2531         sbuf_cat(&sb, "\" mount-type=\"");
 2532         devctl_safe_quote_sb(&sb, sfp->f_fstypename);
 2533         sbuf_cat(&sb, "\" fsid=0x");
 2534         cp = (const uint8_t *)&sfp->f_fsid.val[0];
 2535         for (int i = 0; i < sizeof(sfp->f_fsid); i++)
 2536                 sbuf_printf(&sb, "%02x", cp[i]);
 2537         sbuf_printf(&sb, " owner=%u flags=\"", sfp->f_owner);
 2538         for (fp = optnames; fp->o_opt != 0; fp++) {
 2539                 if ((mp->mnt_flag & fp->o_opt) != 0) {
 2540                         sbuf_cat(&sb, fp->o_name);
 2541                         sbuf_putc(&sb, ';');
 2542                 }
 2543         }
 2544         sbuf_putc(&sb, '"');
 2545         sbuf_finish(&sb);
 2546 
 2547         /*
 2548          * Options are not published because the form of the options depends on
 2549          * the file system and may include binary data. In addition, they don't
 2550          * necessarily provide enough useful information to be actionable when
 2551          * devd processes them.
 2552          */
 2553 
 2554         if (sbuf_error(&sb) == 0)
 2555                 devctl_notify("VFS", "FS", type, sbuf_data(&sb));
 2556         sbuf_delete(&sb);
 2557         free(buf, M_MOUNT);
 2558 }
 2559 
 2560 /*
 2561  * Suspend write operations on all local writeable filesystems.  Does
 2562  * full sync of them in the process.
 2563  *
 2564  * Iterate over the mount points in reverse order, suspending most
 2565  * recently mounted filesystems first.  It handles a case where a
 2566  * filesystem mounted from a md(4) vnode-backed device should be
 2567  * suspended before the filesystem that owns the vnode.
 2568  */
 2569 void
 2570 suspend_all_fs(void)
 2571 {
 2572         struct mount *mp;
 2573         int error;
 2574 
 2575         mtx_lock(&mountlist_mtx);
 2576         TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 2577                 error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
 2578                 if (error != 0)
 2579                         continue;
 2580                 if ((mp->mnt_flag & (MNT_RDONLY | MNT_LOCAL)) != MNT_LOCAL ||
 2581                     (mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 2582                         mtx_lock(&mountlist_mtx);
 2583                         vfs_unbusy(mp);
 2584                         continue;
 2585                 }
 2586                 error = vfs_write_suspend(mp, 0);
 2587                 if (error == 0) {
 2588                         MNT_ILOCK(mp);
 2589                         MPASS((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0);
 2590                         mp->mnt_kern_flag |= MNTK_SUSPEND_ALL;
 2591                         MNT_IUNLOCK(mp);
 2592                         mtx_lock(&mountlist_mtx);
 2593                 } else {
 2594                         printf("suspend of %s failed, error %d\n",
 2595                             mp->mnt_stat.f_mntonname, error);
 2596                         mtx_lock(&mountlist_mtx);
 2597                         vfs_unbusy(mp);
 2598                 }
 2599         }
 2600         mtx_unlock(&mountlist_mtx);
 2601 }
 2602 
 2603 void
 2604 resume_all_fs(void)
 2605 {
 2606         struct mount *mp;
 2607 
 2608         mtx_lock(&mountlist_mtx);
 2609         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 2610                 if ((mp->mnt_kern_flag & MNTK_SUSPEND_ALL) == 0)
 2611                         continue;
 2612                 mtx_unlock(&mountlist_mtx);
 2613                 MNT_ILOCK(mp);
 2614                 MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) != 0);
 2615                 mp->mnt_kern_flag &= ~MNTK_SUSPEND_ALL;
 2616                 MNT_IUNLOCK(mp);
 2617                 vfs_write_resume(mp, 0);
 2618                 mtx_lock(&mountlist_mtx);
 2619                 vfs_unbusy(mp);
 2620         }
 2621         mtx_unlock(&mountlist_mtx);
 2622 }
Cache object: f6068f8bf9b38de58cc441433e263e19
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_mount.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c