The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_vfsops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1991, 1993, 1994
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_mac.h"
   36 #include "opt_quota.h"
   37 #include "opt_ufs.h"
   38 #include "opt_ffs.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/namei.h>
   43 #include <sys/priv.h>
   44 #include <sys/proc.h>
   45 #include <sys/kernel.h>
   46 #include <sys/vnode.h>
   47 #include <sys/mount.h>
   48 #include <sys/bio.h>
   49 #include <sys/buf.h>
   50 #include <sys/conf.h>
   51 #include <sys/fcntl.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mutex.h>
   54 
   55 #include <security/mac/mac_framework.h>
   56 
   57 #include <ufs/ufs/extattr.h>
   58 #include <ufs/ufs/gjournal.h>
   59 #include <ufs/ufs/quota.h>
   60 #include <ufs/ufs/ufsmount.h>
   61 #include <ufs/ufs/inode.h>
   62 #include <ufs/ufs/ufs_extern.h>
   63 
   64 #include <ufs/ffs/fs.h>
   65 #include <ufs/ffs/ffs_extern.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/uma.h>
   69 #include <vm/vm_page.h>
   70 
   71 #include <geom/geom.h>
   72 #include <geom/geom_vfs.h>
   73 
/*
 * UMA zones for in-core inodes and the UFS1/UFS2 on-disk dinode copies.
 * Created lazily on the first call to ffs_mount() and shared by all
 * FFS mounts.
 */
static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

/* File-local helpers. */
static int      ffs_reload(struct mount *, struct thread *);
static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
                    ufs2_daddr_t);
static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
/* VFS operation implementations (typed via the vfs_*_t typedefs). */
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;
/*
 * VFS operations vector for FFS.  Operations with no FFS-specific
 * behavior (quotactl, root) are delegated to the generic UFS layer.
 */
static struct vfsops ufs_vfsops = {
        .vfs_extattrctl =       ffs_extattrctl,
        .vfs_fhtovp =           ffs_fhtovp,
        .vfs_init =             ffs_init,
        .vfs_mount =            ffs_mount,
        .vfs_cmount =           ffs_cmount,
        .vfs_quotactl =         ufs_quotactl,
        .vfs_root =             ufs_root,
        .vfs_statfs =           ffs_statfs,
        .vfs_sync =             ffs_sync,
        .vfs_uninit =           ffs_uninit,
        .vfs_unmount =          ffs_unmount,
        .vfs_vget =             ffs_vget,
};

/* Register the filesystem under the name "ufs" and declare its module version. */
VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);
  109 
static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

/*
 * Buffer operations installed on the device vnode's bufobj by
 * ffs_mountfs().  When snapshot support is compiled out
 * (NO_FFS_SNAPSHOT) the generic bufbdflush suffices; otherwise the
 * FFS-specific ffs_bdflush is required.
 */
static struct buf_ops ffs_ops = {
        .bop_name =     "FFS",
        .bop_write =    ffs_bufwrite,
        .bop_strategy = ffs_geom_strategy,
        .bop_sync =     bufsync,
#ifdef NO_FFS_SNAPSHOT
        .bop_bdflush =  bufbdflush,
#else
        .bop_bdflush =  ffs_bdflush,
#endif
};
  124 
/*
 * Mount options accepted by this filesystem; vfs_filteropt() in
 * ffs_mount() rejects anything else.  NOTE(review): ffs_mount() also
 * queries "noasync"/"noatime"/"noclusterr"/"noclusterw", which are not
 * listed here — presumably vfs_filteropt() accepts "no"-prefixed forms
 * of listed names; confirm against the vfs_filteropt implementation.
 */
static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
    "clusterw", "exec", "export", "force", "from", "multilabel", 
    "snapshot", "suid", "suiddir", "symfollow", "sync",
    "union", NULL };
/*
 * VFS mount entry point for FFS.
 *
 * Distinguishes, via mp->mnt_flag and the options in mp->mnt_optnew:
 *   - updates of an existing mount (MNT_UPDATE): rw->ro downgrade,
 *     MNT_RELOAD, ro->rw upgrade, and snapshot creation (MNT_SNAPSHOT,
 *     diverted to ffs_snapshot());
 *   - a fresh mount: look up the device named by the "from" option and
 *     hand off to ffs_mountfs().
 *
 * Returns 0 on success or an errno value.
 */
static int
ffs_mount(struct mount *mp, struct thread *td)
{
        struct vnode *devvp;
        struct ufsmount *ump = 0;
        struct fs *fs;
        int error, flags;
        u_int mntorflags, mntandnotflags;
        mode_t accessmode;
        struct nameidata ndp;
        char *fspec;

        /* Reject any option not in ffs_opts. */
        if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
                return (EINVAL);
        /*
         * Lazily create the shared inode/dinode zones on first mount.
         * NOTE(review): no visible lock serializes this check — presumably
         * VFS_MOUNT calls are serialized at first mount; confirm.
         */
        if (uma_inode == NULL) {
                uma_inode = uma_zcreate("FFS inode",
                    sizeof(struct inode), NULL, NULL, NULL, NULL,
                    UMA_ALIGN_PTR, 0);
                uma_ufs1 = uma_zcreate("FFS1 dinode",
                    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
                    UMA_ALIGN_PTR, 0);
                uma_ufs2 = uma_zcreate("FFS2 dinode",
                    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
                    UMA_ALIGN_PTR, 0);
        }

        fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
        if (error)
                return (error);

        /*
         * Translate boolean mount options into MNT_* bits to set
         * (mntorflags) and bits to clear (mntandnotflags).
         */
        mntorflags = 0;
        mntandnotflags = 0;
        if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
                mntorflags |= MNT_ACLS;

        if (vfs_getopt(mp->mnt_optnew, "async", NULL, NULL) == 0)
                mntorflags |= MNT_ASYNC;

        if (vfs_getopt(mp->mnt_optnew, "force", NULL, NULL) == 0)
                mntorflags |= MNT_FORCE;

        if (vfs_getopt(mp->mnt_optnew, "multilabel", NULL, NULL) == 0)
                mntorflags |= MNT_MULTILABEL;

        if (vfs_getopt(mp->mnt_optnew, "noasync", NULL, NULL) == 0)
                mntandnotflags |= MNT_ASYNC;

        if (vfs_getopt(mp->mnt_optnew, "noatime", NULL, NULL) == 0)
                mntorflags |= MNT_NOATIME;

        if (vfs_getopt(mp->mnt_optnew, "noclusterr", NULL, NULL) == 0)
                mntorflags |= MNT_NOCLUSTERR;

        if (vfs_getopt(mp->mnt_optnew, "noclusterw", NULL, NULL) == 0)
                mntorflags |= MNT_NOCLUSTERW;

        if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0)
                mntorflags |= MNT_SNAPSHOT;

        /* Apply the accumulated flag changes atomically under the mount lock. */
        MNT_ILOCK(mp);
        mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
        MNT_IUNLOCK(mp);
        /*
         * If updating, check whether changing from read-only to
         * read/write; if there is no device name, that's all we do.
         */
        if (mp->mnt_flag & MNT_UPDATE) {
                ump = VFSTOUFS(mp);
                fs = ump->um_fs;
                devvp = ump->um_devvp;
                /* Downgrade: currently read/write, "ro" requested. */
                if (fs->fs_ronly == 0 &&
                    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
                        if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
                                return (error);
                        /*
                         * Flush any dirty data.
                         */
                        if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
                                vn_finished_write(mp);
                                return (error);
                        }
                        /*
                         * Check for and optionally get rid of files open
                         * for writing.
                         */
                        flags = WRITECLOSE;
                        if (mp->mnt_flag & MNT_FORCE)
                                flags |= FORCECLOSE;
                        if (mp->mnt_flag & MNT_SOFTDEP) {
                                error = softdep_flushfiles(mp, flags, td);
                        } else {
                                error = ffs_flushfiles(mp, flags, td);
                        }
                        if (error) {
                                vn_finished_write(mp);
                                return (error);
                        }
                        /* Pending counts should be zero after a full flush. */
                        if (fs->fs_pendingblocks != 0 ||
                            fs->fs_pendinginodes != 0) {
                                printf("%s: %s: blocks %jd files %d\n",
                                    fs->fs_fsmnt, "update error",
                                    (intmax_t)fs->fs_pendingblocks,
                                    fs->fs_pendinginodes);
                                fs->fs_pendingblocks = 0;
                                fs->fs_pendinginodes = 0;
                        }
                        if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
                                fs->fs_clean = 1;
                        /* Write the superblock back; undo the clean/ronly marks on failure. */
                        if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
                                fs->fs_ronly = 0;
                                fs->fs_clean = 0;
                                vn_finished_write(mp);
                                return (error);
                        }
                        vn_finished_write(mp);
                        /* Drop our GEOM write access count now that we are read-only. */
                        DROP_GIANT();
                        g_topology_lock();
                        g_access(ump->um_cp, 0, -1, 0);
                        g_topology_unlock();
                        PICKUP_GIANT();
                        fs->fs_ronly = 1;
                        MNT_ILOCK(mp);
                        mp->mnt_flag |= MNT_RDONLY;
                        MNT_IUNLOCK(mp);
                }
                if ((mp->mnt_flag & MNT_RELOAD) &&
                    (error = ffs_reload(mp, td)) != 0)
                        return (error);
                /* Upgrade: currently read-only, "ro" not requested. */
                if (fs->fs_ronly &&
                    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
                        /*
                         * If upgrade to read-write by non-root, then verify
                         * that user has necessary permissions on the device.
                         */
                        vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
                        error = VOP_ACCESS(devvp, VREAD | VWRITE,
                            td->td_ucred, td);
                        if (error)
                                error = priv_check(td, PRIV_VFS_MOUNT_PERM);
                        if (error) {
                                VOP_UNLOCK(devvp, 0, td);
                                return (error);
                        }
                        VOP_UNLOCK(devvp, 0, td);
                        /* Refuse R/W upgrade of an unclean fs unless forced or softdep. */
                        fs->fs_flags &= ~FS_UNCLEAN;
                        if (fs->fs_clean == 0) {
                                fs->fs_flags |= FS_UNCLEAN;
                                if ((mp->mnt_flag & MNT_FORCE) ||
                                    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
                                     (fs->fs_flags & FS_DOSOFTDEP))) {
                                        printf("WARNING: %s was not %s\n",
                                           fs->fs_fsmnt, "properly dismounted");
                                } else {
                                        printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
                                            fs->fs_fsmnt);
                                        return (EPERM);
                                }
                        }
                        DROP_GIANT();
                        g_topology_lock();
                        /*
                         * If we're the root device, we may not have an E count
                         * yet, get it now.
                         */
                        if (ump->um_cp->ace == 0)
                                error = g_access(ump->um_cp, 0, 1, 1);
                        else
                                error = g_access(ump->um_cp, 0, 1, 0);
                        g_topology_unlock();
                        PICKUP_GIANT();
                        if (error)
                                return (error);
                        if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
                                return (error);
                        fs->fs_ronly = 0;
                        MNT_ILOCK(mp);
                        mp->mnt_flag &= ~MNT_RDONLY;
                        MNT_IUNLOCK(mp);
                        fs->fs_clean = 0;
                        if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
                                vn_finished_write(mp);
                                return (error);
                        }
                        /* check to see if we need to start softdep */
                        if ((fs->fs_flags & FS_DOSOFTDEP) &&
                            (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
                                vn_finished_write(mp);
                                return (error);
                        }
                        /* Re-attach any existing snapshots. */
                        if (fs->fs_snapinum[0] != 0)
                                ffs_snapshot_mount(mp);
                        vn_finished_write(mp);
                }
                /*
                 * Soft updates is incompatible with "async",
                 * so if we are doing softupdates stop the user
                 * from setting the async flag in an update.
                 * Softdep_mount() clears it in an initial mount 
                 * or ro->rw remount.
                 */
                if (mp->mnt_flag & MNT_SOFTDEP) {
                        /* XXX: Reset too late ? */
                        MNT_ILOCK(mp);
                        mp->mnt_flag &= ~MNT_ASYNC;
                        MNT_IUNLOCK(mp);
                }
                /*
                 * Keep MNT_ACLS flag if it is stored in superblock.
                 */
                if ((fs->fs_flags & FS_ACLS) != 0) {
                        /* XXX: Set too late ? */
                        MNT_ILOCK(mp);
                        mp->mnt_flag |= MNT_ACLS;
                        MNT_IUNLOCK(mp);
                }

                /*
                 * If this is a snapshot request, take the snapshot.
                 */
                if (mp->mnt_flag & MNT_SNAPSHOT)
                        return (ffs_snapshot(mp, fspec));
        }

        /*
         * Not an update, or updating the name: look up the name
         * and verify that it refers to a sensible disk device.
         */
        NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
        if ((error = namei(&ndp)) != 0)
                return (error);
        NDFREE(&ndp, NDF_ONLY_PNBUF);
        devvp = ndp.ni_vp;
        if (!vn_isdisk(devvp, &error)) {
                vput(devvp);
                return (error);
        }

        /*
         * If mount by non-root, then verify that user has necessary
         * permissions on the device.
         */
        accessmode = VREAD;
        if ((mp->mnt_flag & MNT_RDONLY) == 0)
                accessmode |= VWRITE;
        error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
        if (error)
                error = priv_check(td, PRIV_VFS_MOUNT_PERM);
        if (error) {
                vput(devvp);
                return (error);
        }

        if (mp->mnt_flag & MNT_UPDATE) {
                /*
                 * Update only
                 *
                 * If it's not the same vnode, or at least the same device
                 * then it's not correct.
                 */

                if (devvp->v_rdev != ump->um_devvp->v_rdev)
                        error = EINVAL; /* needs translation */
                vput(devvp);
                if (error)
                        return (error);
        } else {
                /*
                 * New mount
                 *
                 * We need the name for the mount point (also used for
                 * "last mounted on") copied in. If an error occurs,
                 * the mount point is discarded by the upper level code.
                 * Note that vfs_mount() populates f_mntonname for us.
                 */
                if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
                        vrele(devvp);
                        return (error);
                }
        }
        /* Record the "mounted from" device name in the mount statistics. */
        vfs_mountedfrom(mp, fspec);
        return (0);
}
  413 
  414 /*
  415  * Compatibility with old mount system call.
  416  */
  417 
  418 static int
  419 ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
  420 {
  421         struct ufs_args args;
  422         int error;
  423 
  424         if (data == NULL)
  425                 return (EINVAL);
  426         error = copyin(data, &args, sizeof args);
  427         if (error)
  428                 return (error);
  429 
  430         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
  431         ma = mount_arg(ma, "export", &args.export, sizeof args.export);
  432         error = kernel_mount(ma, flags);
  433 
  434         return (error);
  435 }
  436 
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *      1) invalidate all cached meta-data.
 *      2) re-read superblock from disk.
 *      3) re-read summary information from disk.
 *      4) invalidate all inactive vnodes.
 *      5) invalidate all cached file data.
 *      6) re-read inode data for all active vnodes.
 *
 * Returns 0 on success, EINVAL if the mount is not read-only, or an
 * errno value from a failed device read.
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
        struct vnode *vp, *mvp, *devvp;
        struct inode *ip;
        void *space;
        struct buf *bp;
        struct fs *fs, *newfs;
        struct ufsmount *ump;
        ufs2_daddr_t sblockloc;
        int i, blks, size, error;
        int32_t *lp;

        /* Only a read-only mount may be reloaded. */
        if ((mp->mnt_flag & MNT_RDONLY) == 0)
                return (EINVAL);
        ump = VFSTOUFS(mp);
        /*
         * Step 1: invalidate all cached meta-data.
         */
        devvp = VFSTOUFS(mp)->um_devvp;
        vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
        if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
                panic("ffs_reload: dirty1");
        VOP_UNLOCK(devvp, 0, td);

        /*
         * Step 2: re-read superblock from disk.
         */
        fs = VFSTOUFS(mp)->um_fs;
        if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
            NOCRED, &bp)) != 0)
                return (error);
        newfs = (struct fs *)bp->b_data;
        /* Sanity-check the newly read superblock before trusting it. */
        if ((newfs->fs_magic != FS_UFS1_MAGIC &&
             newfs->fs_magic != FS_UFS2_MAGIC) ||
            newfs->fs_bsize > MAXBSIZE ||
            newfs->fs_bsize < sizeof(struct fs)) {
                        brelse(bp);
                        return (EIO);           /* XXX needs translation */
        }
        /*
         * Copy pointer fields back into superblock before copying in   XXX
         * new superblock. These should really be in the ufsmount.      XXX
         * Note that important parameters (eg fs_ncg) are unchanged.
         */
        newfs->fs_csp = fs->fs_csp;
        newfs->fs_maxcluster = fs->fs_maxcluster;
        newfs->fs_contigdirs = fs->fs_contigdirs;
        newfs->fs_active = fs->fs_active;
        /* The file system is still read-only. */
        newfs->fs_ronly = 1;
        sblockloc = fs->fs_sblockloc;
        bcopy(newfs, fs, (u_int)fs->fs_sbsize);
        brelse(bp);
        mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
        ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
        UFS_LOCK(ump);
        if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
                printf("%s: reload pending error: blocks %jd files %d\n",
                    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
                    fs->fs_pendinginodes);
                fs->fs_pendingblocks = 0;
                fs->fs_pendinginodes = 0;
        }
        UFS_UNLOCK(ump);

        /*
         * Step 3: re-read summary information from disk.
         */
        blks = howmany(fs->fs_cssize, fs->fs_fsize);
        space = fs->fs_csp;
        for (i = 0; i < blks; i += fs->fs_frag) {
                size = fs->fs_bsize;
                /* Last chunk may be shorter than a full block. */
                if (i + fs->fs_frag > blks)
                        size = (blks - i) * fs->fs_fsize;
                error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
                    NOCRED, &bp);
                if (error)
                        return (error);
                bcopy(bp->b_data, space, (u_int)size);
                space = (char *)space + size;
                brelse(bp);
        }
        /*
         * We no longer know anything about clusters per cylinder group.
         */
        if (fs->fs_contigsumsize > 0) {
                lp = fs->fs_maxcluster;
                for (i = 0; i < fs->fs_ncg; i++)
                        *lp++ = fs->fs_contigsumsize;
        }

loop:
        MNT_ILOCK(mp);
        MNT_VNODE_FOREACH(vp, mp, mvp) {
                VI_LOCK(vp);
                /* Skip vnodes that are being destroyed. */
                if (vp->v_iflag & VI_DOOMED) {
                        VI_UNLOCK(vp);
                        continue;
                }
                MNT_IUNLOCK(mp);
                /*
                 * Step 4: invalidate all cached file data.
                 */
                if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
                        /* vget failed: abort the scan and restart from the top. */
                        MNT_VNODE_FOREACH_ABORT(mp, mvp);
                        goto loop;
                }
                if (vinvalbuf(vp, 0, td, 0, 0))
                        panic("ffs_reload: dirty2");
                /*
                 * Step 5: re-read inode data for all active vnodes.
                 */
                ip = VTOI(vp);
                error =
                    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
                    (int)fs->fs_bsize, NOCRED, &bp);
                if (error) {
                        VOP_UNLOCK(vp, 0, td);
                        vrele(vp);
                        MNT_VNODE_FOREACH_ABORT(mp, mvp);
                        return (error);
                }
                ffs_load_inode(bp, ip, fs, ip->i_number);
                ip->i_effnlink = ip->i_nlink;
                brelse(bp);
                VOP_UNLOCK(vp, 0, td);
                vrele(vp);
                MNT_ILOCK(mp);
        }
        MNT_IUNLOCK(mp);
        return (0);
}
  583 
/*
 * Possible superblock locations ordered from most to least likely.
 * The SBLOCKSEARCH initializer is terminated by -1, which is what the
 * probe loop in ffs_mountfs() stops on.
 */
static int sblock_try[] = SBLOCKSEARCH;
  588 
  589 /*
  590  * Common code for mount and mountroot
  591  */
  592 static int
  593 ffs_mountfs(devvp, mp, td)
  594         struct vnode *devvp;
  595         struct mount *mp;
  596         struct thread *td;
  597 {
  598         struct ufsmount *ump;
  599         struct buf *bp;
  600         struct fs *fs;
  601         struct cdev *dev;
  602         void *space;
  603         ufs2_daddr_t sblockloc;
  604         int error, i, blks, size, ronly;
  605         int32_t *lp;
  606         struct ucred *cred;
  607         struct g_consumer *cp;
  608         struct mount *nmp;
  609 
  610         dev = devvp->v_rdev;
  611         cred = td ? td->td_ucred : NOCRED;
  612 
  613         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
  614         DROP_GIANT();
  615         g_topology_lock();
  616         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
  617 
  618         /*
  619          * If we are a root mount, drop the E flag so fsck can do its magic.
  620          * We will pick it up again when we remount R/W.
  621          */
  622         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
  623                 error = g_access(cp, 0, 0, -1);
  624         g_topology_unlock();
  625         PICKUP_GIANT();
  626         VOP_UNLOCK(devvp, 0, td);
  627         if (error)
  628                 return (error);
  629         if (devvp->v_rdev->si_iosize_max != 0)
  630                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
  631         if (mp->mnt_iosize_max > MAXPHYS)
  632                 mp->mnt_iosize_max = MAXPHYS;
  633 
  634         devvp->v_bufobj.bo_private = cp;
  635         devvp->v_bufobj.bo_ops = &ffs_ops;
  636 
  637         bp = NULL;
  638         ump = NULL;
  639         fs = NULL;
  640         sblockloc = 0;
  641         /*
  642          * Try reading the superblock in each of its possible locations.
  643          */
  644         for (i = 0; sblock_try[i] != -1; i++) {
  645                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
  646                         error = EINVAL;
  647                         vfs_mount_error(mp,
  648                             "Invalid sectorsize %d for superblock size %d",
  649                             cp->provider->sectorsize, SBLOCKSIZE);
  650                         goto out;
  651                 }
  652                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
  653                     cred, &bp)) != 0)
  654                         goto out;
  655                 fs = (struct fs *)bp->b_data;
  656                 sblockloc = sblock_try[i];
  657                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
  658                      (fs->fs_magic == FS_UFS2_MAGIC &&
  659                       (fs->fs_sblockloc == sblockloc ||
  660                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
  661                     fs->fs_bsize <= MAXBSIZE &&
  662                     fs->fs_bsize >= sizeof(struct fs))
  663                         break;
  664                 brelse(bp);
  665                 bp = NULL;
  666         }
  667         if (sblock_try[i] == -1) {
  668                 error = EINVAL;         /* XXX needs translation */
  669                 goto out;
  670         }
  671         fs->fs_fmod = 0;
  672         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
  673         fs->fs_flags &= ~FS_UNCLEAN;
  674         if (fs->fs_clean == 0) {
  675                 fs->fs_flags |= FS_UNCLEAN;
  676                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
  677                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
  678                      (fs->fs_flags & FS_DOSOFTDEP))) {
  679                         printf(
  680 "WARNING: %s was not properly dismounted\n",
  681                             fs->fs_fsmnt);
  682                 } else {
  683                         printf(
  684 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
  685                             fs->fs_fsmnt);
  686                         error = EPERM;
  687                         goto out;
  688                 }
  689                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
  690                     (mp->mnt_flag & MNT_FORCE)) {
  691                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
  692                             (intmax_t)fs->fs_pendingblocks,
  693                             fs->fs_pendinginodes);
  694                         fs->fs_pendingblocks = 0;
  695                         fs->fs_pendinginodes = 0;
  696                 }
  697         }
  698         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
  699                 printf("%s: mount pending error: blocks %jd files %d\n",
  700                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
  701                     fs->fs_pendinginodes);
  702                 fs->fs_pendingblocks = 0;
  703                 fs->fs_pendinginodes = 0;
  704         }
  705         if ((fs->fs_flags & FS_GJOURNAL) != 0) {
  706 #ifdef UFS_GJOURNAL
  707                 /*
  708                  * Get journal provider name.
  709                  */
  710                 size = 1024;
  711                 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
  712                 if (g_io_getattr("GJOURNAL::provider", cp, &size,
  713                     mp->mnt_gjprovider) == 0) {
  714                         mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
  715                             M_UFSMNT, M_WAITOK);
  716                         MNT_ILOCK(mp);
  717                         mp->mnt_flag |= MNT_GJOURNAL;
  718                         MNT_IUNLOCK(mp);
  719                 } else {
  720                         printf(
  721 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
  722                             mp->mnt_stat.f_mntonname);
  723                         free(mp->mnt_gjprovider, M_UFSMNT);
  724                         mp->mnt_gjprovider = NULL;
  725                 }
  726 #else
  727                 printf(
  728 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
  729                     mp->mnt_stat.f_mntonname);
  730 #endif
  731         } else {
  732                 mp->mnt_gjprovider = NULL;
  733         }
  734         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
  735         ump->um_cp = cp;
  736         ump->um_bo = &devvp->v_bufobj;
  737         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
  738         if (fs->fs_magic == FS_UFS1_MAGIC) {
  739                 ump->um_fstype = UFS1;
  740                 ump->um_balloc = ffs_balloc_ufs1;
  741         } else {
  742                 ump->um_fstype = UFS2;
  743                 ump->um_balloc = ffs_balloc_ufs2;
  744         }
  745         ump->um_blkatoff = ffs_blkatoff;
  746         ump->um_truncate = ffs_truncate;
  747         ump->um_update = ffs_update;
  748         ump->um_valloc = ffs_valloc;
  749         ump->um_vfree = ffs_vfree;
  750         ump->um_ifree = ffs_ifree;
  751         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
  752         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
  753         if (fs->fs_sbsize < SBLOCKSIZE)
  754                 bp->b_flags |= B_INVAL | B_NOCACHE;
  755         brelse(bp);
  756         bp = NULL;
  757         fs = ump->um_fs;
  758         ffs_oldfscompat_read(fs, ump, sblockloc);
  759         fs->fs_ronly = ronly;
  760         size = fs->fs_cssize;
  761         blks = howmany(size, fs->fs_fsize);
  762         if (fs->fs_contigsumsize > 0)
  763                 size += fs->fs_ncg * sizeof(int32_t);
  764         size += fs->fs_ncg * sizeof(u_int8_t);
  765         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
  766         fs->fs_csp = space;
  767         for (i = 0; i < blks; i += fs->fs_frag) {
  768                 size = fs->fs_bsize;
  769                 if (i + fs->fs_frag > blks)
  770                         size = (blks - i) * fs->fs_fsize;
  771                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
  772                     cred, &bp)) != 0) {
  773                         free(fs->fs_csp, M_UFSMNT);
  774                         goto out;
  775                 }
  776                 bcopy(bp->b_data, space, (u_int)size);
  777                 space = (char *)space + size;
  778                 brelse(bp);
  779                 bp = NULL;
  780         }
  781         if (fs->fs_contigsumsize > 0) {
  782                 fs->fs_maxcluster = lp = space;
  783                 for (i = 0; i < fs->fs_ncg; i++)
  784                         *lp++ = fs->fs_contigsumsize;
  785                 space = lp;
  786         }
  787         size = fs->fs_ncg * sizeof(u_int8_t);
  788         fs->fs_contigdirs = (u_int8_t *)space;
  789         bzero(fs->fs_contigdirs, size);
  790         fs->fs_active = NULL;
  791         mp->mnt_data = (qaddr_t)ump;
  792         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
  793         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
  794         nmp = NULL;
  795         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 
  796             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
  797                 if (nmp)
  798                         vfs_rel(nmp);
  799                 vfs_getnewfsid(mp);
  800         }
  801         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
  802         MNT_ILOCK(mp);
  803         mp->mnt_flag |= MNT_LOCAL;
  804         MNT_IUNLOCK(mp);
  805         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
  806 #ifdef MAC
  807                 MNT_ILOCK(mp);
  808                 mp->mnt_flag |= MNT_MULTILABEL;
  809                 MNT_IUNLOCK(mp);
  810 #else
  811                 printf(
  812 "WARNING: %s: multilabel flag on fs but no MAC support\n",
  813                     mp->mnt_stat.f_mntonname);
  814 #endif
  815         }
  816         if ((fs->fs_flags & FS_ACLS) != 0) {
  817 #ifdef UFS_ACL
  818                 MNT_ILOCK(mp);
  819                 mp->mnt_flag |= MNT_ACLS;
  820                 MNT_IUNLOCK(mp);
  821 #else
  822                 printf(
  823 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
  824                     mp->mnt_stat.f_mntonname);
  825 #endif
  826         }
  827         ump->um_mountp = mp;
  828         ump->um_dev = dev;
  829         ump->um_devvp = devvp;
  830         ump->um_nindir = fs->fs_nindir;
  831         ump->um_bptrtodb = fs->fs_fsbtodb;
  832         ump->um_seqinc = fs->fs_frag;
  833         for (i = 0; i < MAXQUOTAS; i++)
  834                 ump->um_quotas[i] = NULLVP;
  835 #ifdef UFS_EXTATTR
  836         ufs_extattr_uepm_init(&ump->um_extattr);
  837 #endif
  838         /*
  839          * Set FS local "last mounted on" information (NULL pad)
  840          */
  841         bzero(fs->fs_fsmnt, MAXMNTLEN);
  842         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
  843 
  844         if( mp->mnt_flag & MNT_ROOTFS) {
  845                 /*
  846                  * Root mount; update timestamp in mount structure.
  847                  * this will be used by the common root mount code
  848                  * to update the system clock.
  849                  */
  850                 mp->mnt_time = fs->fs_time;
  851         }
  852 
  853         if (ronly == 0) {
  854                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
  855                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
  856                         free(fs->fs_csp, M_UFSMNT);
  857                         goto out;
  858                 }
  859                 if (fs->fs_snapinum[0] != 0)
  860                         ffs_snapshot_mount(mp);
  861                 fs->fs_fmod = 1;
  862                 fs->fs_clean = 0;
  863                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
  864         }
  865         /*
  866          * Initialize filesystem stat information in mount struct.
  867          */
  868 #ifdef UFS_EXTATTR
  869 #ifdef UFS_EXTATTR_AUTOSTART
  870         /*
  871          *
  872          * Auto-starting does the following:
  873          *      - check for /.attribute in the fs, and extattr_start if so
  874          *      - for each file in .attribute, enable that file with
  875          *        an attribute of the same name.
  876          * Not clear how to report errors -- probably eat them.
  877          * This would all happen while the filesystem was busy/not
  878          * available, so would effectively be "atomic".
  879          */
  880         (void) ufs_extattr_autostart(mp, td);
  881 #endif /* !UFS_EXTATTR_AUTOSTART */
  882 #endif /* !UFS_EXTATTR */
  883         MNT_ILOCK(mp);
  884         mp->mnt_kern_flag |= MNTK_MPSAFE;
  885         MNT_IUNLOCK(mp);
  886         return (0);
  887 out:
  888         if (bp)
  889                 brelse(bp);
  890         if (cp != NULL) {
  891                 DROP_GIANT();
  892                 g_topology_lock();
  893                 g_vfs_close(cp, td);
  894                 g_topology_unlock();
  895                 PICKUP_GIANT();
  896         }
  897         if (ump) {
  898                 mtx_destroy(UFS_MTX(ump));
  899                 if (mp->mnt_gjprovider != NULL) {
  900                         free(mp->mnt_gjprovider, M_UFSMNT);
  901                         mp->mnt_gjprovider = NULL;
  902                 }
  903                 free(ump->um_fs, M_UFSMNT);
  904                 free(ump, M_UFSMNT);
  905                 mp->mnt_data = (qaddr_t)0;
  906         }
  907         return (error);
  908 }
  909 
#include <sys/sysctl.h>
/*
 * Debug knob (debug.bigcgs): when non-zero, ffs_oldfscompat_read()
 * temporarily inflates fs_cgsize to fs_bsize; ffs_oldfscompat_write()
 * undoes the change before the superblock goes back to disk.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
  913 
  914 /*
  915  * Sanity checks for loading old filesystem superblocks.
  916  * See ffs_oldfscompat_write below for unwound actions.
  917  *
  918  * XXX - Parts get retired eventually.
  919  * Unfortunately new bits get added.
  920  */
  921 static void
  922 ffs_oldfscompat_read(fs, ump, sblockloc)
  923         struct fs *fs;
  924         struct ufsmount *ump;
  925         ufs2_daddr_t sblockloc;
  926 {
  927         off_t maxfilesize;
  928 
  929         /*
  930          * If not yet done, update fs_flags location and value of fs_sblockloc.
  931          */
  932         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
  933                 fs->fs_flags = fs->fs_old_flags;
  934                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
  935                 fs->fs_sblockloc = sblockloc;
  936         }
  937         /*
  938          * If not yet done, update UFS1 superblock with new wider fields.
  939          */
  940         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
  941                 fs->fs_maxbsize = fs->fs_bsize;
  942                 fs->fs_time = fs->fs_old_time;
  943                 fs->fs_size = fs->fs_old_size;
  944                 fs->fs_dsize = fs->fs_old_dsize;
  945                 fs->fs_csaddr = fs->fs_old_csaddr;
  946                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
  947                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
  948                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
  949                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
  950         }
  951         if (fs->fs_magic == FS_UFS1_MAGIC &&
  952             fs->fs_old_inodefmt < FS_44INODEFMT) {
  953                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
  954                 fs->fs_qbmask = ~fs->fs_bmask;
  955                 fs->fs_qfmask = ~fs->fs_fmask;
  956         }
  957         if (fs->fs_magic == FS_UFS1_MAGIC) {
  958                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
  959                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
  960                 if (fs->fs_maxfilesize > maxfilesize)
  961                         fs->fs_maxfilesize = maxfilesize;
  962         }
  963         /* Compatibility for old filesystems */
  964         if (fs->fs_avgfilesize <= 0)
  965                 fs->fs_avgfilesize = AVFILESIZ;
  966         if (fs->fs_avgfpdir <= 0)
  967                 fs->fs_avgfpdir = AFPDIR;
  968         if (bigcgs) {
  969                 fs->fs_save_cgsize = fs->fs_cgsize;
  970                 fs->fs_cgsize = fs->fs_bsize;
  971         }
  972 }
  973 
  974 /*
  975  * Unwinding superblock updates for old filesystems.
  976  * See ffs_oldfscompat_read above for details.
  977  *
  978  * XXX - Parts get retired eventually.
  979  * Unfortunately new bits get added.
  980  */
  981 static void
  982 ffs_oldfscompat_write(fs, ump)
  983         struct fs *fs;
  984         struct ufsmount *ump;
  985 {
  986 
  987         /*
  988          * Copy back UFS2 updated fields that UFS1 inspects.
  989          */
  990         if (fs->fs_magic == FS_UFS1_MAGIC) {
  991                 fs->fs_old_time = fs->fs_time;
  992                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
  993                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
  994                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
  995                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
  996                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
  997         }
  998         if (bigcgs) {
  999                 fs->fs_cgsize = fs->fs_save_cgsize;
 1000                 fs->fs_save_cgsize = 0;
 1001         }
 1002 }
 1003 
 1004 /*
 1005  * unmount system call
 1006  */
 1007 static int
 1008 ffs_unmount(mp, mntflags, td)
 1009         struct mount *mp;
 1010         int mntflags;
 1011         struct thread *td;
 1012 {
 1013         struct ufsmount *ump = VFSTOUFS(mp);
 1014         struct fs *fs;
 1015         int error, flags;
 1016 
 1017         flags = 0;
 1018         if (mntflags & MNT_FORCE) {
 1019                 flags |= FORCECLOSE;
 1020         }
 1021 #ifdef UFS_EXTATTR
 1022         if ((error = ufs_extattr_stop(mp, td))) {
 1023                 if (error != EOPNOTSUPP)
 1024                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
 1025                             error);
 1026         } else {
 1027                 ufs_extattr_uepm_destroy(&ump->um_extattr);
 1028         }
 1029 #endif
 1030         if (mp->mnt_flag & MNT_SOFTDEP) {
 1031                 if ((error = softdep_flushfiles(mp, flags, td)) != 0)
 1032                         return (error);
 1033         } else {
 1034                 if ((error = ffs_flushfiles(mp, flags, td)) != 0)
 1035                         return (error);
 1036         }
 1037         fs = ump->um_fs;
 1038         UFS_LOCK(ump);
 1039         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 1040                 printf("%s: unmount pending error: blocks %jd files %d\n",
 1041                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 1042                     fs->fs_pendinginodes);
 1043                 fs->fs_pendingblocks = 0;
 1044                 fs->fs_pendinginodes = 0;
 1045         }
 1046         UFS_UNLOCK(ump);
 1047         if (fs->fs_ronly == 0) {
 1048                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
 1049                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
 1050                 if (error) {
 1051                         fs->fs_clean = 0;
 1052                         return (error);
 1053                 }
 1054         }
 1055         DROP_GIANT();
 1056         g_topology_lock();
 1057         g_vfs_close(ump->um_cp, td);
 1058         g_topology_unlock();
 1059         PICKUP_GIANT();
 1060         vrele(ump->um_devvp);
 1061         mtx_destroy(UFS_MTX(ump));
 1062         if (mp->mnt_gjprovider != NULL) {
 1063                 free(mp->mnt_gjprovider, M_UFSMNT);
 1064                 mp->mnt_gjprovider = NULL;
 1065         }
 1066         free(fs->fs_csp, M_UFSMNT);
 1067         free(fs, M_UFSMNT);
 1068         free(ump, M_UFSMNT);
 1069         mp->mnt_data = (qaddr_t)0;
 1070         MNT_ILOCK(mp);
 1071         mp->mnt_flag &= ~MNT_LOCAL;
 1072         MNT_IUNLOCK(mp);
 1073         return (error);
 1074 }
 1075 
 1076 /*
 1077  * Flush out all the files in a filesystem.
 1078  */
 1079 int
 1080 ffs_flushfiles(mp, flags, td)
 1081         struct mount *mp;
 1082         int flags;
 1083         struct thread *td;
 1084 {
 1085         struct ufsmount *ump;
 1086         int error;
 1087 
 1088         ump = VFSTOUFS(mp);
 1089 #ifdef QUOTA
 1090         if (mp->mnt_flag & MNT_QUOTA) {
 1091                 int i;
 1092                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
 1093                 if (error)
 1094                         return (error);
 1095                 for (i = 0; i < MAXQUOTAS; i++) {
 1096                         quotaoff(td, mp, i);
 1097                 }
 1098                 /*
 1099                  * Here we fall through to vflush again to ensure
 1100                  * that we have gotten rid of all the system vnodes.
 1101                  */
 1102         }
 1103 #endif
 1104         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
 1105         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
 1106                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
 1107                         return (error);
 1108                 ffs_snapshot_unmount(mp);
 1109                 flags |= FORCECLOSE;
 1110                 /*
 1111                  * Here we fall through to vflush again to ensure
 1112                  * that we have gotten rid of all the system vnodes.
 1113                  */
 1114         }
 1115         /*
 1116          * Flush all the files.
 1117          */
 1118         if ((error = vflush(mp, 0, flags, td)) != 0)
 1119                 return (error);
 1120         /*
 1121          * Flush filesystem metadata.
 1122          */
 1123         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
 1124         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
 1125         VOP_UNLOCK(ump->um_devvp, 0, td);
 1126         return (error);
 1127 }
 1128 
 1129 /*
 1130  * Get filesystem statistics.
 1131  */
 1132 static int
 1133 ffs_statfs(mp, sbp, td)
 1134         struct mount *mp;
 1135         struct statfs *sbp;
 1136         struct thread *td;
 1137 {
 1138         struct ufsmount *ump;
 1139         struct fs *fs;
 1140 
 1141         ump = VFSTOUFS(mp);
 1142         fs = ump->um_fs;
 1143         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
 1144                 panic("ffs_statfs");
 1145         sbp->f_version = STATFS_VERSION;
 1146         sbp->f_bsize = fs->fs_fsize;
 1147         sbp->f_iosize = fs->fs_bsize;
 1148         sbp->f_blocks = fs->fs_dsize;
 1149         UFS_LOCK(ump);
 1150         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 1151             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
 1152         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
 1153             dbtofsb(fs, fs->fs_pendingblocks);
 1154         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
 1155         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
 1156         UFS_UNLOCK(ump);
 1157         sbp->f_namemax = NAME_MAX;
 1158         return (0);
 1159 }
 1160 
 1161 /*
 1162  * Go through the disk queues to initiate sandbagged IO;
 1163  * go through the inodes to write those that have been modified;
 1164  * initiate the writing of the super block if it has been modified.
 1165  *
 1166  * Note: we are always called with the filesystem marked `MPBUSY'.
 1167  */
 1168 static int
 1169 ffs_sync(mp, waitfor, td)
 1170         struct mount *mp;
 1171         int waitfor;
 1172         struct thread *td;
 1173 {
 1174         struct vnode *mvp, *vp, *devvp;
 1175         struct inode *ip;
 1176         struct ufsmount *ump = VFSTOUFS(mp);
 1177         struct fs *fs;
 1178         int error, count, wait, lockreq, allerror = 0;
 1179         int suspend;
 1180         int suspended;
 1181         int secondary_writes;
 1182         int secondary_accwrites;
 1183         int softdep_deps;
 1184         int softdep_accdeps;
 1185         struct bufobj *bo;
 1186 
 1187         fs = ump->um_fs;
 1188         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
 1189                 printf("fs = %s\n", fs->fs_fsmnt);
 1190                 panic("ffs_sync: rofs mod");
 1191         }
 1192         /*
 1193          * Write back each (modified) inode.
 1194          */
 1195         wait = 0;
 1196         suspend = 0;
 1197         suspended = 0;
 1198         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
 1199         if (waitfor == MNT_SUSPEND) {
 1200                 suspend = 1;
 1201                 waitfor = MNT_WAIT;
 1202         }
 1203         if (waitfor == MNT_WAIT) {
 1204                 wait = 1;
 1205                 lockreq = LK_EXCLUSIVE;
 1206         }
 1207         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
 1208         MNT_ILOCK(mp);
 1209 loop:
 1210         /* Grab snapshot of secondary write counts */
 1211         secondary_writes = mp->mnt_secondary_writes;
 1212         secondary_accwrites = mp->mnt_secondary_accwrites;
 1213 
 1214         /* Grab snapshot of softdep dependency counts */
 1215         MNT_IUNLOCK(mp);
 1216         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
 1217         MNT_ILOCK(mp);
 1218 
 1219         MNT_VNODE_FOREACH(vp, mp, mvp) {
 1220                 /*
 1221                  * Depend on the mntvnode_slock to keep things stable enough
 1222                  * for a quick test.  Since there might be hundreds of
 1223                  * thousands of vnodes, we cannot afford even a subroutine
 1224                  * call unless there's a good chance that we have work to do.
 1225                  */
 1226                 VI_LOCK(vp);
 1227                 if (vp->v_iflag & VI_DOOMED) {
 1228                         VI_UNLOCK(vp);
 1229                         continue;
 1230                 }
 1231                 ip = VTOI(vp);
 1232                 if (vp->v_type == VNON || ((ip->i_flag &
 1233                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 1234                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
 1235                         VI_UNLOCK(vp);
 1236                         continue;
 1237                 }
 1238                 MNT_IUNLOCK(mp);
 1239                 if ((error = vget(vp, lockreq, td)) != 0) {
 1240                         MNT_ILOCK(mp);
 1241                         if (error == ENOENT || error == ENOLCK) {
 1242                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
 1243                                 goto loop;
 1244                         }
 1245                         continue;
 1246                 }
 1247                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
 1248                         allerror = error;
 1249                 vput(vp);
 1250                 MNT_ILOCK(mp);
 1251         }
 1252         MNT_IUNLOCK(mp);
 1253         /*
 1254          * Force stale filesystem control information to be flushed.
 1255          */
 1256         if (waitfor == MNT_WAIT) {
 1257                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
 1258                         allerror = error;
 1259                 /* Flushed work items may create new vnodes to clean */
 1260                 if (allerror == 0 && count) {
 1261                         MNT_ILOCK(mp);
 1262                         goto loop;
 1263                 }
 1264         }
 1265 #ifdef QUOTA
 1266         qsync(mp);
 1267 #endif
 1268         devvp = ump->um_devvp;
 1269         VI_LOCK(devvp);
 1270         bo = &devvp->v_bufobj;
 1271         if (waitfor != MNT_LAZY &&
 1272             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
 1273                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
 1274                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
 1275                         allerror = error;
 1276                 VOP_UNLOCK(devvp, 0, td);
 1277                 if (allerror == 0 && waitfor == MNT_WAIT) {
 1278                         MNT_ILOCK(mp);
 1279                         goto loop;
 1280                 }
 1281         } else if (suspend != 0) {
 1282                 if (softdep_check_suspend(mp,
 1283                                           devvp,
 1284                                           softdep_deps,
 1285                                           softdep_accdeps,
 1286                                           secondary_writes,
 1287                                           secondary_accwrites) != 0)
 1288                         goto loop;      /* More work needed */
 1289                 mtx_assert(MNT_MTX(mp), MA_OWNED);
 1290                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
 1291                 MNT_IUNLOCK(mp);
 1292                 suspended = 1;
 1293         } else
 1294                 VI_UNLOCK(devvp);
 1295         /*
 1296          * Write back modified superblock.
 1297          */
 1298         if (fs->fs_fmod != 0 &&
 1299             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
 1300                 allerror = error;
 1301         return (allerror);
 1302 }
 1303 
/*
 * Return a locked vnode for the given inode number, creating and
 * initializing a new vnode/inode pair when the inode is not already
 * in the vfs hash.  Concurrent creators for the same ino are allowed
 * to race; vfs_hash_insert() decides the winner.
 */
int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;
	struct thread *td;

	/* Fast path: the vnode may already be hashed. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive and shared locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	td = curthread;
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, td);
	/*
	 * NOTE(review): this error path frees ip but not vp; presumably
	 * insmntque() disposes of the vnode itself on failure -- confirm.
	 */
	error = insmntque(vp, mp);
	if (error != 0) {
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	/* A non-NULL *vpp here means another thread won the creation race. */
	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Allocate the format-specific dinode and fill it from the buffer. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
 1480 
 1481 /*
 1482  * File handle to vnode
 1483  *
 1484  * Have to be really careful about stale file handles:
 1485  * - check that the inode number is valid
 1486  * - call ffs_vget() to get the locked inode
 1487  * - check for an unallocated inode (i_mode == 0)
 1488  * - check that the given client host has export rights and return
 1489  *   those rights via. exflagsp and credanonp
 1490  */
 1491 static int
 1492 ffs_fhtovp(mp, fhp, vpp)
 1493         struct mount *mp;
 1494         struct fid *fhp;
 1495         struct vnode **vpp;
 1496 {
 1497         struct ufid *ufhp;
 1498         struct fs *fs;
 1499 
 1500         ufhp = (struct ufid *)fhp;
 1501         fs = VFSTOUFS(mp)->um_fs;
 1502         if (ufhp->ufid_ino < ROOTINO ||
 1503             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
 1504                 return (ESTALE);
 1505         return (ufs_fhtovp(mp, ufhp, vpp));
 1506 }
 1507 
 1508 /*
 1509  * Initialize the filesystem.
 1510  */
 1511 static int
 1512 ffs_init(vfsp)
 1513         struct vfsconf *vfsp;
 1514 {
 1515 
 1516         softdep_initialize();
 1517         return (ufs_init(vfsp));
 1518 }
 1519 
 1520 /*
 1521  * Undo the work of ffs_init().
 1522  */
 1523 static int
 1524 ffs_uninit(vfsp)
 1525         struct vfsconf *vfsp;
 1526 {
 1527         int ret;
 1528 
 1529         ret = ufs_uninit(vfsp);
 1530         softdep_uninitialize();
 1531         return (ret);
 1532 }
 1533 
 1534 /*
 1535  * Write a superblock and associated information back to disk.
 1536  */
 1537 int
 1538 ffs_sbupdate(mp, waitfor, suspended)
 1539         struct ufsmount *mp;
 1540         int waitfor;
 1541         int suspended;
 1542 {
 1543         struct fs *fs = mp->um_fs;
 1544         struct buf *sbbp;
 1545         struct buf *bp;
 1546         int blks;
 1547         void *space;
 1548         int i, size, error, allerror = 0;
 1549 
 1550         if (fs->fs_ronly == 1 &&
 1551             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != 
 1552             (MNT_RDONLY | MNT_UPDATE))
 1553                 panic("ffs_sbupdate: write read-only filesystem");
 1554         /*
 1555          * We use the superblock's buf to serialize calls to ffs_sbupdate().
 1556          */
 1557         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
 1558             0, 0, 0);
 1559         /*
 1560          * First write back the summary information.
 1561          */
 1562         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 1563         space = fs->fs_csp;
 1564         for (i = 0; i < blks; i += fs->fs_frag) {
 1565                 size = fs->fs_bsize;
 1566                 if (i + fs->fs_frag > blks)
 1567                         size = (blks - i) * fs->fs_fsize;
 1568                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
 1569                     size, 0, 0, 0);
 1570                 bcopy(space, bp->b_data, (u_int)size);
 1571                 space = (char *)space + size;
 1572                 if (suspended)
 1573                         bp->b_flags |= B_VALIDSUSPWRT;
 1574                 if (waitfor != MNT_WAIT)
 1575                         bawrite(bp);
 1576                 else if ((error = bwrite(bp)) != 0)
 1577                         allerror = error;
 1578         }
 1579         /*
 1580          * Now write back the superblock itself. If any errors occurred
 1581          * up to this point, then fail so that the superblock avoids
 1582          * being written out as clean.
 1583          */
 1584         if (allerror) {
 1585                 brelse(sbbp);
 1586                 return (allerror);
 1587         }
 1588         bp = sbbp;
 1589         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
 1590             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
 1591                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
 1592                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
 1593                 fs->fs_sblockloc = SBLOCK_UFS1;
 1594         }
 1595         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
 1596             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
 1597                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
 1598                     fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
 1599                 fs->fs_sblockloc = SBLOCK_UFS2;
 1600         }
 1601         fs->fs_fmod = 0;
 1602         fs->fs_time = time_second;
 1603         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 1604         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
 1605         if (suspended)
 1606                 bp->b_flags |= B_VALIDSUSPWRT;
 1607         if (waitfor != MNT_WAIT)
 1608                 bawrite(bp);
 1609         else if ((error = bwrite(bp)) != 0)
 1610                 allerror = error;
 1611         return (allerror);
 1612 }
 1613 
 1614 static int
 1615 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
 1616         int attrnamespace, const char *attrname, struct thread *td)
 1617 {
 1618 
 1619 #ifdef UFS_EXTATTR
 1620         return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
 1621             attrname, td));
 1622 #else
 1623         return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
 1624             attrname, td));
 1625 #endif
 1626 }
 1627 
/*
 * Release an in-core inode back to its UMA zone, along with whichever
 * on-disk dinode (UFS1 or UFS2) is attached to it.
 *
 * Note the conditional chain: a UFS1 mount with a NULL i_din1 still
 * falls through to the i_din2 check before the inode itself is freed.
 */
static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

        if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
                uma_zfree(uma_ufs1, ip->i_din1);
        else if (ip->i_din2 != NULL)
                uma_zfree(uma_ufs2, ip->i_din2);
        uma_zfree(uma_inode, ip);
}
 1638 
/*
 * Tunable (debug.dobkgrdwrite): when non-zero, ffs_bufwrite() may clone
 * a buffer marked BX_BKGRDWRITE and issue the write against the copy,
 * leaving the original available for further modification.
 */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
 1642 
 1643 /*
 1644  * Complete a background write started from bwrite.
 1645  */
 1646 static void
 1647 ffs_backgroundwritedone(struct buf *bp)
 1648 {
 1649         struct bufobj *bufobj;
 1650         struct buf *origbp;
 1651 
 1652         /*
 1653          * Find the original buffer that we are writing.
 1654          */
 1655         bufobj = bp->b_bufobj;
 1656         BO_LOCK(bufobj);
 1657         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
 1658                 panic("backgroundwritedone: lost buffer");
 1659         /* Grab an extra reference to be dropped by the bufdone() below. */
 1660         bufobj_wrefl(bufobj);
 1661         BO_UNLOCK(bufobj);
 1662         /*
 1663          * Process dependencies then return any unfinished ones.
 1664          */
 1665         if (!LIST_EMPTY(&bp->b_dep))
 1666                 buf_complete(bp);
 1667 #ifdef SOFTUPDATES
 1668         if (!LIST_EMPTY(&bp->b_dep))
 1669                 softdep_move_dependencies(bp, origbp);
 1670 #endif
 1671         /*
 1672          * This buffer is marked B_NOCACHE so when it is released
 1673          * by biodone it will be tossed.
 1674          */
 1675         bp->b_flags |= B_NOCACHE;
 1676         bp->b_flags &= ~B_CACHE;
 1677         bufdone(bp);
 1678         BO_LOCK(bufobj);
 1679         /*
 1680          * Clear the BV_BKGRDINPROG flag in the original buffer
 1681          * and awaken it if it is waiting for the write to complete.
 1682          * If BV_BKGRDINPROG is not set in the original buffer it must
 1683          * have been released and re-instantiated - which is not legal.
 1684          */
 1685         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
 1686             ("backgroundwritedone: lost buffer2"));
 1687         origbp->b_vflags &= ~BV_BKGRDINPROG;
 1688         if (origbp->b_vflags & BV_BKGRDWAIT) {
 1689                 origbp->b_vflags &= ~BV_BKGRDWAIT;
 1690                 wakeup(&origbp->b_xflags);
 1691         }
 1692         BO_UNLOCK(bufobj);
 1693 }
 1694 
 1695 
 1696 /*
 1697  * Write, release buffer on completion.  (Done by iodone
 1698  * if async).  Do not bother writing anything if the buffer
 1699  * is invalid.
 1700  *
 1701  * Note that we set B_CACHE here, indicating that buffer is
 1702  * fully valid and thus cacheable.  This is true even of NFS
 1703  * now so we set it generally.  This could be set either here 
 1704  * or in biodone() since the I/O is synchronous.  We put it
 1705  * here.
 1706  */
 1707 static int
 1708 ffs_bufwrite(struct buf *bp)
 1709 {
 1710         int oldflags, s;
 1711         struct buf *newbp;
 1712 
 1713         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
 1714         if (bp->b_flags & B_INVAL) {
 1715                 brelse(bp);
 1716                 return (0);
 1717         }
 1718 
 1719         oldflags = bp->b_flags;
 1720 
 1721         if (BUF_REFCNT(bp) == 0)
 1722                 panic("bufwrite: buffer is not busy???");
 1723         s = splbio();
 1724         /*
 1725          * If a background write is already in progress, delay
 1726          * writing this block if it is asynchronous. Otherwise
 1727          * wait for the background write to complete.
 1728          */
 1729         BO_LOCK(bp->b_bufobj);
 1730         if (bp->b_vflags & BV_BKGRDINPROG) {
 1731                 if (bp->b_flags & B_ASYNC) {
 1732                         BO_UNLOCK(bp->b_bufobj);
 1733                         splx(s);
 1734                         bdwrite(bp);
 1735                         return (0);
 1736                 }
 1737                 bp->b_vflags |= BV_BKGRDWAIT;
 1738                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
 1739                 if (bp->b_vflags & BV_BKGRDINPROG)
 1740                         panic("bufwrite: still writing");
 1741         }
 1742         BO_UNLOCK(bp->b_bufobj);
 1743 
 1744         /* Mark the buffer clean */
 1745         bundirty(bp);
 1746 
 1747         /*
 1748          * If this buffer is marked for background writing and we
 1749          * do not have to wait for it, make a copy and write the
 1750          * copy so as to leave this buffer ready for further use.
 1751          *
 1752          * This optimization eats a lot of memory.  If we have a page
 1753          * or buffer shortfall we can't do it.
 1754          */
 1755         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && 
 1756             (bp->b_flags & B_ASYNC) &&
 1757             !vm_page_count_severe() &&
 1758             !buf_dirty_count_severe()) {
 1759                 KASSERT(bp->b_iodone == NULL,
 1760                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
 1761 
 1762                 /* get a new block */
 1763                 newbp = geteblk(bp->b_bufsize);
 1764 
 1765                 /*
 1766                  * set it to be identical to the old block.  We have to
 1767                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
 1768                  * to avoid confusing the splay tree and gbincore().
 1769                  */
 1770                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
 1771                 newbp->b_lblkno = bp->b_lblkno;
 1772                 newbp->b_xflags |= BX_BKGRDMARKER;
 1773                 BO_LOCK(bp->b_bufobj);
 1774                 bp->b_vflags |= BV_BKGRDINPROG;
 1775                 bgetvp(bp->b_vp, newbp);
 1776                 BO_UNLOCK(bp->b_bufobj);
 1777                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
 1778                 newbp->b_blkno = bp->b_blkno;
 1779                 newbp->b_offset = bp->b_offset;
 1780                 newbp->b_iodone = ffs_backgroundwritedone;
 1781                 newbp->b_flags |= B_ASYNC;
 1782                 newbp->b_flags &= ~B_INVAL;
 1783 
 1784 #ifdef SOFTUPDATES
 1785                 /* move over the dependencies */
 1786                 if (!LIST_EMPTY(&bp->b_dep))
 1787                         softdep_move_dependencies(bp, newbp);
 1788 #endif 
 1789 
 1790                 /*
 1791                  * Initiate write on the copy, release the original to
 1792                  * the B_LOCKED queue so that it cannot go away until
 1793                  * the background write completes. If not locked it could go
 1794                  * away and then be reconstituted while it was being written.
 1795                  * If the reconstituted buffer were written, we could end up
 1796                  * with two background copies being written at the same time.
 1797                  */
 1798                 bqrelse(bp);
 1799                 bp = newbp;
 1800         }
 1801 
 1802         /* Let the normal bufwrite do the rest for us */
 1803         return (bufwrite(bp));
 1804 }
 1805 
 1806 
/*
 * Strategy routine for FFS buffers going through a GEOM-backed bufobj.
 *
 * For writes it (1) panics if an unmarked write reaches a suspended
 * filesystem (only buffers flagged B_VALIDSUSPWRT may pass), (2) gives
 * snapshots a chance to copy-on-write the data — for cluster buffers,
 * each constituent buffer is processed individually — and (3) starts
 * softdep processing on the buffer(s) before the I/O is issued.
 * Finally the request is handed to g_vfs_strategy().
 */
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
        struct vnode *vp;
        int error;
        struct buf *tbp;

        vp = bo->__bo_vnode;
        if (bp->b_iocmd == BIO_WRITE) {
                if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
                    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
                    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
                        panic("ffs_geom_strategy: bad I/O");
                bp->b_flags &= ~B_VALIDSUSPWRT;
                if ((vp->v_vflag & VV_COPYONWRITE) &&
                    vp->v_rdev->si_snapdata != NULL) {
                        if ((bp->b_flags & B_CLUSTER) != 0) {
                                /*
                                 * Release the running-buffer accounting
                                 * while copy-on-write runs; restored below.
                                 */
                                runningbufwakeup(bp);
                                TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
                                              b_cluster.cluster_entry) {
                                        error = ffs_copyonwrite(vp, tbp);
                                        /* EOPNOTSUPP means nothing to copy. */
                                        if (error != 0 &&
                                            error != EOPNOTSUPP) {
                                                bp->b_error = error;
                                                bp->b_ioflags |= BIO_ERROR;
                                                bufdone(bp);
                                                return;
                                        }
                                }
                                bp->b_runningbufspace = bp->b_bufsize;
                                atomic_add_int(&runningbufspace,
                                               bp->b_runningbufspace);
                        } else {
                                error = ffs_copyonwrite(vp, bp);
                                if (error != 0 && error != EOPNOTSUPP) {
                                        bp->b_error = error;
                                        bp->b_ioflags |= BIO_ERROR;
                                        bufdone(bp);
                                        return;
                                }
                        }
                }
#ifdef SOFTUPDATES
                if ((bp->b_flags & B_CLUSTER) != 0) {
                        TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
                                      b_cluster.cluster_entry) {
                                if (!LIST_EMPTY(&tbp->b_dep))
                                        buf_start(tbp);
                        }
                } else {
                        if (!LIST_EMPTY(&bp->b_dep))
                                buf_start(bp);
                }

#endif
        }
        g_vfs_strategy(bo, bp);
}

Cache object: f6978c610ad12cc7c233564623b37a29


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.