ffs_vfsops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1991, 1993, 1994
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/6.4/sys/ufs/ffs/ffs_vfsops.c 179071 2008-05-17 12:46:24Z kib $");
   34 
   35 #include "opt_mac.h"
   36 #include "opt_quota.h"
   37 #include "opt_ufs.h"
   38 #include "opt_ffs.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/namei.h>
   43 #include <sys/proc.h>
   44 #include <sys/kernel.h>
   45 #include <sys/mac.h>
   46 #include <sys/vnode.h>
   47 #include <sys/mount.h>
   48 #include <sys/bio.h>
   49 #include <sys/buf.h>
   50 #include <sys/conf.h>
   51 #include <sys/fcntl.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mutex.h>
   54 
   55 #include <ufs/ufs/extattr.h>
   56 #include <ufs/ufs/quota.h>
   57 #include <ufs/ufs/ufsmount.h>
   58 #include <ufs/ufs/inode.h>
   59 #include <ufs/ufs/ufs_extern.h>
   60 
   61 #include <ufs/ffs/fs.h>
   62 #include <ufs/ffs/ffs_extern.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/uma.h>
   66 #include <vm/vm_page.h>
   67 
   68 #include <geom/geom.h>
   69 #include <geom/geom_vfs.h>
   70 
   71 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
   72 
   73 static int      ffs_sbupdate(struct ufsmount *, int, int);
   74 static int      ffs_reload(struct mount *, struct thread *);
   75 static int      ffs_mountfs(struct vnode *, struct mount *, struct thread *);
   76 static void     ffs_oldfscompat_read(struct fs *, struct ufsmount *,
   77                     ufs2_daddr_t);
   78 static void     ffs_oldfscompat_write(struct fs *, struct ufsmount *);
   79 static void     ffs_ifree(struct ufsmount *ump, struct inode *ip);
   80 static vfs_init_t ffs_init;
   81 static vfs_uninit_t ffs_uninit;
   82 static vfs_extattrctl_t ffs_extattrctl;
   83 static vfs_cmount_t ffs_cmount;
   84 static vfs_unmount_t ffs_unmount;
   85 static vfs_mount_t ffs_mount;
   86 static vfs_statfs_t ffs_statfs;
   87 static vfs_fhtovp_t ffs_fhtovp;
   88 static vfs_vptofh_t ffs_vptofh;
   89 static vfs_sync_t ffs_sync;
   90 
   91 static struct vfsops ufs_vfsops = {
   92         .vfs_extattrctl =       ffs_extattrctl,
   93         .vfs_fhtovp =           ffs_fhtovp,
   94         .vfs_init =             ffs_init,
   95         .vfs_mount =            ffs_mount,
   96         .vfs_cmount =           ffs_cmount,
   97         .vfs_quotactl =         ufs_quotactl,
   98         .vfs_root =             ufs_root,
   99         .vfs_statfs =           ffs_statfs,
  100         .vfs_sync =             ffs_sync,
  101         .vfs_uninit =           ffs_uninit,
  102         .vfs_unmount =          ffs_unmount,
  103         .vfs_vget =             ffs_vget,
  104         .vfs_vptofh =           ffs_vptofh,
  105 };
  106 
  107 VFS_SET(ufs_vfsops, ufs, 0);
  108 MODULE_VERSION(ufs, 1);
  109 
  110 static b_strategy_t ffs_geom_strategy;
  111 static b_write_t ffs_bufwrite;
  112 
  113 static struct buf_ops ffs_ops = {
  114         .bop_name =     "FFS",
  115         .bop_write =    ffs_bufwrite,
  116         .bop_strategy = ffs_geom_strategy,
  117         .bop_sync =     bufsync,
  118 #ifdef NO_FFS_SNAPSHOT
  119         .bop_bdflush =  bufbdflush,
  120 #else
  121         .bop_bdflush =  ffs_bdflush,
  122 #endif
  123 };
  124 
  125 static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
  126     "clusterw", "exec", "export", "force", "from", "multilabel", 
  127     "snapshot", "suid", "suiddir", "symfollow", "sync",
  128     "update", "union", NULL };
  129 
      /*
       * VFS mount entry point for FFS; handles both new mounts and MNT_UPDATE
       * requests.
       *
       * The option list in mp->mnt_optnew is checked against ffs_opts (EINVAL
       * on failure) and the "from" option supplies the device path.
       *
       * For MNT_UPDATE the following are processed in order: downgrade from
       * read/write to read-only, MNT_RELOAD via ffs_reload(), upgrade from
       * read-only to read/write, clearing MNT_ASYNC on soft updates mounts,
       * re-asserting MNT_ACLS when the superblock has FS_ACLS set, and finally
       * MNT_SNAPSHOT, which returns the result of ffs_snapshot() directly.
       *
       * In every other case the device named by "from" is looked up, checked
       * with vn_isdisk() and, for callers that fail suser(), checked with
       * VOP_ACCESS().  An update then only verifies that the device matches
       * the one already mounted; a new mount calls ffs_mountfs().
       *
       * Returns 0 on success or an errno value.
       */
  130 static int
  131 ffs_mount(struct mount *mp, struct thread *td)
  132 {
  133         struct vnode *devvp;
  134         struct ufsmount *ump = 0;
  135         struct fs *fs;
  136         int error, flags;
  137         mode_t accessmode;
  138         struct nameidata ndp;
  139         char *fspec;
  140 
  141         if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
  142                 return (EINVAL);
              /* Create the inode and dinode zones the first time through. */
  143         if (uma_inode == NULL) {
  144                 uma_inode = uma_zcreate("FFS inode",
  145                     sizeof(struct inode), NULL, NULL, NULL, NULL,
  146                     UMA_ALIGN_PTR, 0);
  147                 uma_ufs1 = uma_zcreate("FFS1 dinode",
  148                     sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
  149                     UMA_ALIGN_PTR, 0);
  150                 uma_ufs2 = uma_zcreate("FFS2 dinode",
  151                     sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
  152                     UMA_ALIGN_PTR, 0);
  153         }
  154 
              /* The device path comes from the "from" option. */
  155         fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
  156         if (error)
  157                 return (error);
  158 
  159         /*
  160          * If updating, check whether changing from read-only to
  161          * read/write; if there is no device name, that's all we do.
  162          */
  163         if (mp->mnt_flag & MNT_UPDATE) {
  164                 ump = VFSTOUFS(mp);
  165                 fs = ump->um_fs;
  166                 devvp = ump->um_devvp;
                      /* Downgrade: mounted read/write and "ro" was requested. */
  167                 if (fs->fs_ronly == 0 &&
  168                     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
  169                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
  170                                 return (error);
  171                         /*
  172                          * Flush any dirty data.
  173                          */
  174                         if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
  175                                 vn_finished_write(mp);
  176                                 return (error);
  177                         }
  178                         /*
  179                          * Check for and optionally get rid of files open
  180                          * for writing.
  181                          */
  182                         flags = WRITECLOSE;
  183                         if (mp->mnt_flag & MNT_FORCE)
  184                                 flags |= FORCECLOSE;
  185                         if (mp->mnt_flag & MNT_SOFTDEP) {
  186                                 error = softdep_flushfiles(mp, flags, td);
  187                         } else {
  188                                 error = ffs_flushfiles(mp, flags, td);
  189                         }
  190                         if (error) {
  191                                 vn_finished_write(mp);
  192                                 return (error);
  193                         }
                              /*
                               * Pending counts should be zero after the flush;
                               * report and clear them if they are not.
                               */
  194                         if (fs->fs_pendingblocks != 0 ||
  195                             fs->fs_pendinginodes != 0) {
  196                                 printf("%s: %s: blocks %jd files %d\n",
  197                                     fs->fs_fsmnt, "update error",
  198                                     (intmax_t)fs->fs_pendingblocks,
  199                                     fs->fs_pendinginodes);
  200                                 fs->fs_pendingblocks = 0;
  201                                 fs->fs_pendinginodes = 0;
  202                         }
  203                         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
  204                                 fs->fs_clean = 1;
                              /*
                               * If the superblock cannot be written, undo the
                               * clean marking and leave the mount read/write.
                               */
  205                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
  206                                 fs->fs_ronly = 0;
  207                                 fs->fs_clean = 0;
  208                                 vn_finished_write(mp);
  209                                 return (error);
  210                         }
  211                         vn_finished_write(mp);
                              /* Give up our write access count on the GEOM consumer. */
  212                         DROP_GIANT();
  213                         g_topology_lock();
  214                         g_access(ump->um_cp, 0, -1, 0);
  215                         g_topology_unlock();
  216                         PICKUP_GIANT();
  217                         fs->fs_ronly = 1;
  218                         MNT_ILOCK(mp);
  219                         mp->mnt_flag |= MNT_RDONLY;
  220                         MNT_IUNLOCK(mp);
  221                 }
  222                 if ((mp->mnt_flag & MNT_RELOAD) &&
  223                     (error = ffs_reload(mp, td)) != 0)
  224                         return (error);
                      /* Upgrade: mounted read-only and "ro" was not requested. */
  225                 if (fs->fs_ronly &&
  226                     !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
  227                         /*
  228                          * If upgrade to read-write by non-root, then verify
  229                          * that user has necessary permissions on the device.
  230                          */
  231                         if (suser(td)) {
  232                                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
  233                                 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
  234                                     td->td_ucred, td)) != 0) {
  235                                         VOP_UNLOCK(devvp, 0, td);
  236                                         return (error);
  237                                 }
  238                                 VOP_UNLOCK(devvp, 0, td);
  239                         }
                              /*
                               * Recompute FS_UNCLEAN from fs_clean.  An unclean
                               * filesystem may only go read/write when forced, or
                               * when it uses soft updates and is not flagged as
                               * needing fsck.
                               */
  240                         fs->fs_flags &= ~FS_UNCLEAN;
  241                         if (fs->fs_clean == 0) {
  242                                 fs->fs_flags |= FS_UNCLEAN;
  243                                 if ((mp->mnt_flag & MNT_FORCE) ||
  244                                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
  245                                      (fs->fs_flags & FS_DOSOFTDEP))) {
  246                                         printf("WARNING: %s was not %s\n",
  247                                            fs->fs_fsmnt, "properly dismounted");
  248                                 } else {
  249                                         printf(
  250 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
  251                                             fs->fs_fsmnt);
  252                                         return (EPERM);
  253                                 }
  254                         }
  255                         DROP_GIANT();
  256                         g_topology_lock();
  257                         /*
  258                          * If we're the root device, we may not have an E count
  259                          * yet, get it now.
  260                          */
  261                         if (ump->um_cp->ace == 0)
  262                                 error = g_access(ump->um_cp, 0, 1, 1);
  263                         else
  264                                 error = g_access(ump->um_cp, 0, 1, 0);
  265                         g_topology_unlock();
  266                         PICKUP_GIANT();
  267                         if (error)
  268                                 return (error);
  269                         if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
  270                                 return (error);
  271                         fs->fs_ronly = 0;
  272                         MNT_ILOCK(mp);
  273                         mp->mnt_flag &= ~MNT_RDONLY;
  274                         MNT_IUNLOCK(mp);
                              /* Mark the superblock not clean while mounted read/write. */
  275                         fs->fs_clean = 0;
  276                         if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
  277                                 vn_finished_write(mp);
  278                                 return (error);
  279                         }
  280                         /* check to see if we need to start softdep */
  281                         if ((fs->fs_flags & FS_DOSOFTDEP) &&
  282                             (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
  283                                 vn_finished_write(mp);
  284                                 return (error);
  285                         }
  286                         if (fs->fs_snapinum[0] != 0)
  287                                 ffs_snapshot_mount(mp);
  288                         vn_finished_write(mp);
  289                 }
  290                 /*
  291                  * Soft updates is incompatible with "async",
  292                  * so if we are doing softupdates stop the user
  293                  * from setting the async flag in an update.
  294                  * Softdep_mount() clears it in an initial mount 
  295                  * or ro->rw remount.
  296                  */
  297                 if (mp->mnt_flag & MNT_SOFTDEP) {
  298                         /* XXX: Reset too late ? */
  299                         MNT_ILOCK(mp);
  300                         mp->mnt_flag &= ~MNT_ASYNC;
  301                         MNT_IUNLOCK(mp);
  302                 }
  303                 /*
  304                  * Keep MNT_ACLS flag if it is stored in superblock.
  305                  */
  306                 if ((fs->fs_flags & FS_ACLS) != 0) {
  307                         /* XXX: Set too late ? */
  308                         MNT_ILOCK(mp);
  309                         mp->mnt_flag |= MNT_ACLS;
  310                         MNT_IUNLOCK(mp);
  311                 }
  312 
  313                 /*
  314                  * If this is a snapshot request, take the snapshot.
  315                  */
  316                 if (mp->mnt_flag & MNT_SNAPSHOT)
  317                         return (ffs_snapshot(mp, fspec));
  318         }
  319 
  320         /*
  321          * Not an update, or updating the name: look up the name
  322          * and verify that it refers to a sensible disk device.
  323          */
  324         NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
  325         if ((error = namei(&ndp)) != 0)
  326                 return (error);
  327         NDFREE(&ndp, NDF_ONLY_PNBUF);
  328         devvp = ndp.ni_vp;
  329         if (!vn_isdisk(devvp, &error)) {
  330                 vput(devvp);
  331                 return (error);
  332         }
  333 
  334         /*
  335          * If mount by non-root, then verify that user has necessary
  336          * permissions on the device.
  337          */
  338         if (suser(td)) {
  339                 accessmode = VREAD;
  340                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
  341                         accessmode |= VWRITE;
  342                 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
  343                         vput(devvp);
  344                         return (error);
  345                 }
  346         }
  347 
  348         if (mp->mnt_flag & MNT_UPDATE) {
  349                 /*
  350                  * Update only
  351                  *
  352                  * If it's not the same vnode, or at least the same device
  353                  * then it's not correct.
  354                  */
  355 
  356                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
  357                         error = EINVAL; /* needs translation */
                      /* devvp is released before the error check, so on both outcomes. */
  358                 vput(devvp);
  359                 if (error)
  360                         return (error);
  361         } else {
  362                 /*
  363                  * New mount
  364                  *
  365                  * We need the name for the mount point (also used for
  366                  * "last mounted on") copied in. If an error occurs,
  367                  * the mount point is discarded by the upper level code.
  368                  * Note that vfs_mount() populates f_mntonname for us.
  369                  */
                      /*
                       * ffs_mountfs() unlocks devvp itself, so only the reference
                       * is dropped on failure.
                       */
  370                 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
  371                         vrele(devvp);
  372                         return (error);
  373                 }
  374         }
  375         vfs_mountedfrom(mp, fspec);
  376         return (0);
  377 }
  378 
  379 /*
  380  * Compatibility with old mount system call.
  381  */
  382 
  383 static int
  384 ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
  385 {
  386         struct ufs_args args;
  387         int error;
  388 
  389         if (data == NULL)
  390                 return (EINVAL);
  391         error = copyin(data, &args, sizeof args);
  392         if (error)
  393                 return (error);
  394 
  395         ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
  396         ma = mount_arg(ma, "export", &args.export, sizeof args.export);
  397         error = kernel_mount(ma, flags);
  398 
  399         return (error);
  400 }
  401 
  402 /*
  403  * Reload all incore data for a filesystem (used after running fsck on
  404  * the root filesystem and finding things to fix). The filesystem must
  405  * be mounted read-only.
  406  *
  407  * Things to do to update the mount:
  408  *      1) invalidate all cached meta-data.
  409  *      2) re-read superblock from disk.
  410  *      3) re-read summary information from disk.
  411  *      4) invalidate all inactive vnodes.
  412  *      5) invalidate all cached file data.
  413  *      6) re-read inode data for all active vnodes.
  414  */
  415 static int
  416 ffs_reload(struct mount *mp, struct thread *td)
  417 {
  418         struct vnode *vp, *mvp, *devvp;
  419         struct inode *ip;
  420         void *space;
  421         struct buf *bp;
  422         struct fs *fs, *newfs;
  423         struct ufsmount *ump;
  424         ufs2_daddr_t sblockloc;
  425         int i, blks, size, error;
  426         int32_t *lp;
  427 
  428         if ((mp->mnt_flag & MNT_RDONLY) == 0)
  429                 return (EINVAL);
  430         ump = VFSTOUFS(mp);
  431         /*
  432          * Step 1: invalidate all cached meta-data.
  433          */
  434         devvp = VFSTOUFS(mp)->um_devvp;
  435         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
  436         if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
  437                 panic("ffs_reload: dirty1");
  438         VOP_UNLOCK(devvp, 0, td);
  439 
  440         /*
  441          * Step 2: re-read superblock from disk.
  442          */
  443         fs = VFSTOUFS(mp)->um_fs;
  444         if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
  445             NOCRED, &bp)) != 0)
  446                 return (error);
  447         newfs = (struct fs *)bp->b_data;
  448         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
  449              newfs->fs_magic != FS_UFS2_MAGIC) ||
  450             newfs->fs_bsize > MAXBSIZE ||
  451             newfs->fs_bsize < sizeof(struct fs)) {
  452                         brelse(bp);
  453                         return (EIO);           /* XXX needs translation */
  454         }
  455         /*
  456          * Copy pointer fields back into superblock before copying in   XXX
  457          * new superblock. These should really be in the ufsmount.      XXX
  458          * Note that important parameters (eg fs_ncg) are unchanged.
  459          */
  460         newfs->fs_csp = fs->fs_csp;
  461         newfs->fs_maxcluster = fs->fs_maxcluster;
  462         newfs->fs_contigdirs = fs->fs_contigdirs;
  463         newfs->fs_active = fs->fs_active;
  464         /* The file system is still read-only. */
  465         newfs->fs_ronly = 1;
  466         sblockloc = fs->fs_sblockloc;
  467         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
  468         brelse(bp);
  469         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
  470         ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
  471         UFS_LOCK(ump);
  472         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
  473                 printf("%s: reload pending error: blocks %jd files %d\n",
  474                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
  475                     fs->fs_pendinginodes);
  476                 fs->fs_pendingblocks = 0;
  477                 fs->fs_pendinginodes = 0;
  478         }
  479         UFS_UNLOCK(ump);
  480 
  481         /*
  482          * Step 3: re-read summary information from disk.
  483          */
  484         blks = howmany(fs->fs_cssize, fs->fs_fsize);
  485         space = fs->fs_csp;
  486         for (i = 0; i < blks; i += fs->fs_frag) {
  487                 size = fs->fs_bsize;
  488                 if (i + fs->fs_frag > blks)
  489                         size = (blks - i) * fs->fs_fsize;
  490                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
  491                     NOCRED, &bp);
  492                 if (error)
  493                         return (error);
  494                 bcopy(bp->b_data, space, (u_int)size);
  495                 space = (char *)space + size;
  496                 brelse(bp);
  497         }
  498         /*
  499          * We no longer know anything about clusters per cylinder group.
  500          */
  501         if (fs->fs_contigsumsize > 0) {
  502                 lp = fs->fs_maxcluster;
  503                 for (i = 0; i < fs->fs_ncg; i++)
  504                         *lp++ = fs->fs_contigsumsize;
  505         }
  506 
  507 loop:
  508         MNT_ILOCK(mp);
  509         MNT_VNODE_FOREACH(vp, mp, mvp) {
  510                 VI_LOCK(vp);
  511                 if (vp->v_iflag & VI_DOOMED) {
  512                         VI_UNLOCK(vp);
  513                         continue;
  514                 }
  515                 MNT_IUNLOCK(mp);
  516                 /*
  517                  * Step 4: invalidate all cached file data.
  518                  */
  519                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
  520                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
  521                         goto loop;
  522                 }
  523                 if (vinvalbuf(vp, 0, td, 0, 0))
  524                         panic("ffs_reload: dirty2");
  525                 /*
  526                  * Step 5: re-read inode data for all active vnodes.
  527                  */
  528                 ip = VTOI(vp);
  529                 error =
  530                     bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
  531                     (int)fs->fs_bsize, NOCRED, &bp);
  532                 if (error) {
  533                         VOP_UNLOCK(vp, 0, td);
  534                         vrele(vp);
  535                         MNT_VNODE_FOREACH_ABORT(mp, mvp);
  536                         return (error);
  537                 }
  538                 ffs_load_inode(bp, ip, fs, ip->i_number);
  539                 ip->i_effnlink = ip->i_nlink;
  540                 brelse(bp);
  541                 VOP_UNLOCK(vp, 0, td);
  542                 vrele(vp);
  543                 MNT_ILOCK(mp);
  544         }
  545         MNT_IUNLOCK(mp);
  546         return (0);
  547 }
  548 
  549 /*
  550  * Possible superblock locations ordered from most to least likely.
       * ffs_mountfs() tries each entry in turn, passing it through btodb(),
       * and stops at the -1 entry that terminates the list.
  551  */
  552 static int sblock_try[] = SBLOCKSEARCH;
  553 
  554 /*
  555  * Common code for mount and mountroot
  556  */
  557 static int
  558 ffs_mountfs(devvp, mp, td)
  559         struct vnode *devvp;
  560         struct mount *mp;
  561         struct thread *td;
  562 {
  563         struct ufsmount *ump;
  564         struct buf *bp;
  565         struct fs *fs;
  566         struct cdev *dev;
  567         void *space;
  568         ufs2_daddr_t sblockloc;
  569         int error, i, blks, size, ronly;
  570         int32_t *lp;
  571         struct ucred *cred;
  572         struct g_consumer *cp;
  573         struct mount *nmp;
  574 
  575         dev = devvp->v_rdev;
  576         cred = td ? td->td_ucred : NOCRED;
  577 
  578         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
  579         DROP_GIANT();
  580         g_topology_lock();
  581         error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
  582 
  583         /*
  584          * If we are a root mount, drop the E flag so fsck can do its magic.
  585          * We will pick it up again when we remount R/W.
  586          */
  587         if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
  588                 error = g_access(cp, 0, 0, -1);
  589         g_topology_unlock();
  590         PICKUP_GIANT();
  591         VOP_UNLOCK(devvp, 0, td);
  592         if (error)
  593                 return (error);
  594         if (devvp->v_rdev->si_iosize_max != 0)
  595                 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
  596         if (mp->mnt_iosize_max > MAXPHYS)
  597                 mp->mnt_iosize_max = MAXPHYS;
  598 
  599         devvp->v_bufobj.bo_private = cp;
  600         devvp->v_bufobj.bo_ops = &ffs_ops;
  601 
  602         bp = NULL;
  603         ump = NULL;
  604         fs = NULL;
  605         sblockloc = 0;
  606         /*
  607          * Try reading the superblock in each of its possible locations.
  608          */
  609         for (i = 0; sblock_try[i] != -1; i++) {
  610                 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
  611                         error = EINVAL;
  612                         printf("Invalid sectorsize %d for superblock size %d\n",
  613                             cp->provider->sectorsize, SBLOCKSIZE);
  614                         goto out;
  615                 }
  616                 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
  617                     cred, &bp)) != 0)
  618                         goto out;
  619                 fs = (struct fs *)bp->b_data;
  620                 sblockloc = sblock_try[i];
  621                 if ((fs->fs_magic == FS_UFS1_MAGIC ||
  622                      (fs->fs_magic == FS_UFS2_MAGIC &&
  623                       (fs->fs_sblockloc == sblockloc ||
  624                        (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
  625                     fs->fs_bsize <= MAXBSIZE &&
  626                     fs->fs_bsize >= sizeof(struct fs))
  627                         break;
  628                 brelse(bp);
  629                 bp = NULL;
  630         }
  631         if (sblock_try[i] == -1) {
  632                 error = EINVAL;         /* XXX needs translation */
  633                 goto out;
  634         }
  635         fs->fs_fmod = 0;
  636         fs->fs_flags &= ~FS_INDEXDIRS;  /* no support for directory indicies */
  637         fs->fs_flags &= ~FS_UNCLEAN;
  638         if (fs->fs_clean == 0) {
  639                 fs->fs_flags |= FS_UNCLEAN;
  640                 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
  641                     ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
  642                      (fs->fs_flags & FS_DOSOFTDEP))) {
  643                         printf(
  644 "WARNING: %s was not properly dismounted\n",
  645                             fs->fs_fsmnt);
  646                 } else {
  647                         printf(
  648 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
  649                             fs->fs_fsmnt);
  650                         error = EPERM;
  651                         goto out;
  652                 }
  653                 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
  654                     (mp->mnt_flag & MNT_FORCE)) {
  655                         printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
  656                             (intmax_t)fs->fs_pendingblocks,
  657                             fs->fs_pendinginodes);
  658                         fs->fs_pendingblocks = 0;
  659                         fs->fs_pendinginodes = 0;
  660                 }
  661         }
  662         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
  663                 printf("%s: mount pending error: blocks %jd files %d\n",
  664                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
  665                     fs->fs_pendinginodes);
  666                 fs->fs_pendingblocks = 0;
  667                 fs->fs_pendinginodes = 0;
  668         }
  669         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
  670         ump->um_cp = cp;
  671         ump->um_bo = &devvp->v_bufobj;
  672         ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
  673         if (fs->fs_magic == FS_UFS1_MAGIC) {
  674                 ump->um_fstype = UFS1;
  675                 ump->um_balloc = ffs_balloc_ufs1;
  676         } else {
  677                 ump->um_fstype = UFS2;
  678                 ump->um_balloc = ffs_balloc_ufs2;
  679         }
  680         ump->um_blkatoff = ffs_blkatoff;
  681         ump->um_truncate = ffs_truncate;
  682         ump->um_update = ffs_update;
  683         ump->um_valloc = ffs_valloc;
  684         ump->um_vfree = ffs_vfree;
  685         ump->um_ifree = ffs_ifree;
  686         mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
  687         bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
  688         if (fs->fs_sbsize < SBLOCKSIZE)
  689                 bp->b_flags |= B_INVAL | B_NOCACHE;
  690         brelse(bp);
  691         bp = NULL;
  692         fs = ump->um_fs;
  693         ffs_oldfscompat_read(fs, ump, sblockloc);
  694         fs->fs_ronly = ronly;
  695         size = fs->fs_cssize;
  696         blks = howmany(size, fs->fs_fsize);
  697         if (fs->fs_contigsumsize > 0)
  698                 size += fs->fs_ncg * sizeof(int32_t);
  699         size += fs->fs_ncg * sizeof(u_int8_t);
  700         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
  701         fs->fs_csp = space;
  702         for (i = 0; i < blks; i += fs->fs_frag) {
  703                 size = fs->fs_bsize;
  704                 if (i + fs->fs_frag > blks)
  705                         size = (blks - i) * fs->fs_fsize;
  706                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
  707                     cred, &bp)) != 0) {
  708                         free(fs->fs_csp, M_UFSMNT);
  709                         goto out;
  710                 }
  711                 bcopy(bp->b_data, space, (u_int)size);
  712                 space = (char *)space + size;
  713                 brelse(bp);
  714                 bp = NULL;
  715         }
  716         if (fs->fs_contigsumsize > 0) {
  717                 fs->fs_maxcluster = lp = space;
  718                 for (i = 0; i < fs->fs_ncg; i++)
  719                         *lp++ = fs->fs_contigsumsize;
  720                 space = lp;
  721         }
  722         size = fs->fs_ncg * sizeof(u_int8_t);
  723         fs->fs_contigdirs = (u_int8_t *)space;
  724         bzero(fs->fs_contigdirs, size);
  725         fs->fs_active = NULL;
  726         mp->mnt_data = (qaddr_t)ump;
  727         mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
  728         mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
  729         nmp = NULL;
  730         if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 
  731             (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
  732                 if (nmp)
  733                         vfs_rel(nmp);
  734                 vfs_getnewfsid(mp);
  735         }
  736         mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
  737         MNT_ILOCK(mp);
  738         mp->mnt_flag |= MNT_LOCAL;
  739         MNT_IUNLOCK(mp);
  740         if ((fs->fs_flags & FS_MULTILABEL) != 0) {
  741 #ifdef MAC
  742                 MNT_ILOCK(mp);
  743                 mp->mnt_flag |= MNT_MULTILABEL;
  744                 MNT_IUNLOCK(mp);
  745 #else
  746                 printf(
  747 "WARNING: %s: multilabel flag on fs but no MAC support\n",
  748                     mp->mnt_stat.f_mntonname);
  749 #endif
  750         }
  751         if ((fs->fs_flags & FS_ACLS) != 0) {
  752 #ifdef UFS_ACL
  753                 MNT_ILOCK(mp);
  754                 mp->mnt_flag |= MNT_ACLS;
  755                 MNT_IUNLOCK(mp);
  756 #else
  757                 printf(
  758 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
  759                     mp->mnt_stat.f_mntonname);
  760 #endif
  761         }
  762         ump->um_mountp = mp;
  763         ump->um_dev = dev;
  764         ump->um_devvp = devvp;
  765         ump->um_nindir = fs->fs_nindir;
  766         ump->um_bptrtodb = fs->fs_fsbtodb;
  767         ump->um_seqinc = fs->fs_frag;
  768         for (i = 0; i < MAXQUOTAS; i++)
  769                 ump->um_quotas[i] = NULLVP;
  770 #ifdef UFS_EXTATTR
  771         ufs_extattr_uepm_init(&ump->um_extattr);
  772 #endif
  773         /*
  774          * Set FS local "last mounted on" information (NULL pad)
  775          */
  776         bzero(fs->fs_fsmnt, MAXMNTLEN);
  777         strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
  778 
  779         if( mp->mnt_flag & MNT_ROOTFS) {
  780                 /*
  781                  * Root mount; update timestamp in mount structure.
  782                  * this will be used by the common root mount code
  783                  * to update the system clock.
  784                  */
  785                 mp->mnt_time = fs->fs_time;
  786         }
  787 
  788         if (ronly == 0) {
  789                 if ((fs->fs_flags & FS_DOSOFTDEP) &&
  790                     (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
  791                         free(fs->fs_csp, M_UFSMNT);
  792                         goto out;
  793                 }
  794                 if (fs->fs_snapinum[0] != 0)
  795                         ffs_snapshot_mount(mp);
  796                 fs->fs_fmod = 1;
  797                 fs->fs_clean = 0;
  798                 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
  799         }
  800         /*
  801          * Initialize filesystem stat information in mount struct.
  802          */
  803 #ifdef UFS_EXTATTR
  804 #ifdef UFS_EXTATTR_AUTOSTART
  805         /*
  806          *
  807          * Auto-starting does the following:
  808          *      - check for /.attribute in the fs, and extattr_start if so
  809          *      - for each file in .attribute, enable that file with
  810          *        an attribute of the same name.
  811          * Not clear how to report errors -- probably eat them.
  812          * This would all happen while the filesystem was busy/not
  813          * available, so would effectively be "atomic".
  814          */
  815         (void) ufs_extattr_autostart(mp, td);
  816 #endif /* !UFS_EXTATTR_AUTOSTART */
  817 #endif /* !UFS_EXTATTR */
  818         MNT_ILOCK(mp);
  819         mp->mnt_kern_flag |= MNTK_MPSAFE;
  820         MNT_IUNLOCK(mp);
  821         return (0);
  822 out:
  823         if (bp)
  824                 brelse(bp);
  825         if (cp != NULL) {
  826                 DROP_GIANT();
  827                 g_topology_lock();
  828                 g_vfs_close(cp, td);
  829                 g_topology_unlock();
  830                 PICKUP_GIANT();
  831         }
  832         if (ump) {
  833                 mtx_destroy(UFS_MTX(ump));
  834                 free(ump->um_fs, M_UFSMNT);
  835                 free(ump, M_UFSMNT);
  836                 mp->mnt_data = (qaddr_t)0;
  837         }
  838         return (error);
  839 }
  840 
  841 #include <sys/sysctl.h>
      /*
       * Debug knob, registered read/write with SYSCTL_INT under the _debug node.
       * While it is nonzero, ffs_oldfscompat_read() saves fs_cgsize in
       * fs_save_cgsize and replaces it with fs_bsize, and
       * ffs_oldfscompat_write() restores the saved value.
       */
  842 static int bigcgs = 0;
  843 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
  844 
  845 /*
  846  * Sanity checks for loading old filesystem superblocks.
  847  * See ffs_oldfscompat_write below for unwound actions.
  848  *
  849  * XXX - Parts get retired eventually.
  850  * Unfortunately new bits get added.
       *
       * fs        in-core superblock, adjusted in place.
       * ump       mount whose um_savedmaxfilesize receives the fs_maxfilesize
       *           value in effect before the UFS1 clamp below.
       * sblockloc value stored in fs_sblockloc when FS_FLAGS_UPDATED is not
       *           yet set in fs_old_flags.
  851  */
  852 static void
  853 ffs_oldfscompat_read(fs, ump, sblockloc)
  854         struct fs *fs;
  855         struct ufsmount *ump;
  856         ufs2_daddr_t sblockloc;
  857 {
  858         off_t maxfilesize;
  859 
  860         /*
  861          * If not yet done, update fs_flags location and value of fs_sblockloc.
  862          */
  863         if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
  864                 fs->fs_flags = fs->fs_old_flags;
  865                 fs->fs_old_flags |= FS_FLAGS_UPDATED;
  866                 fs->fs_sblockloc = sblockloc;
  867         }
  868         /*
  869          * If not yet done, update UFS1 superblock with new wider fields.
               * fs_maxbsize != fs_bsize is used as the "not yet done" marker;
               * it is set equal to fs_bsize as part of the update.
  870          */
  871         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
  872                 fs->fs_maxbsize = fs->fs_bsize;
  873                 fs->fs_time = fs->fs_old_time;
  874                 fs->fs_size = fs->fs_old_size;
  875                 fs->fs_dsize = fs->fs_old_dsize;
  876                 fs->fs_csaddr = fs->fs_old_csaddr;
  877                 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
  878                 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
  879                 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
  880                 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
  881         }
              /*
               * UFS1 with an inode format older than FS_44INODEFMT: force a
               * 2^31 - 1 byte file size limit and rebuild fs_qbmask/fs_qfmask
               * as the complements of fs_bmask/fs_fmask.
               */
  882         if (fs->fs_magic == FS_UFS1_MAGIC &&
  883             fs->fs_old_inodefmt < FS_44INODEFMT) {
  884                 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
  885                 fs->fs_qbmask = ~fs->fs_bmask;
  886                 fs->fs_qfmask = ~fs->fs_fmask;
  887         }
              /*
               * UFS1: remember the current limit so ffs_oldfscompat_write() can
               * put it back, then clamp the in-core limit to
               * 0x80000000 * fs_bsize - 1.
               */
  888         if (fs->fs_magic == FS_UFS1_MAGIC) {
  889                 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
  890                 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
  891                 if (fs->fs_maxfilesize > maxfilesize)
  892                         fs->fs_maxfilesize = maxfilesize;
  893         }
  894         /* Compatibility for old filesystems */
  895         if (fs->fs_avgfilesize <= 0)
  896                 fs->fs_avgfilesize = AVFILESIZ;
  897         if (fs->fs_avgfpdir <= 0)
  898                 fs->fs_avgfpdir = AFPDIR;
              /* bigcgs knob set: save fs_cgsize and use fs_bsize in its place. */
  899         if (bigcgs) {
  900                 fs->fs_save_cgsize = fs->fs_cgsize;
  901                 fs->fs_cgsize = fs->fs_bsize;
  902         }
  903 }
  904 
  905 /*
  906  * Unwinding superblock updates for old filesystems.
  907  * See ffs_oldfscompat_read above for details.
  908  *
  909  * XXX - Parts get retired eventually.
  910  * Unfortunately new bits get added.
       *
       * fs   superblock image to adjust in place (ffs_sbupdate() passes the
       *      copy held in the write buffer, not the in-core superblock).
       * ump  mount supplying um_savedmaxfilesize for UFS1 filesystems.
  911  */
  912 static void
  913 ffs_oldfscompat_write(fs, ump)
  914         struct fs *fs;
  915         struct ufsmount *ump;
  916 {
  917 
  918         /*
  919          * Copy back UFS2 updated fields that UFS1 inspects.
               * fs_maxfilesize is reset to the value ffs_oldfscompat_read()
               * saved before clamping it.
  920          */
  921         if (fs->fs_magic == FS_UFS1_MAGIC) {
  922                 fs->fs_old_time = fs->fs_time;
  923                 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
  924                 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
  925                 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
  926                 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
  927                 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
  928         }
              /*
               * bigcgs knob set: restore the saved fs_cgsize.
               * NOTE(review): this tests the current value of bigcgs, not the
               * value seen by ffs_oldfscompat_read() -- confirm that toggling
               * the sysctl while mounted cannot write a zero fs_cgsize.
               */
  929         if (bigcgs) {
  930                 fs->fs_cgsize = fs->fs_save_cgsize;
  931                 fs->fs_save_cgsize = 0;
  932         }
  933 }
  934 
  935 /*
  936  * unmount system call
       *
       * Flushes every file on mp (softdep_flushfiles() when MNT_SOFTDEP is set,
       * ffs_flushfiles() otherwise), writes the superblock back if the
       * filesystem is not read-only, closes um_cp with g_vfs_close(), drops the
       * reference on the device vnode and frees the per-mount structures.
       *
       * mp        mount point being unmounted.
       * mntflags  MNT_FORCE is translated into FORCECLOSE for the flush.
       * td        calling thread.
       *
       * Returns 0 on success.  An error from the flush or from ffs_sbupdate()
       * is returned immediately and the mount structures are left allocated.
  937  */
  938 static int
  939 ffs_unmount(mp, mntflags, td)
  940         struct mount *mp;
  941         int mntflags;
  942         struct thread *td;
  943 {
  944         struct ufsmount *ump = VFSTOUFS(mp);
  945         struct fs *fs;
  946         int error, flags;
  947 
  948         flags = 0;
  949         if (mntflags & MNT_FORCE) {
  950                 flags |= FORCECLOSE;
  951         }
  952 #ifdef UFS_EXTATTR
              /*
               * Stop extended attributes; EOPNOTSUPP is silently ignored, any
               * other failure is only logged.
               * NOTE(review): the extattr state is destroyed here before the
               * flush below can still fail and abort the unmount -- confirm
               * that a failed unmount leaves extended attributes usable.
               */
  953         if ((error = ufs_extattr_stop(mp, td))) {
  954                 if (error != EOPNOTSUPP)
  955                         printf("ffs_unmount: ufs_extattr_stop returned %d\n",
  956                             error);
  957         } else {
  958                 ufs_extattr_uepm_destroy(&ump->um_extattr);
  959         }
  960 #endif
  961         if (mp->mnt_flag & MNT_SOFTDEP) {
  962                 if ((error = softdep_flushfiles(mp, flags, td)) != 0)
  963                         return (error);
  964         } else {
  965                 if ((error = ffs_flushfiles(mp, flags, td)) != 0)
  966                         return (error);
  967         }
  968         fs = ump->um_fs;
              /*
               * Pending block/inode counts should be zero after the flush;
               * report and clear anything left over.
               */
  969         UFS_LOCK(ump);
  970         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
  971                 printf("%s: unmount pending error: blocks %jd files %d\n",
  972                     fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
  973                     fs->fs_pendinginodes);
  974                 fs->fs_pendingblocks = 0;
  975                 fs->fs_pendinginodes = 0;
  976         }
  977         UFS_UNLOCK(ump);
              /*
               * Read/write mount: mark the superblock clean unless FS_UNCLEAN
               * or FS_NEEDSFSCK is set, and write it synchronously.  If the
               * write fails the in-core clean flag is cleared again.
               */
  978         if (fs->fs_ronly == 0) {
  979                 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
  980                 error = ffs_sbupdate(ump, MNT_WAIT, 0);
  981                 if (error) {
  982                         fs->fs_clean = 0;
  983                         return (error);
  984                 }
  985         }
              /* g_vfs_close() is called with the topology lock and without Giant. */
  986         DROP_GIANT();
  987         g_topology_lock();
  988         g_vfs_close(ump->um_cp, td);
  989         g_topology_unlock();
  990         PICKUP_GIANT();
  991         vrele(ump->um_devvp);
  992         mtx_destroy(UFS_MTX(ump));
              /* fs_csp, the superblock copy and ump are separate allocations. */
  993         free(fs->fs_csp, M_UFSMNT);
  994         free(fs, M_UFSMNT);
  995         free(ump, M_UFSMNT);
  996         mp->mnt_data = (qaddr_t)0;
  997         MNT_ILOCK(mp);
  998         mp->mnt_flag &= ~MNT_LOCAL;
  999         MNT_IUNLOCK(mp);
 1000         return (error);
 1001 }
 1002 
 1003 /*
 1004  * Flush out all the files in a filesystem.
       *
       * mp     mount whose vnodes are flushed.
       * flags  vflush() flags (e.g. FORCECLOSE); FORCECLOSE is added locally
       *        once snapshots have been unmounted.
       * td     calling thread.
       *
       * Returns 0, or the first error from vflush() or from the final
       * VOP_FSYNC() of the device vnode.
 1005  */
 1006 int
 1007 ffs_flushfiles(mp, flags, td)
 1008         struct mount *mp;
 1009         int flags;
 1010         struct thread *td;
 1011 {
 1012         struct ufsmount *ump;
 1013         int error;
 1014 
 1015         ump = VFSTOUFS(mp);
 1016 #ifdef QUOTA
              /*
               * With quotas enabled, flush everything except system vnodes
               * first, then turn every quota type off.
               */
 1017         if (mp->mnt_flag & MNT_QUOTA) {
 1018                 int i;
 1019                 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
 1020                 if (error)
 1021                         return (error);
 1022                 for (i = 0; i < MAXQUOTAS; i++) {
 1023                         quotaoff(td, mp, i);
 1024                 }
 1025                 /*
 1026                  * Here we fall through to vflush again to ensure
 1027                  * that we have gotten rid of all the system vnodes.
 1028                  */
 1029         }
 1030 #endif
 1031         ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
              /*
               * VV_COPYONWRITE on the device vnode: flush non-system vnodes,
               * unmount the snapshots, and force-close from here on.
               */
 1032         if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
 1033                 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
 1034                         return (error);
 1035                 ffs_snapshot_unmount(mp);
 1036                 flags |= FORCECLOSE;
 1037                 /*
 1038                  * Here we fall through to vflush again to ensure
 1039                  * that we have gotten rid of all the system vnodes.
 1040                  */
 1041         }
 1042         /*
 1043          * Flush all the files.
 1044          */
 1045         if ((error = vflush(mp, 0, flags, td)) != 0)
 1046                 return (error);
 1047         /*
 1048          * Flush filesystem metadata.
               * The device vnode is locked exclusively around the fsync.
 1049          */
 1050         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
 1051         error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
 1052         VOP_UNLOCK(ump->um_devvp, 0, td);
 1053         return (error);
 1054 }
 1055 
 1056 /*
 1057  * Get filesystem statistics.
       *
       * Fills sbp from the in-core superblock of mp.  td is unused.
       * Panics if the superblock magic is neither FS_UFS1_MAGIC nor
       * FS_UFS2_MAGIC; otherwise always returns 0.
 1058  */
 1059 static int
 1060 ffs_statfs(mp, sbp, td)
 1061         struct mount *mp;
 1062         struct statfs *sbp;
 1063         struct thread *td;
 1064 {
 1065         struct ufsmount *ump;
 1066         struct fs *fs;
 1067 
 1068         ump = VFSTOUFS(mp);
 1069         fs = ump->um_fs;
 1070         if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
 1071                 panic("ffs_statfs");
 1072         sbp->f_version = STATFS_VERSION;
              /* Block counts below are in units of fs_fsize (fragments). */
 1073         sbp->f_bsize = fs->fs_fsize;
 1074         sbp->f_iosize = fs->fs_bsize;
 1075         sbp->f_blocks = fs->fs_dsize;
              /*
               * The summary totals and pending counters are read under the
               * UFS lock.  Pending blocks and inodes are counted as free.
               */
 1076         UFS_LOCK(ump);
 1077         sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 1078             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
 1079         sbp->f_bavail = freespace(fs, fs->fs_minfree) +
 1080             dbtofsb(fs, fs->fs_pendingblocks);
 1081         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
 1082         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
 1083         UFS_UNLOCK(ump);
 1084         sbp->f_namemax = NAME_MAX;
 1085         return (0);
 1086 }
 1087 
 1088 /*
 1089  * Go through the disk queues to initiate sandbagged IO;
 1090  * go through the inodes to write those that have been modified;
 1091  * initiate the writing of the super block if it has been modified.
 1092  *
 1093  * Note: we are always called with the filesystem marked `MPBUSY'.
       *
       * mp       mount to sync.
       * waitfor  MNT_WAIT syncs synchronously; MNT_SUSPEND is handled as
       *          MNT_WAIT and additionally tries to mark the mount suspended;
       *          any other value uses non-blocking vnode locks, and MNT_LAZY
       *          also skips the device vnode fsync.
       * td       calling thread.
       *
       * Returns 0, or the most recent error recorded in allerror.
       * Panics if the superblock is marked modified on a read-only filesystem.
 1094  */
 1095 static int
 1096 ffs_sync(mp, waitfor, td)
 1097         struct mount *mp;
 1098         int waitfor;
 1099         struct thread *td;
 1100 {
 1101         struct vnode *mvp, *vp, *devvp;
 1102         struct inode *ip;
 1103         struct ufsmount *ump = VFSTOUFS(mp);
 1104         struct fs *fs;
 1105         int error, count, wait, lockreq, allerror = 0;
 1106         int suspend;
 1107         int suspended;
 1108         int secondary_writes;
 1109         int secondary_accwrites;
 1110         int softdep_deps;
 1111         int softdep_accdeps;
 1112         struct bufobj *bo;
 1113 
 1114         fs = ump->um_fs;
 1115         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
 1116                 printf("fs = %s\n", fs->fs_fsmnt);
 1117                 panic("ffs_sync: rofs mod");
 1118         }
 1119         /*
 1120          * Write back each (modified) inode.
 1121          */
 1122         wait = 0;
 1123         suspend = 0;
 1124         suspended = 0;
 1125         lockreq = LK_EXCLUSIVE | LK_NOWAIT;
 1126         if (waitfor == MNT_SUSPEND) {
 1127                 suspend = 1;
 1128                 waitfor = MNT_WAIT;
 1129         }
 1130         if (waitfor == MNT_WAIT) {
 1131                 wait = 1;
 1132                 lockreq = LK_EXCLUSIVE;
 1133         }
              /*
               * vget() below is entered with the vnode interlock still held,
               * hence LK_INTERLOCK.
               */
 1134         lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
 1135         MNT_ILOCK(mp);
      /* Every "goto loop" arrives here with the mount interlock held. */
 1136 loop:
 1137         /* Grab snapshot of secondary write counts */
 1138         secondary_writes = mp->mnt_secondary_writes;
 1139         secondary_accwrites = mp->mnt_secondary_accwrites;
 1140 
 1141         /* Grab snapshot of softdep dependency counts */
 1142         MNT_IUNLOCK(mp);
 1143         softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
 1144         MNT_ILOCK(mp);
 1145 
 1146         MNT_VNODE_FOREACH(vp, mp, mvp) {
 1147                 /*
 1148                  * Depend on the mntvnode_slock to keep things stable enough
 1149                  * for a quick test.  Since there might be hundreds of
 1150                  * thousands of vnodes, we cannot afford even a subroutine
 1151                  * call unless there's a good chance that we have work to do.
 1152                  */
 1153                 VI_LOCK(vp);
 1154                 if (vp->v_iflag & VI_DOOMED) {
 1155                         VI_UNLOCK(vp);
 1156                         continue;
 1157                 }
 1158                 ip = VTOI(vp);
                      /*
                       * Skip vnodes with no type, and vnodes that have neither
                       * a pending inode flag nor dirty buffers.
                       */
 1159                 if (vp->v_type == VNON || ((ip->i_flag &
 1160                     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 1161                     vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
 1162                         VI_UNLOCK(vp);
 1163                         continue;
 1164                 }
 1165                 MNT_IUNLOCK(mp);
                      /*
                       * ENOENT or ENOLCK from vget() aborts the iteration and
                       * restarts the whole scan; any other failure just skips
                       * this vnode.
                       */
 1166                 if ((error = vget(vp, lockreq, td)) != 0) {
 1167                         MNT_ILOCK(mp);
 1168                         if (error == ENOENT || error == ENOLCK) {
 1169                                 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
 1170                                 goto loop;
 1171                         }
 1172                         continue;
 1173                 }
 1174                 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
 1175                         allerror = error;
 1176                 vput(vp);
 1177                 MNT_ILOCK(mp);
 1178         }
 1179         MNT_IUNLOCK(mp);
 1180         /*
 1181          * Force stale filesystem control information to be flushed.
 1182          */
 1183         if (waitfor == MNT_WAIT) {
 1184                 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
 1185                         allerror = error;
 1186                 /* Flushed work items may create new vnodes to clean */
 1187                 if (allerror == 0 && count) {
 1188                         MNT_ILOCK(mp);
 1189                         goto loop;
 1190                 }
 1191         }
 1192 #ifdef QUOTA
 1193         qsync(mp);
 1194 #endif
              /*
               * Sync the device vnode unless this is a lazy sync or it has
               * neither writes in progress nor dirty buffers.  The interlock
               * taken here is handed to vn_lock() via LK_INTERLOCK.
               */
 1195         devvp = ump->um_devvp;
 1196         VI_LOCK(devvp);
 1197         bo = &devvp->v_bufobj;
 1198         if (waitfor != MNT_LAZY &&
 1199             (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
 1200                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
 1201                 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
 1202                         allerror = error;
 1203                 VOP_UNLOCK(devvp, 0, td);
 1204                 if (allerror == 0 && waitfor == MNT_WAIT) {
 1205                         MNT_ILOCK(mp);
 1206                         goto loop;
 1207                 }
 1208         } else if (suspend != 0) {
                      /*
                       * Called with the devvp interlock held; it gets the
                       * counts snapshotted at the top of this pass.
                       * NOTE(review): presumably it returns with the mount
                       * interlock held and the devvp interlock released in
                       * both outcomes (the mtx_assert covers the zero case)
                       * -- confirm.
                       */
 1209                 if (softdep_check_suspend(mp,
 1210                                           devvp,
 1211                                           softdep_deps,
 1212                                           softdep_accdeps,
 1213                                           secondary_writes,
 1214                                           secondary_accwrites) != 0)
 1215                         goto loop;      /* More work needed */
 1216                 mtx_assert(MNT_MTX(mp), MA_OWNED);
 1217                 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
 1218                 MNT_IUNLOCK(mp);
 1219                 suspended = 1;
 1220         } else
 1221                 VI_UNLOCK(devvp);
 1222         /*
 1223          * Write back modified superblock.
 1224          */
 1225         if (fs->fs_fmod != 0 &&
 1226             (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
 1227                 allerror = error;
 1228         return (allerror);
 1229 }
 1230 
      /*
       * Return in *vpp the vnode for inode number ino on mount mp, building a
       * new vnode/inode pair when vfs_hash_get() finds none.
       *
       * flags are the lock flags passed to vfs_hash_get() and
       * vfs_hash_insert(); a shared request is changed to LK_EXCLUSIVE before a
       * new vnode is created.
       *
       * Returns 0 with *vpp set on success.  Results from vfs_hash_get() and
       * vfs_hash_insert() (an error, or a vnode they supplied) are returned
       * as-is.  On every later failure the new vnode is released with vput()
       * and *vpp is set to NULL.
       */
 1231 int
 1232 ffs_vget(mp, ino, flags, vpp)
 1233         struct mount *mp;
 1234         ino_t ino;
 1235         int flags;
 1236         struct vnode **vpp;
 1237 {
 1238         struct fs *fs;
 1239         struct inode *ip;
 1240         struct ufsmount *ump;
 1241         struct buf *bp;
 1242         struct vnode *vp;
 1243         struct cdev *dev;
 1244         int error;
 1245 
 1246         error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
 1247         if (error || *vpp != NULL)
 1248                 return (error);
 1249 
 1250         /*
 1251          * We must promote to an exclusive lock for vnode creation.  This
 1252          * can happen if lookup is passed LOCKSHARED.
 1253          */
 1254         if ((flags & LK_TYPE_MASK) == LK_SHARED) {
 1255                 flags &= ~LK_TYPE_MASK;
 1256                 flags |= LK_EXCLUSIVE;
 1257         }
 1258 
 1259         /*
 1260          * We do not lock vnode creation as it is believed to be too
 1261          * expensive for such rare case as simultaneous creation of vnode
 1262          * for same ino by different processes. We just allow them to race
 1263          * and check later to decide who wins. Let the race begin!
 1264          */
 1265 
 1266         ump = VFSTOUFS(mp);
 1267         dev = ump->um_dev;
 1268         fs = ump->um_fs;
 1269 
 1270         /*
 1271          * If this MALLOC() is performed after the getnewvnode()
 1272          * it might block, leaving a vnode with a NULL v_data to be
 1273          * found by ffs_sync() if a sync happens to fire right then,
 1274          * which will cause a panic because ffs_sync() blindly
 1275          * dereferences vp->v_data (as well it should).
 1276          */
 1277         ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
 1278 
 1279         /* Allocate a new vnode/inode. */
 1280         if (fs->fs_magic == FS_UFS1_MAGIC)
 1281                 error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
 1282         else
 1283                 error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
 1284         if (error) {
 1285                 *vpp = NULL;
 1286                 uma_zfree(uma_inode, ip);
 1287                 return (error);
 1288         }
 1289         /*
 1290          * FFS supports recursive and shared locking.
 1291          */
 1292         vp->v_vnlock->lk_flags |= LK_CANRECURSE;
 1293         vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
 1294         vp->v_data = ip;
 1295         vp->v_bufobj.bo_bsize = fs->fs_bsize;
 1296         ip->i_vnode = vp;
 1297         ip->i_ump = ump;
 1298         ip->i_fs = fs;
 1299         ip->i_dev = dev;
 1300         ip->i_number = ino;
 1301 #ifdef QUOTA
 1302         {
 1303                 int i;
 1304                 for (i = 0; i < MAXQUOTAS; i++)
 1305                         ip->i_dquot[i] = NODQUOT;
 1306         }
 1307 #endif
 1308 
              /*
               * Publish the vnode in the hash.  An error, or a non-NULL *vpp
               * (another vnode for this ino was supplied instead), ends the
               * call here.
               * NOTE(review): vp and ip are not released on this path --
               * presumably vfs_hash_insert() disposes of vp itself; confirm.
               */
 1309         error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
 1310         if (error || *vpp != NULL)
 1311                 return (error);
 1312 
 1313         /* Read in the disk contents for the inode, copy into the inode. */
 1314         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
 1315             (int)fs->fs_bsize, NOCRED, &bp);
 1316         if (error) {
 1317                 /*
 1318                  * The inode does not contain anything useful, so it would
 1319                  * be misleading to leave it on its hash chain. With mode
 1320                  * still zero, it will be unlinked and returned to the free
 1321                  * list by vput().
 1322                  */
 1323                 brelse(bp);
 1324                 vput(vp);
 1325                 *vpp = NULL;
 1326                 return (error);
 1327         }
              /* Allocate the dinode copy matching the filesystem type. */
 1328         if (ip->i_ump->um_fstype == UFS1)
 1329                 ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
 1330         else
 1331                 ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
 1332         ffs_load_inode(bp, ip, fs, ino);
              /* Without soft updates the effective link count is i_nlink. */
 1333         if (DOINGSOFTDEP(vp))
 1334                 softdep_load_inodeblock(ip);
 1335         else
 1336                 ip->i_effnlink = ip->i_nlink;
 1337         bqrelse(bp);
 1338 
 1339         /*
 1340          * Initialize the vnode from the inode, check for aliases.
 1341          * Note that the underlying vnode may have changed.
 1342          */
 1343         if (ip->i_ump->um_fstype == UFS1)
 1344                 error = ufs_vinit(mp, &ffs_fifoops1, &vp);
 1345         else
 1346                 error = ufs_vinit(mp, &ffs_fifoops2, &vp);
 1347         if (error) {
 1348                 vput(vp);
 1349                 *vpp = NULL;
 1350                 return (error);
 1351         }
 1352 
 1353         /*
 1354          * Finish inode initialization.
 1355          */
 1356 
 1357         /*
 1358          * Set up a generation number for this inode if it does not
 1359          * already have one. This should only happen on old filesystems.
               * arc4random() / 2 + 1 is never zero.  The new value is marked
               * for write-back only when the mount is not read-only.
 1360          */
 1361         if (ip->i_gen == 0) {
 1362                 ip->i_gen = arc4random() / 2 + 1;
 1363                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 1364                         ip->i_flag |= IN_MODIFIED;
 1365                         DIP_SET(ip, i_gen, ip->i_gen);
 1366                 }
 1367         }
 1368         /*
 1369          * Ensure that uid and gid are correct. This is a temporary
 1370          * fix until fsck has been changed to do the update.
 1371          */
 1372         if (fs->fs_magic == FS_UFS1_MAGIC &&            /* XXX */
 1373             fs->fs_old_inodefmt < FS_44INODEFMT) {      /* XXX */
 1374                 ip->i_uid = ip->i_din1->di_ouid;        /* XXX */
 1375                 ip->i_gid = ip->i_din1->di_ogid;        /* XXX */
 1376         }                                               /* XXX */
 1377 
 1378 #ifdef MAC
 1379         if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
 1380                 /*
 1381                  * If this vnode is already allocated, and we're running
 1382                  * multi-label, attempt to perform a label association
 1383                  * from the extended attributes on the inode.
 1384                  */
 1385                 error = mac_associate_vnode_extattr(mp, vp);
 1386                 if (error) {
 1387                         /* ufs_inactive will release ip->i_devvp ref. */
 1388                         vput(vp);
 1389                         *vpp = NULL;
 1390                         return (error);
 1391                 }
 1392         }
 1393 #endif
 1394 
 1395         *vpp = vp;
 1396         return (0);
 1397 }
 1398 
 1399 /*
 1400  * File handle to vnode
 1401  *
 1402  * Have to be really careful about stale file handles:
 1403  * - check that the inode number is valid
 1404  * - call ffs_vget() to get the locked inode
 1405  * - check for an unallocated inode (i_mode == 0)
 1406  * - check that the given client host has export rights and return
 1407  *   those rights via. exflagsp and credanonp
       *
       * Only the first check is performed in this function: ESTALE is returned
       * when ufid_ino is below ROOTINO or not below fs_ncg * fs_ipg.  Everything
       * else is delegated to ufs_fhtovp(), whose result is returned unchanged.
       * NOTE(review): this function has no exflagsp/credanonp parameters, so
       * the export-rights item above looks stale -- confirm and drop it.
 1408  */
 1409 static int
 1410 ffs_fhtovp(mp, fhp, vpp)
 1411         struct mount *mp;
 1412         struct fid *fhp;
 1413         struct vnode **vpp;
 1414 {
 1415         struct ufid *ufhp;
 1416         struct fs *fs;
 1417 
              /* The generic file handle is reinterpreted as a struct ufid. */
 1418         ufhp = (struct ufid *)fhp;
 1419         fs = VFSTOUFS(mp)->um_fs;
 1420         if (ufhp->ufid_ino < ROOTINO ||
 1421             ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
 1422                 return (ESTALE);
 1423         return (ufs_fhtovp(mp, ufhp, vpp));
 1424 }
 1425 
 1426 /*
 1427  * Vnode pointer to File handle
       *
       * Fills the struct ufid overlaid on *fhp with the handle length, the
       * inode number and the inode generation number of vp.  Always returns 0.
 1428  */
 1429 /* ARGSUSED */
 1430 static int
 1431 ffs_vptofh(vp, fhp)
 1432         struct vnode *vp;
 1433         struct fid *fhp;
 1434 {
 1435         struct inode *ip;
 1436         struct ufid *ufhp;
 1437 
 1438         ip = VTOI(vp);
              /* The generic file handle is reinterpreted as a struct ufid. */
 1439         ufhp = (struct ufid *)fhp;
 1440         ufhp->ufid_len = sizeof(struct ufid);
 1441         ufhp->ufid_ino = ip->i_number;
 1442         ufhp->ufid_gen = ip->i_gen;
 1443         return (0);
 1444 }
 1445 
 1446 /*
 1447  * Initialize the filesystem.
       *
       * Calls softdep_initialize() first, then returns the result of
       * ufs_init(vfsp).
 1448  */
 1449 static int
 1450 ffs_init(vfsp)
 1451         struct vfsconf *vfsp;
 1452 {
 1453 
 1454         softdep_initialize();
 1455         return (ufs_init(vfsp));
 1456 }
 1457 
 1458 /*
 1459  * Undo the work of ffs_init().
       *
       * Tears down in the reverse order of ffs_init(): ufs_uninit(vfsp) first,
       * then softdep_uninitialize().  Returns the result of ufs_uninit();
       * softdep_uninitialize() runs regardless of that result.
 1460  */
 1461 static int
 1462 ffs_uninit(vfsp)
 1463         struct vfsconf *vfsp;
 1464 {
 1465         int ret;
 1466 
 1467         ret = ufs_uninit(vfsp);
 1468         softdep_uninitialize();
 1469         return (ret);
 1470 }
 1471 
 1472 /*
 1473  * Write a superblock and associated information back to disk.
       *
       * mp         UFS mount (a struct ufsmount, despite the name).
       * waitfor    MNT_WAIT writes every buffer synchronously with bwrite();
       *            any other value uses bawrite().
       * suspended  nonzero marks every buffer written with B_VALIDSUSPWRT.
       *
       * The summary information is written first.  If any of those writes
       * fails the superblock itself is not written and that error is
       * returned.  Otherwise returns 0 or the error from the synchronous
       * superblock write.  Panics when asked to write a read-only filesystem
       * unless the mount has both MNT_RDONLY and MNT_UPDATE set.
 1474  */
 1475 static int
 1476 ffs_sbupdate(mp, waitfor, suspended)
 1477         struct ufsmount *mp;
 1478         int waitfor;
 1479         int suspended;
 1480 {
 1481         struct fs *fs = mp->um_fs;
 1482         struct buf *sbbp;
 1483         struct buf *bp;
 1484         int blks;
 1485         void *space;
 1486         int i, size, error, allerror = 0;
 1487 
 1488         if (fs->fs_ronly == 1 &&
 1489             (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != 
 1490             (MNT_RDONLY | MNT_UPDATE))
 1491                 panic("ffs_sbupdate: write read-only filesystem");
 1492         /*
 1493          * We use the superblock's buf to serialize calls to ffs_sbupdate().
 1494          */
 1495         sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
 1496             0, 0, 0);
 1497         /*
 1498          * First write back the summary information.
               * It is copied out of fs_csp one filesystem block at a time; the
               * last piece may be a partial block of whole fragments.
 1499          */
 1500         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 1501         space = fs->fs_csp;
 1502         for (i = 0; i < blks; i += fs->fs_frag) {
 1503                 size = fs->fs_bsize;
 1504                 if (i + fs->fs_frag > blks)
 1505                         size = (blks - i) * fs->fs_fsize;
 1506                 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
 1507                     size, 0, 0, 0);
 1508                 bcopy(space, bp->b_data, (u_int)size);
 1509                 space = (char *)space + size;
 1510                 if (suspended)
 1511                         bp->b_flags |= B_VALIDSUSPWRT;
 1512                 if (waitfor != MNT_WAIT)
 1513                         bawrite(bp);
 1514                 else if ((error = bwrite(bp)) != 0)
 1515                         allerror = error;
 1516         }
 1517         /*
 1518          * Now write back the superblock itself. If any errors occurred
 1519          * up to this point, then fail so that the superblock avoids
 1520          * being written out as clean.
 1521          */
 1522         if (allerror) {
 1523                 brelse(sbbp);
 1524                 return (allerror);
 1525         }
 1526         bp = sbbp;
              /*
               * Repair a superblock location that does not match the filesystem
               * type.  fs_sblockloc is cast to intmax_t so that the argument
               * matches the %jd conversion, as the other %jd printf in this
               * file does.
               * NOTE(review): FS_FLAGS_UPDATED is tested in fs_flags here but in
               * fs_old_flags in ffs_oldfscompat_read() -- confirm which field is
               * intended.
               */
 1527         if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
 1528             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
 1529                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
 1530                     fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
 1531                 fs->fs_sblockloc = SBLOCK_UFS1;
 1532         }
 1533         if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
 1534             (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
 1535                 printf("%s: correcting fs_sblockloc from %jd to %d\n",
 1536                     fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
 1537                 fs->fs_sblockloc = SBLOCK_UFS2;
 1538         }
              /*
               * Clear the modified flag and stamp the time before copying, so
               * both end up in the on-disk image.  The old-format fixups are
               * applied to the buffer copy only, not to the in-core superblock.
               */
 1539         fs->fs_fmod = 0;
 1540         fs->fs_time = time_second;
 1541         bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 1542         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
 1543         if (suspended)
 1544                 bp->b_flags |= B_VALIDSUSPWRT;
 1545         if (waitfor != MNT_WAIT)
 1546                 bawrite(bp);
 1547         else if ((error = bwrite(bp)) != 0)
 1548                 allerror = error;
 1549         return (allerror);
 1550 }
 1551 
 /*
  * Extended-attribute control entry point for FFS.
  *
  * All arguments are forwarded unchanged: to ufs_extattrctl() when the
  * kernel is compiled with UFS_EXTATTR, and to vfs_stdextattrctl()
  * otherwise.  The callee's return value is handed straight back to
  * the caller.
  */
 static int
 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
         int attrnamespace, const char *attrname, struct thread *td)
 {
         int error;

 #ifdef UFS_EXTATTR
         error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
             attrname, td);
 #else
         error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
             attrname, td);
 #endif
         return (error);
 }
 1565 
 1566 static void
 1567 ffs_ifree(struct ufsmount *ump, struct inode *ip)
 1568 {
 1569 
 1570         if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
 1571                 uma_zfree(uma_ufs1, ip->i_din1);
 1572         else if (ip->i_din2 != NULL)
 1573                 uma_zfree(uma_ufs2, ip->i_din2);
 1574         uma_zfree(uma_inode, ip);
 1575 }
 1576 
 1577 static int dobkgrdwrite = 1;
 1578 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
 1579     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
 1580 
 1581 /*
 1582  * Complete a background write started from bwrite.
 1583  */
 1584 static void
 1585 ffs_backgroundwritedone(struct buf *bp)
 1586 {
 1587         struct bufobj *bufobj;
 1588         struct buf *origbp;
 1589 
 1590         /*
 1591          * Find the original buffer that we are writing.
 1592          */
 1593         bufobj = bp->b_bufobj;
 1594         BO_LOCK(bufobj);
 1595         if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
 1596                 panic("backgroundwritedone: lost buffer");
 1597         /* Grab an extra reference to be dropped by the bufdone() below. */
 1598         bufobj_wrefl(bufobj);
 1599         BO_UNLOCK(bufobj);
 1600         /*
 1601          * Process dependencies then return any unfinished ones.
 1602          */
 1603         if (!LIST_EMPTY(&bp->b_dep))
 1604                 buf_complete(bp);
 1605 #ifdef SOFTUPDATES
 1606         if (!LIST_EMPTY(&bp->b_dep))
 1607                 softdep_move_dependencies(bp, origbp);
 1608 #endif
 1609         /*
 1610          * This buffer is marked B_NOCACHE so when it is released
 1611          * by biodone it will be tossed.
 1612          */
 1613         bp->b_flags |= B_NOCACHE;
 1614         bp->b_flags &= ~B_CACHE;
 1615         bufdone(bp);
 1616         BO_LOCK(bufobj);
 1617         /*
 1618          * Clear the BV_BKGRDINPROG flag in the original buffer
 1619          * and awaken it if it is waiting for the write to complete.
 1620          * If BV_BKGRDINPROG is not set in the original buffer it must
 1621          * have been released and re-instantiated - which is not legal.
 1622          */
 1623         KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
 1624             ("backgroundwritedone: lost buffer2"));
 1625         origbp->b_vflags &= ~BV_BKGRDINPROG;
 1626         if (origbp->b_vflags & BV_BKGRDWAIT) {
 1627                 origbp->b_vflags &= ~BV_BKGRDWAIT;
 1628                 wakeup(&origbp->b_xflags);
 1629         }
 1630         BO_UNLOCK(bufobj);
 1631 }
 1632 
 1633 
 1634 /*
 1635  * Write, release buffer on completion.  (Done by iodone
 1636  * if async).  Do not bother writing anything if the buffer
 1637  * is invalid.
 1638  *
 1639  * Note that we set B_CACHE here, indicating that buffer is
 1640  * fully valid and thus cacheable.  This is true even of NFS
 1641  * now so we set it generally.  This could be set either here 
 1642  * or in biodone() since the I/O is synchronous.  We put it
 1643  * here.
 1644  */
 1645 static int
 1646 ffs_bufwrite(struct buf *bp)
 1647 {
 1648         int oldflags, s;
 1649         struct buf *newbp;
 1650 
 1651         CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
 1652         if (bp->b_flags & B_INVAL) {
 1653                 brelse(bp);
 1654                 return (0);
 1655         }
 1656 
 1657         oldflags = bp->b_flags;
 1658 
 1659         if (BUF_REFCNT(bp) == 0)
 1660                 panic("bufwrite: buffer is not busy???");
 1661         s = splbio();
 1662         /*
 1663          * If a background write is already in progress, delay
 1664          * writing this block if it is asynchronous. Otherwise
 1665          * wait for the background write to complete.
 1666          */
 1667         BO_LOCK(bp->b_bufobj);
 1668         if (bp->b_vflags & BV_BKGRDINPROG) {
 1669                 if (bp->b_flags & B_ASYNC) {
 1670                         BO_UNLOCK(bp->b_bufobj);
 1671                         splx(s);
 1672                         bdwrite(bp);
 1673                         return (0);
 1674                 }
 1675                 bp->b_vflags |= BV_BKGRDWAIT;
 1676                 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
 1677                 if (bp->b_vflags & BV_BKGRDINPROG)
 1678                         panic("bufwrite: still writing");
 1679         }
 1680         BO_UNLOCK(bp->b_bufobj);
 1681 
 1682         /* Mark the buffer clean */
 1683         bundirty(bp);
 1684 
 1685         /*
 1686          * If this buffer is marked for background writing and we
 1687          * do not have to wait for it, make a copy and write the
 1688          * copy so as to leave this buffer ready for further use.
 1689          *
 1690          * This optimization eats a lot of memory.  If we have a page
 1691          * or buffer shortfall we can't do it.
 1692          */
 1693         if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && 
 1694             (bp->b_flags & B_ASYNC) &&
 1695             !vm_page_count_severe() &&
 1696             !buf_dirty_count_severe()) {
 1697                 KASSERT(bp->b_iodone == NULL,
 1698                     ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
 1699 
 1700                 /* get a new block */
 1701                 newbp = geteblk(bp->b_bufsize);
 1702 
 1703                 /*
 1704                  * set it to be identical to the old block.  We have to
 1705                  * set b_lblkno and BKGRDMARKER before calling bgetvp()
 1706                  * to avoid confusing the splay tree and gbincore().
 1707                  */
 1708                 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
 1709                 newbp->b_lblkno = bp->b_lblkno;
 1710                 newbp->b_xflags |= BX_BKGRDMARKER;
 1711                 BO_LOCK(bp->b_bufobj);
 1712                 bp->b_vflags |= BV_BKGRDINPROG;
 1713                 bgetvp(bp->b_vp, newbp);
 1714                 BO_UNLOCK(bp->b_bufobj);
 1715                 newbp->b_bufobj = &bp->b_vp->v_bufobj;
 1716                 newbp->b_blkno = bp->b_blkno;
 1717                 newbp->b_offset = bp->b_offset;
 1718                 newbp->b_iodone = ffs_backgroundwritedone;
 1719                 newbp->b_flags |= B_ASYNC;
 1720                 newbp->b_flags &= ~B_INVAL;
 1721 
 1722 #ifdef SOFTUPDATES
 1723                 /* move over the dependencies */
 1724                 if (!LIST_EMPTY(&bp->b_dep))
 1725                         softdep_move_dependencies(bp, newbp);
 1726 #endif 
 1727 
 1728                 /*
 1729                  * Initiate write on the copy, release the original to
 1730                  * the B_LOCKED queue so that it cannot go away until
 1731                  * the background write completes. If not locked it could go
 1732                  * away and then be reconstituted while it was being written.
 1733                  * If the reconstituted buffer were written, we could end up
 1734                  * with two background copies being written at the same time.
 1735                  */
 1736                 bqrelse(bp);
 1737                 bp = newbp;
 1738         }
 1739 
 1740         /* Let the normal bufwrite do the rest for us */
 1741         return (bufwrite(bp));
 1742 }
 1743 
 1744 
 1745 static void
 1746 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
 1747 {
 1748         struct vnode *vp;
 1749         int error;
 1750         struct buf *tbp;
 1751 
 1752         vp = bo->__bo_vnode;
 1753         if (bp->b_iocmd == BIO_WRITE) {
 1754                 if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
 1755                     bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
 1756                     (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
 1757                         panic("ffs_geom_strategy: bad I/O");
 1758                 bp->b_flags &= ~B_VALIDSUSPWRT;
 1759                 if ((vp->v_vflag & VV_COPYONWRITE) &&
 1760                     vp->v_rdev->si_snapdata != NULL) {
 1761                         if ((bp->b_flags & B_CLUSTER) != 0) {
 1762                                 runningbufwakeup(bp);
 1763                                 TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
 1764                                               b_cluster.cluster_entry) {
 1765                                         error = ffs_copyonwrite(vp, tbp);
 1766                                         if (error != 0 &&
 1767                                             error != EOPNOTSUPP) {
 1768                                                 bp->b_error = error;
 1769                                                 bp->b_ioflags |= BIO_ERROR;
 1770                                                 bufdone(bp);
 1771                                                 return;
 1772                                         }
 1773                                 }
 1774                                 bp->b_runningbufspace = bp->b_bufsize;
 1775                                 atomic_add_int(&runningbufspace,
 1776                                                bp->b_runningbufspace);
 1777                         } else {
 1778                                 error = ffs_copyonwrite(vp, bp);
 1779                                 if (error != 0 && error != EOPNOTSUPP) {
 1780                                         bp->b_error = error;
 1781                                         bp->b_ioflags |= BIO_ERROR;
 1782                                         bufdone(bp);
 1783                                         return;
 1784                                 }
 1785                         }
 1786                 }
 1787 #ifdef SOFTUPDATES
 1788                 if ((bp->b_flags & B_CLUSTER) != 0) {
 1789                         TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
 1790                                       b_cluster.cluster_entry) {
 1791                                 if (!LIST_EMPTY(&tbp->b_dep))
 1792                                         buf_start(tbp);
 1793                         }
 1794                 } else {
 1795                         if (!LIST_EMPTY(&bp->b_dep))
 1796                                 buf_start(bp);
 1797                 }
 1798 
 1799 #endif
 1800         }
 1801         g_vfs_strategy(bo, bp);
 1802 }
Cache object: d592d99e255ad65188c5154b5ace7a5a
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/ufs/ffs/ffs_vfsops.c

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_vfsops.c