ffs_snapshot.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
    3  *
    4  * Further information about snapshots can be obtained from:
    5  *
    6  *      Marshall Kirk McKusick          http://www.mckusick.com/softdep/
    7  *      1614 Oxford Street              mckusick@mckusick.com
    8  *      Berkeley, CA 94709-1608         +1-510-843-9542
    9  *      USA
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  *
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
   22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   24  * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
   25  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)ffs_snapshot.c      8.11 (McKusick) 7/23/00
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/5.2/sys/ufs/ffs/ffs_snapshot.c 122596 2003-11-13 03:56:32Z alc $");
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/systm.h>
   42 #include <sys/conf.h>
   43 #include <sys/bio.h>
   44 #include <sys/buf.h>
   45 #include <sys/proc.h>
   46 #include <sys/namei.h>
   47 #include <sys/sched.h>
   48 #include <sys/stat.h>
   49 #include <sys/malloc.h>
   50 #include <sys/mount.h>
   51 #include <sys/resource.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/vnode.h>
   54 
   55 #include <ufs/ufs/extattr.h>
   56 #include <ufs/ufs/quota.h>
   57 #include <ufs/ufs/ufsmount.h>
   58 #include <ufs/ufs/inode.h>
   59 #include <ufs/ufs/ufs_extern.h>
   60 
   61 #include <ufs/ffs/fs.h>
   62 #include <ufs/ffs/ffs_extern.h>
   63 
   64 #define KERNCRED thread0.td_ucred
   65 #define DEBUG 1
   66 
   67 static int cgaccount(int, struct vnode *, struct buf *, int);
   68 static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
   69     int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
   70     ufs_lbn_t, int), int);
   71 static int indiracct_ufs1(struct vnode *, struct vnode *, int,
   72     ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
   73     int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
   74     ufs_lbn_t, int), int);
   75 static int fullacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
   76     struct fs *, ufs_lbn_t, int);
   77 static int snapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
   78     struct fs *, ufs_lbn_t, int);
   79 static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
   80     struct fs *, ufs_lbn_t, int);
   81 static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
   82     int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
   83     ufs_lbn_t, int), int);
   84 static int indiracct_ufs2(struct vnode *, struct vnode *, int,
   85     ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
   86     int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
   87     ufs_lbn_t, int), int);
   88 static int fullacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
   89     struct fs *, ufs_lbn_t, int);
   90 static int snapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
   91     struct fs *, ufs_lbn_t, int);
   92 static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
   93     struct fs *, ufs_lbn_t, int);
   94 static int ffs_copyonwrite(struct vnode *, struct buf *);
   95 static int readblock(struct buf *, ufs2_daddr_t);
   96 
   97 /*
   98  * To ensure the consistency of snapshots across crashes, we must
   99  * synchronously write out copied blocks before allowing the
  100  * originals to be modified. Because of the rather severe speed
  101  * penalty that this imposes, the following flag allows this
  102  * crash persistence to be disabled.
  103  */
  104 int dopersistence = 0;
  105 
  106 #ifdef DEBUG
  107 #include <sys/sysctl.h>
  108 SYSCTL_INT(_debug, OID_AUTO, dopersistence, CTLFLAG_RW, &dopersistence, 0, "");
  109 static int snapdebug = 0;
  110 SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
  111 int collectsnapstats = 0;
  112 SYSCTL_INT(_debug, OID_AUTO, collectsnapstats, CTLFLAG_RW, &collectsnapstats,
  113         0, "");
  114 #endif /* DEBUG */
  115 
  116 /*
  117  * Create a snapshot file and initialize it for the filesystem.
  118  */
  119 int
  120 ffs_snapshot(mp, snapfile)
  121         struct mount *mp;
  122         char *snapfile;
  123 {
  124         ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
  125         int error, cg, snaploc;
  126         int i, size, len, loc;
  127         int flag = mp->mnt_flag;
  128         struct timespec starttime = {0, 0}, endtime;
  129         char saved_nice = 0;
  130         long redo = 0, snaplistsize = 0;
  131         int32_t *lp;
  132         void *space;
  133         struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs;
  134         struct snaphead *snaphead;
  135         struct thread *td = curthread;
  136         struct inode *ip, *xp;
  137         struct buf *bp, *nbp, *ibp, *sbp = NULL;
  138         struct nameidata nd;
  139         struct mount *wrtmp;
  140         struct vattr vat;
  141         struct vnode *vp, *xvp, *nvp, *devvp;
  142         struct uio auio;
  143         struct iovec aiov;
  144 
  145         /*
  146          * Need to serialize access to snapshot code per filesystem.
  147          */
  148         /*
  149          * Assign a snapshot slot in the superblock.
  150          */
  151         for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
  152                 if (fs->fs_snapinum[snaploc] == 0)
  153                         break;
  154         if (snaploc == FSMAXSNAP)
  155                 return (ENOSPC);
  156         /*
  157          * Create the snapshot file.
  158          */
  159 restart:
  160         NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, td);
  161         if ((error = namei(&nd)) != 0)
  162                 return (error);
  163         if (nd.ni_vp != NULL) {
  164                 vput(nd.ni_vp);
  165                 error = EEXIST;
  166         }
  167         if (nd.ni_dvp->v_mount != mp)
  168                 error = EXDEV;
  169         if (error) {
  170                 NDFREE(&nd, NDF_ONLY_PNBUF);
  171                 if (nd.ni_dvp == nd.ni_vp)
  172                         vrele(nd.ni_dvp);
  173                 else
  174                         vput(nd.ni_dvp);
  175                 return (error);
  176         }
  177         VATTR_NULL(&vat);
  178         vat.va_type = VREG;
  179         vat.va_mode = S_IRUSR;
  180         vat.va_vaflags |= VA_EXCLUSIVE;
  181         if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
  182                 wrtmp = NULL;
  183         if (wrtmp != mp)
  184                 panic("ffs_snapshot: mount mismatch");
  185         if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) {
  186                 NDFREE(&nd, NDF_ONLY_PNBUF);
  187                 vput(nd.ni_dvp);
  188                 if ((error = vn_start_write(NULL, &wrtmp,
  189                     V_XSLEEP | PCATCH)) != 0)
  190                         return (error);
  191                 goto restart;
  192         }
  193         VOP_LEASE(nd.ni_dvp, td, KERNCRED, LEASE_WRITE);
  194         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
  195         vput(nd.ni_dvp);
  196         if (error) {
  197                 NDFREE(&nd, NDF_ONLY_PNBUF);
  198                 vn_finished_write(wrtmp);
  199                 return (error);
  200         }
  201         vp = nd.ni_vp;
  202         ip = VTOI(vp);
  203         devvp = ip->i_devvp;
  204         /*
  205          * Allocate and copy the last block contents so as to be able
  206          * to set size to that of the filesystem.
  207          */
  208         numblks = howmany(fs->fs_size, fs->fs_frag);
  209         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
  210             fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
  211         if (error)
  212                 goto out;
  213         ip->i_size = lblktosize(fs, (off_t)numblks);
  214         DIP(ip, i_size) = ip->i_size;
  215         ip->i_flag |= IN_CHANGE | IN_UPDATE;
  216         if ((error = readblock(bp, numblks - 1)) != 0)
  217                 goto out;
  218         bawrite(bp);
  219         /*
  220          * Preallocate critical data structures so that we can copy
  221          * them in without further allocation after we suspend all
  222          * operations on the filesystem. We would like to just release
  223          * the allocated buffers without writing them since they will
  224          * be filled in below once we are ready to go, but this upsets
  225          * the soft update code, so we go ahead and write the new buffers.
  226          *
  227          * Allocate all indirect blocks and mark all of them as not
  228          * needing to be copied.
  229          */
  230         for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
  231                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
  232                     fs->fs_bsize, td->td_ucred, BA_METAONLY, &ibp);
  233                 if (error)
  234                         goto out;
  235                 bawrite(ibp);
  236         }
  237         /*
  238          * Allocate copies for the superblock and its summary information.
  239          */
  240         error = UFS_BALLOC(vp, fs->fs_sblockloc, fs->fs_sbsize, KERNCRED,
  241             0, &nbp);
  242         if (error)
  243                 goto out;
  244         bawrite(nbp);
  245         blkno = fragstoblks(fs, fs->fs_csaddr);
  246         len = howmany(fs->fs_cssize, fs->fs_bsize);
  247         for (loc = 0; loc < len; loc++) {
  248                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
  249                     fs->fs_bsize, KERNCRED, 0, &nbp);
  250                 if (error)
  251                         goto out;
  252                 bawrite(nbp);
  253         }
  254         /*
  255          * Allocate all cylinder group blocks.
  256          */
  257         for (cg = 0; cg < fs->fs_ncg; cg++) {
  258                 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  259                     fs->fs_bsize, KERNCRED, 0, &nbp);
  260                 if (error)
  261                         goto out;
  262                 bawrite(nbp);
  263         }
  264         /*
  265          * Copy all the cylinder group maps. Although the
  266          * filesystem is still active, we hope that only a few
  267          * cylinder groups will change between now and when we
  268          * suspend operations. Thus, we will be able to quickly
  269          * touch up the few cylinder groups that changed during
  270          * the suspension period.
  271          */
  272         len = howmany(fs->fs_ncg, NBBY);
  273         MALLOC(fs->fs_active, int *, len, M_DEVBUF, M_WAITOK);
  274         bzero(fs->fs_active, len);
  275         for (cg = 0; cg < fs->fs_ncg; cg++) {
  276                 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  277                     fs->fs_bsize, KERNCRED, 0, &nbp);
  278                 if (error)
  279                         goto out;
  280                 error = cgaccount(cg, vp, nbp, 1);
  281                 bawrite(nbp);
  282                 if (error)
  283                         goto out;
  284         }
  285         /*
  286          * Change inode to snapshot type file.
  287          */
  288         ip->i_flags |= SF_SNAPSHOT;
  289         DIP(ip, i_flags) = ip->i_flags;
  290         ip->i_flag |= IN_CHANGE | IN_UPDATE;
  291         /*
  292          * Ensure that the snapshot is completely on disk.
  293          * Since we have marked it as a snapshot it is safe to
  294          * unlock it as no process will be allowed to write to it.
  295          */
  296         if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td)) != 0)
  297                 goto out;
  298         VOP_UNLOCK(vp, 0, td);
  299         /*
  300          * All allocations are done, so we can now snapshot the system.
  301          *
  302          * Recind nice scheduling while running with the filesystem suspended.
  303          */
  304         if (td->td_ksegrp->kg_nice > 0) {
  305                 PROC_LOCK(td->td_proc);
  306                 mtx_lock_spin(&sched_lock);
  307                 saved_nice = td->td_ksegrp->kg_nice;
  308                 sched_nice(td->td_ksegrp, 0);
  309                 mtx_unlock_spin(&sched_lock);
  310                 PROC_UNLOCK(td->td_proc);
  311         }
  312         /*
  313          * Suspend operation on filesystem.
  314          */
  315         for (;;) {
  316                 vn_finished_write(wrtmp);
  317                 if ((error = vfs_write_suspend(vp->v_mount)) != 0) {
  318                         vn_start_write(NULL, &wrtmp, V_WAIT);
  319                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  320                         goto out;
  321                 }
  322                 if (mp->mnt_kern_flag & MNTK_SUSPENDED)
  323                         break;
  324                 vn_start_write(NULL, &wrtmp, V_WAIT);
  325         }
  326         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  327         if (collectsnapstats)
  328                 nanotime(&starttime);
  329         /*
  330          * First, copy all the cylinder group maps that have changed.
  331          */
  332         for (cg = 0; cg < fs->fs_ncg; cg++) {
  333                 if ((ACTIVECGNUM(fs, cg) & ACTIVECGOFF(cg)) != 0)
  334                         continue;
  335                 redo++;
  336                 error = UFS_BALLOC(vp, lfragtosize(fs, cgtod(fs, cg)),
  337                     fs->fs_bsize, KERNCRED, 0, &nbp);
  338                 if (error)
  339                         goto out1;
  340                 error = cgaccount(cg, vp, nbp, 2);
  341                 bawrite(nbp);
  342                 if (error)
  343                         goto out1;
  344         }
  345         /*
  346          * Grab a copy of the superblock and its summary information.
  347          * We delay writing it until the suspension is released below.
  348          */
  349         error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
  350             KERNCRED, &sbp);
  351         if (error) {
  352                 brelse(sbp);
  353                 sbp = NULL;
  354                 goto out1;
  355         }
  356         loc = blkoff(fs, fs->fs_sblockloc);
  357         copy_fs = (struct fs *)(sbp->b_data + loc);
  358         bcopy(fs, copy_fs, fs->fs_sbsize);
  359         if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
  360                 copy_fs->fs_clean = 1;
  361         size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
  362         if (fs->fs_sbsize < size)
  363                 bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize);
  364         size = blkroundup(fs, fs->fs_cssize);
  365         if (fs->fs_contigsumsize > 0)
  366                 size += fs->fs_ncg * sizeof(int32_t);
  367         space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
  368         copy_fs->fs_csp = space;
  369         bcopy(fs->fs_csp, copy_fs->fs_csp, fs->fs_cssize);
  370         (char *)space += fs->fs_cssize;
  371         loc = howmany(fs->fs_cssize, fs->fs_fsize);
  372         i = fs->fs_frag - loc % fs->fs_frag;
  373         len = (i == fs->fs_frag) ? 0 : i * fs->fs_fsize;
  374         if (len > 0) {
  375                 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + loc),
  376                     len, KERNCRED, &bp)) != 0) {
  377                         brelse(bp);
  378                         free(copy_fs->fs_csp, M_UFSMNT);
  379                         bawrite(sbp);
  380                         sbp = NULL;
  381                         goto out1;
  382                 }
  383                 bcopy(bp->b_data, space, (u_int)len);
  384                 (char *)space += len;
  385                 bp->b_flags |= B_INVAL | B_NOCACHE;
  386                 brelse(bp);
  387         }
  388         if (fs->fs_contigsumsize > 0) {
  389                 copy_fs->fs_maxcluster = lp = space;
  390                 for (i = 0; i < fs->fs_ncg; i++)
  391                         *lp++ = fs->fs_contigsumsize;
  392         }
  393         /*
  394          * We must check for active files that have been unlinked
  395          * (e.g., with a zero link count). We have to expunge all
  396          * trace of these files from the snapshot so that they are
  397          * not reclaimed prematurely by fsck or unnecessarily dumped.
  398          * We turn off the MNTK_SUSPENDED flag to avoid a panic from
  399          * spec_strategy about writing on a suspended filesystem.
  400          * Note that we skip unlinked snapshot files as they will
  401          * be handled separately below.
  402          *
  403          * We also calculate the needed size for the snapshot list.
  404          */
  405         snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) +
  406             FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */;
  407         mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
  408         MNT_ILOCK(mp);
  409 loop:
  410         for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) {
  411                 /*
  412                  * Make sure this vnode wasn't reclaimed in getnewvnode().
  413                  * Start over if it has (it won't be on the list anymore).
  414                  */
  415                 if (xvp->v_mount != mp)
  416                         goto loop;
  417                 nvp = TAILQ_NEXT(xvp, v_nmntvnodes);
  418                 VI_LOCK(xvp);
  419                 MNT_IUNLOCK(mp);
  420                 if ((xvp->v_iflag & VI_XLOCK) ||
  421                     xvp->v_usecount == 0 || xvp->v_type == VNON ||
  422                     (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
  423                         VI_UNLOCK(xvp);
  424                         MNT_ILOCK(mp);
  425                         continue;
  426                 }
  427                 if (snapdebug)
  428                         vprint("ffs_snapshot: busy vnode", xvp);
  429                 if (vn_lock(xvp, LK_EXCLUSIVE | LK_INTERLOCK, td) != 0) {
  430                         MNT_ILOCK(mp);
  431                         goto loop;
  432                 }
  433                 if (VOP_GETATTR(xvp, &vat, td->td_ucred, td) == 0 &&
  434                     vat.va_nlink > 0) {
  435                         VOP_UNLOCK(xvp, 0, td);
  436                         MNT_ILOCK(mp);
  437                         continue;
  438                 }
  439                 xp = VTOI(xvp);
  440                 if (ffs_checkfreefile(copy_fs, vp, xp->i_number)) {
  441                         VOP_UNLOCK(xvp, 0, td);
  442                         MNT_ILOCK(mp);
  443                         continue;
  444                 }
  445                 /*
  446                  * If there is a fragment, clear it here.
  447                  */
  448                 blkno = 0;
  449                 loc = howmany(xp->i_size, fs->fs_bsize) - 1;
  450                 if (loc < NDADDR) {
  451                         len = fragroundup(fs, blkoff(fs, xp->i_size));
  452                         if (len < fs->fs_bsize) {
  453                                 ffs_blkfree(copy_fs, vp, DIP(xp, i_db[loc]),
  454                                     len, xp->i_number);
  455                                 blkno = DIP(xp, i_db[loc]);
  456                                 DIP(xp, i_db[loc]) = 0;
  457                         }
  458                 }
  459                 snaplistsize += 1;
  460                 if (xp->i_ump->um_fstype == UFS1)
  461                         error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
  462                             BLK_NOCOPY);
  463                 else
  464                         error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
  465                             BLK_NOCOPY);
  466                 if (blkno)
  467                         DIP(xp, i_db[loc]) = blkno;
  468                 if (!error)
  469                         error = ffs_freefile(copy_fs, vp, xp->i_number,
  470                             xp->i_mode);
  471                 VOP_UNLOCK(xvp, 0, td);
  472                 if (error) {
  473                         free(copy_fs->fs_csp, M_UFSMNT);
  474                         bawrite(sbp);
  475                         sbp = NULL;
  476                         goto out1;
  477                 }
  478                 MNT_ILOCK(mp);
  479         }
  480         MNT_IUNLOCK(mp);
  481         /*
  482          * If there already exist snapshots on this filesystem, grab a
  483          * reference to their shared lock. If this is the first snapshot
  484          * on this filesystem, we need to allocate a lock for the snapshots
  485          * to share. In either case, acquire the snapshot lock and give
  486          * up our original private lock.
  487          */
  488         VI_LOCK(devvp);
  489         snaphead = &devvp->v_rdev->si_snapshots;
  490         if ((xp = TAILQ_FIRST(snaphead)) != NULL) {
  491                 VI_LOCK(vp);
  492                 vp->v_vnlock = ITOV(xp)->v_vnlock;
  493                 VI_UNLOCK(devvp);
  494         } else {
  495                 struct lock *lkp;
  496 
  497                 VI_UNLOCK(devvp);
  498                 MALLOC(lkp, struct lock *, sizeof(struct lock), M_UFSMNT,
  499                     M_WAITOK);
  500                 lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT,
  501                     LK_CANRECURSE | LK_NOPAUSE);
  502                 VI_LOCK(vp);
  503                 vp->v_vnlock = lkp;
  504         }
  505         vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td);
  506         transferlockers(&vp->v_lock, vp->v_vnlock);
  507         lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
  508         /*
  509          * If this is the first snapshot on this filesystem, then we need
  510          * to allocate the space for the list of preallocated snapshot blocks.
  511          * This list will be refined below, but this preliminary one will
  512          * keep us out of deadlock until the full one is ready.
  513          */
  514         if (xp == NULL) {
  515                 MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
  516                     M_UFSMNT, M_WAITOK);
  517                 blkp = &snapblklist[1];
  518                 *blkp++ = lblkno(fs, fs->fs_sblockloc);
  519                 blkno = fragstoblks(fs, fs->fs_csaddr);
  520                 for (cg = 0; cg < fs->fs_ncg; cg++) {
  521                         if (fragstoblks(fs, cgtod(fs, cg) > blkno))
  522                                 break;
  523                         *blkp++ = fragstoblks(fs, cgtod(fs, cg));
  524                 }
  525                 len = howmany(fs->fs_cssize, fs->fs_bsize);
  526                 for (loc = 0; loc < len; loc++)
  527                         *blkp++ = blkno + loc;
  528                 for (; cg < fs->fs_ncg; cg++)
  529                         *blkp++ = fragstoblks(fs, cgtod(fs, cg));
  530                 snapblklist[0] = blkp - snapblklist;
  531                 VI_LOCK(devvp);
  532                 if (devvp->v_rdev->si_snapblklist != NULL)
  533                         panic("ffs_snapshot: non-empty list");
  534                 devvp->v_rdev->si_snapblklist = snapblklist;
  535                 devvp->v_rdev->si_snaplistsize = blkp - snapblklist;
  536                 VI_UNLOCK(devvp);
  537         }
  538         /*
  539          * Record snapshot inode. Since this is the newest snapshot,
  540          * it must be placed at the end of the list.
  541          */
  542         VI_LOCK(devvp);
  543         fs->fs_snapinum[snaploc] = ip->i_number;
  544         if (ip->i_nextsnap.tqe_prev != 0)
  545                 panic("ffs_snapshot: %d already on list", ip->i_number);
  546         TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);
  547         devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
  548         devvp->v_vflag |= VV_COPYONWRITE;
  549         VI_UNLOCK(devvp);
  550         ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
  551         vp->v_vflag |= VV_SYSTEM;
  552 out1:
  553         /*
  554          * Resume operation on filesystem.
  555          */
  556         vfs_write_resume(vp->v_mount);
  557         vn_start_write(NULL, &wrtmp, V_WAIT);
  558         if (collectsnapstats && starttime.tv_sec > 0) {
  559                 nanotime(&endtime);
  560                 timespecsub(&endtime, &starttime);
  561                 printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
  562                     vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
  563                     endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
  564         }
  565         if (sbp == NULL)
  566                 goto out;
  567         /*
  568          * Copy allocation information from all the snapshots in
  569          * this snapshot and then expunge them from its view.
  570          */
  571         snaphead = &devvp->v_rdev->si_snapshots;
  572         TAILQ_FOREACH(xp, snaphead, i_nextsnap) {
  573                 if (xp == ip)
  574                         break;
  575                 if (xp->i_ump->um_fstype == UFS1)
  576                         error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
  577                             BLK_SNAP);
  578                 else
  579                         error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
  580                             BLK_SNAP);
  581                 if (error) {
  582                         fs->fs_snapinum[snaploc] = 0;
  583                         goto done;
  584                 }
  585         }
  586         /*
  587          * Allocate space for the full list of preallocated snapshot blocks.
  588          */
  589         MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t),
  590             M_UFSMNT, M_WAITOK);
  591         ip->i_snapblklist = &snapblklist[1];
  592         /*
  593          * Expunge the blocks used by the snapshots from the set of
  594          * blocks marked as used in the snapshot bitmaps. Also, collect
  595          * the list of allocated blocks in i_snapblklist.
  596          */
  597         if (ip->i_ump->um_fstype == UFS1)
  598                 error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
  599         else
  600                 error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
  601         if (error) {
  602                 fs->fs_snapinum[snaploc] = 0;
  603                 FREE(snapblklist, M_UFSMNT);
  604                 goto done;
  605         }
  606         if (snaplistsize < ip->i_snapblklist - snapblklist)
  607                 panic("ffs_snapshot: list too small");
  608         snaplistsize = ip->i_snapblklist - snapblklist;
  609         snapblklist[0] = snaplistsize;
  610         ip->i_snapblklist = 0;
  611         /*
  612          * Write out the list of allocated blocks to the end of the snapshot.
  613          */
  614         auio.uio_iov = &aiov;
  615         auio.uio_iovcnt = 1;
  616         aiov.iov_base = (void *)snapblklist;
  617         aiov.iov_len = snaplistsize * sizeof(daddr_t);
  618         auio.uio_resid = aiov.iov_len;;
  619         auio.uio_offset = ip->i_size;
  620         auio.uio_segflg = UIO_SYSSPACE;
  621         auio.uio_rw = UIO_WRITE;
  622         auio.uio_td = td;
  623         if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
  624                 fs->fs_snapinum[snaploc] = 0;
  625                 FREE(snapblklist, M_UFSMNT);
  626                 goto done;
  627         }
  628         /*
  629          * Write the superblock and its summary information
  630          * to the snapshot.
  631          */
  632         blkno = fragstoblks(fs, fs->fs_csaddr);
  633         len = howmany(fs->fs_cssize, fs->fs_bsize);
  634         space = copy_fs->fs_csp;
  635         for (loc = 0; loc < len; loc++) {
  636                 error = bread(vp, blkno + loc, fs->fs_bsize, KERNCRED, &nbp);
  637                 if (error) {
  638                         brelse(nbp);
  639                         fs->fs_snapinum[snaploc] = 0;
  640                         FREE(snapblklist, M_UFSMNT);
  641                         goto done;
  642                 }
  643                 bcopy(space, nbp->b_data, fs->fs_bsize);
  644                 space = (char *)space + fs->fs_bsize;
  645                 bawrite(nbp);
  646         }
  647         /*
  648          * As this is the newest list, it is the most inclusive, so
  649          * should replace the previous list.
  650          */
  651         VI_LOCK(devvp);
  652         space = devvp->v_rdev->si_snapblklist;
  653         devvp->v_rdev->si_snapblklist = snapblklist;
  654         devvp->v_rdev->si_snaplistsize = snaplistsize;
  655         VI_UNLOCK(devvp);
  656         if (space != NULL)
  657                 FREE(space, M_UFSMNT);
  658 done:
  659         free(copy_fs->fs_csp, M_UFSMNT);
  660         bawrite(sbp);
  661 out:
  662         if (saved_nice > 0) {
  663                 PROC_LOCK(td->td_proc);
  664                 mtx_lock_spin(&sched_lock);
  665                 sched_nice(td->td_ksegrp, saved_nice);
  666                 mtx_unlock_spin(&sched_lock);
  667                 PROC_UNLOCK(td->td_proc);
  668         }
  669         if (fs->fs_active != 0) {
  670                 FREE(fs->fs_active, M_DEVBUF);
  671                 fs->fs_active = 0;
  672         }
  673         mp->mnt_flag = flag;
  674         if (error)
  675                 (void) UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
  676         (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
  677         if (error)
  678                 vput(vp);
  679         else
  680                 VOP_UNLOCK(vp, 0, td);
  681         vn_finished_write(wrtmp);
  682         return (error);
  683 }
  684 
  685 /*
  686  * Copy a cylinder group map. All the unallocated blocks are marked
  687  * BLK_NOCOPY so that the snapshot knows that it need not copy them
  688  * if they are later written. If passno is one, then this is a first
  689  * pass, so only setting needs to be done. If passno is 2, then this
  690  * is a revision to a previous pass which must be undone as the
  691  * replacement pass is done.
  692  */
  693 static int
  694 cgaccount(cg, vp, nbp, passno)
  695         int cg;
  696         struct vnode *vp;
  697         struct buf *nbp;
  698         int passno;
  699 {
  700         struct buf *bp, *ibp;
  701         struct inode *ip;
  702         struct cg *cgp;
  703         struct fs *fs;
  704         ufs2_daddr_t base, numblks;
  705         int error, len, loc, indiroff;
  706 
  707         ip = VTOI(vp);
  708         fs = ip->i_fs;
  709         error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
  710                 (int)fs->fs_cgsize, KERNCRED, &bp);
  711         if (error) {
  712                 brelse(bp);
  713                 return (error);
  714         }
  715         cgp = (struct cg *)bp->b_data;
  716         if (!cg_chkmagic(cgp)) {
  717                 brelse(bp);
  718                 return (EIO);
  719         }
  720         atomic_set_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg));
  721         bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
  722         if (fs->fs_cgsize < fs->fs_bsize)
  723                 bzero(&nbp->b_data[fs->fs_cgsize],
  724                     fs->fs_bsize - fs->fs_cgsize);
  725         if (passno == 2)
  726                 nbp->b_flags |= B_VALIDSUSPWRT;
  727         numblks = howmany(fs->fs_size, fs->fs_frag);
  728         len = howmany(fs->fs_fpg, fs->fs_frag);
  729         base = cg * fs->fs_fpg / fs->fs_frag;
  730         if (base + len >= numblks)
  731                 len = numblks - base - 1;
  732         loc = 0;
  733         if (base < NDADDR) {
  734                 for ( ; loc < NDADDR; loc++) {
  735                         if (ffs_isblock(fs, cg_blksfree(cgp), loc))
  736                                 DIP(ip, i_db[loc]) = BLK_NOCOPY;
  737                         else if (passno == 2 && DIP(ip, i_db[loc])== BLK_NOCOPY)
  738                                 DIP(ip, i_db[loc]) = 0;
  739                         else if (passno == 1 && DIP(ip, i_db[loc])== BLK_NOCOPY)
  740                                 panic("ffs_snapshot: lost direct block");
  741                 }
  742         }
  743         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
  744             fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
  745         if (error) {
  746                 brelse(bp);
  747                 return (error);
  748         }
  749         indiroff = (base + loc - NDADDR) % NINDIR(fs);
  750         for ( ; loc < len; loc++, indiroff++) {
  751                 if (indiroff >= NINDIR(fs)) {
  752                         if (passno == 2)
  753                                 ibp->b_flags |= B_VALIDSUSPWRT;
  754                         bawrite(ibp);
  755                         error = UFS_BALLOC(vp,
  756                             lblktosize(fs, (off_t)(base + loc)),
  757                             fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
  758                         if (error) {
  759                                 brelse(bp);
  760                                 return (error);
  761                         }
  762                         indiroff = 0;
  763                 }
  764                 if (ip->i_ump->um_fstype == UFS1) {
  765                         if (ffs_isblock(fs, cg_blksfree(cgp), loc))
  766                                 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
  767                                     BLK_NOCOPY;
  768                         else if (passno == 2 && ((ufs1_daddr_t *)(ibp->b_data))
  769                             [indiroff] == BLK_NOCOPY)
  770                                 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = 0;
  771                         else if (passno == 1 && ((ufs1_daddr_t *)(ibp->b_data))
  772                             [indiroff] == BLK_NOCOPY)
  773                                 panic("ffs_snapshot: lost indirect block");
  774                         continue;
  775                 }
  776                 if (ffs_isblock(fs, cg_blksfree(cgp), loc))
  777                         ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
  778                 else if (passno == 2 &&
  779                     ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
  780                         ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = 0;
  781                 else if (passno == 1 &&
  782                     ((ufs2_daddr_t *)(ibp->b_data)) [indiroff] == BLK_NOCOPY)
  783                         panic("ffs_snapshot: lost indirect block");
  784         }
  785         bqrelse(bp);
  786         if (passno == 2)
  787                 ibp->b_flags |= B_VALIDSUSPWRT;
  788         bdwrite(ibp);
  789         return (0);
  790 }
  791 
  792 /*
  793  * Before expunging a snapshot inode, note all the
  794  * blocks that it claims with BLK_SNAP so that fsck will
  795  * be able to account for those blocks properly and so
  796  * that this snapshot knows that it need not copy them
  797  * if the other snapshot holding them is freed. This code
  798  * is reproduced once each for UFS1 and UFS2.
  799  */
  800 static int
  801 expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
  802         struct vnode *snapvp;
  803         struct inode *cancelip;
  804         struct fs *fs;
  805         int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
  806             struct fs *, ufs_lbn_t, int);
  807         int expungetype;
  808 {
  809         int i, error, indiroff;
  810         ufs_lbn_t lbn, rlbn;
  811         ufs2_daddr_t len, blkno, numblks, blksperindir;
  812         struct ufs1_dinode *dip;
  813         struct thread *td = curthread;
  814         struct buf *bp;
  815 
  816         /*
  817          * Prepare to expunge the inode. If its inode block has not
  818          * yet been copied, then allocate and fill the copy.
  819          */
  820         lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
  821         blkno = 0;
  822         if (lbn < NDADDR) {
  823                 blkno = VTOI(snapvp)->i_din1->di_db[lbn];
  824         } else {
  825                 td->td_pflags |= TDP_COWINPROGRESS;
  826                 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
  827                    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
  828                 td->td_pflags &= ~TDP_COWINPROGRESS;
  829                 if (error)
  830                         return (error);
  831                 indiroff = (lbn - NDADDR) % NINDIR(fs);
  832                 blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
  833                 bqrelse(bp);
  834         }
  835         if (blkno != 0) {
  836                 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
  837                         return (error);
  838         } else {
  839                 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
  840                     fs->fs_bsize, KERNCRED, 0, &bp);
  841                 if (error)
  842                         return (error);
  843                 if ((error = readblock(bp, lbn)) != 0)
  844                         return (error);
  845         }
  846         /*
  847          * Set a snapshot inode to be a zero length file, regular files
  848          * to be completely unallocated.
  849          */
  850         dip = (struct ufs1_dinode *)bp->b_data +
  851             ino_to_fsbo(fs, cancelip->i_number);
  852         if (expungetype == BLK_NOCOPY)
  853                 dip->di_mode = 0;
  854         dip->di_size = 0;
  855         dip->di_blocks = 0;
  856         dip->di_flags &= ~SF_SNAPSHOT;
  857         bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t));
  858         bdwrite(bp);
  859         /*
  860          * Now go through and expunge all the blocks in the file
  861          * using the function requested.
  862          */
  863         numblks = howmany(cancelip->i_size, fs->fs_bsize);
  864         if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0],
  865             &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype)))
  866                 return (error);
  867         if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0],
  868             &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype)))
  869                 return (error);
  870         blksperindir = 1;
  871         lbn = -NDADDR;
  872         len = numblks - NDADDR;
  873         rlbn = NDADDR;
  874         for (i = 0; len > 0 && i < NIADDR; i++) {
  875                 error = indiracct_ufs1(snapvp, ITOV(cancelip), i,
  876                     cancelip->i_din1->di_ib[i], lbn, rlbn, len,
  877                     blksperindir, fs, acctfunc, expungetype);
  878                 if (error)
  879                         return (error);
  880                 blksperindir *= NINDIR(fs);
  881                 lbn -= blksperindir + 1;
  882                 len -= blksperindir;
  883                 rlbn += blksperindir;
  884         }
  885         return (0);
  886 }
  887 
  888 /*
  889  * Descend an indirect block chain for vnode cancelvp accounting for all
  890  * its indirect blocks in snapvp.
  891  */ 
  892 static int
  893 indiracct_ufs1(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
  894             blksperindir, fs, acctfunc, expungetype)
  895         struct vnode *snapvp;
  896         struct vnode *cancelvp;
  897         int level;
  898         ufs1_daddr_t blkno;
  899         ufs_lbn_t lbn;
  900         ufs_lbn_t rlbn;
  901         ufs_lbn_t remblks;
  902         ufs_lbn_t blksperindir;
  903         struct fs *fs;
  904         int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
  905             struct fs *, ufs_lbn_t, int);
  906         int expungetype;
  907 {
  908         int error, num, i;
  909         ufs_lbn_t subblksperindir;
  910         struct indir indirs[NIADDR + 2];
  911         ufs1_daddr_t last, *bap;
  912         struct buf *bp;
  913 
  914         if (blkno == 0) {
  915                 if (expungetype == BLK_NOCOPY)
  916                         return (0);
  917                 panic("indiracct_ufs1: missing indir");
  918         }
  919         if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
  920                 return (error);
  921         if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
  922                 panic("indiracct_ufs1: botched params");
  923         /*
  924          * We have to expand bread here since it will deadlock looking
  925          * up the block number for any blocks that are not in the cache.
  926          */
  927         bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
  928         bp->b_blkno = fsbtodb(fs, blkno);
  929         if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
  930             (error = readblock(bp, fragstoblks(fs, blkno)))) {
  931                 brelse(bp);
  932                 return (error);
  933         }
  934         /*
  935          * Account for the block pointers in this indirect block.
  936          */
  937         last = howmany(remblks, blksperindir);
  938         if (last > NINDIR(fs))
  939                 last = NINDIR(fs);
  940         MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
  941         bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
  942         bqrelse(bp);
  943         error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
  944             level == 0 ? rlbn : -1, expungetype);
  945         if (error || level == 0)
  946                 goto out;
  947         /*
  948          * Account for the block pointers in each of the indirect blocks
  949          * in the levels below us.
  950          */
  951         subblksperindir = blksperindir / NINDIR(fs);
  952         for (lbn++, level--, i = 0; i < last; i++) {
  953                 error = indiracct_ufs1(snapvp, cancelvp, level, bap[i], lbn,
  954                     rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
  955                 if (error)
  956                         goto out;
  957                 rlbn += blksperindir;
  958                 lbn -= blksperindir;
  959                 remblks -= blksperindir;
  960         }
  961 out:
  962         FREE(bap, M_DEVBUF);
  963         return (error);
  964 }
  965 
  966 /*
  967  * Do both snap accounting and map accounting.
  968  */
  969 static int
  970 fullacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)
  971         struct vnode *vp;
  972         ufs1_daddr_t *oldblkp, *lastblkp;
  973         struct fs *fs;
  974         ufs_lbn_t lblkno;
  975         int exptype;    /* BLK_SNAP or BLK_NOCOPY */
  976 {
  977         int error;
  978 
  979         if ((error = snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
  980                 return (error);
  981         return (mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, exptype));
  982 }
  983 
  984 /*
  985  * Identify a set of blocks allocated in a snapshot inode.
  986  */
  987 static int
  988 snapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
  989         struct vnode *vp;
  990         ufs1_daddr_t *oldblkp, *lastblkp;
  991         struct fs *fs;
  992         ufs_lbn_t lblkno;
  993         int expungetype;        /* BLK_SNAP or BLK_NOCOPY */
  994 {
  995         struct inode *ip = VTOI(vp);
  996         ufs1_daddr_t blkno, *blkp;
  997         ufs_lbn_t lbn;
  998         struct buf *ibp;
  999         int error;
 1000 
 1001         for ( ; oldblkp < lastblkp; oldblkp++) {
 1002                 blkno = *oldblkp;
 1003                 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
 1004                         continue;
 1005                 lbn = fragstoblks(fs, blkno);
 1006                 if (lbn < NDADDR) {
 1007                         blkp = &ip->i_din1->di_db[lbn];
 1008                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1009                 } else {
 1010                         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 1011                             fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
 1012                         if (error)
 1013                                 return (error);
 1014                         blkp = &((ufs1_daddr_t *)(ibp->b_data))
 1015                             [(lbn - NDADDR) % NINDIR(fs)];
 1016                 }
 1017                 /*
 1018                  * If we are expunging a snapshot vnode and we
 1019                  * find a block marked BLK_NOCOPY, then it is
 1020                  * one that has been allocated to this snapshot after
 1021                  * we took our current snapshot and can be ignored.
 1022                  */
 1023                 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
 1024                         if (lbn >= NDADDR)
 1025                                 brelse(ibp);
 1026                 } else {
 1027                         if (*blkp != 0)
 1028                                 panic("snapacct_ufs1: bad block");
 1029                         *blkp = expungetype;
 1030                         if (lbn >= NDADDR)
 1031                                 bdwrite(ibp);
 1032                 }
 1033         }
 1034         return (0);
 1035 }
 1036 
 1037 /*
 1038  * Account for a set of blocks allocated in a snapshot inode.
 1039  */
 1040 static int
 1041 mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
 1042         struct vnode *vp;
 1043         ufs1_daddr_t *oldblkp, *lastblkp;
 1044         struct fs *fs;
 1045         ufs_lbn_t lblkno;
 1046         int expungetype;
 1047 {
 1048         ufs1_daddr_t blkno;
 1049         struct inode *ip;
 1050         ino_t inum;
 1051         int acctit;
 1052 
 1053         ip = VTOI(vp);
 1054         inum = ip->i_number;
 1055         if (lblkno == -1)
 1056                 acctit = 0;
 1057         else
 1058                 acctit = 1;
 1059         for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
 1060                 blkno = *oldblkp;
 1061                 if (blkno == 0 || blkno == BLK_NOCOPY)
 1062                         continue;
 1063                 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
 1064                         *ip->i_snapblklist++ = lblkno;
 1065                 if (blkno == BLK_SNAP)
 1066                         blkno = blkstofrags(fs, lblkno);
 1067                 ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
 1068         }
 1069         return (0);
 1070 }
 1071 
 1072 /*
 1073  * Before expunging a snapshot inode, note all the
 1074  * blocks that it claims with BLK_SNAP so that fsck will
 1075  * be able to account for those blocks properly and so
 1076  * that this snapshot knows that it need not copy them
 1077  * if the other snapshot holding them is freed. This code
 1078  * is reproduced once each for UFS1 and UFS2.
 1079  */
 1080 static int
 1081 expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
 1082         struct vnode *snapvp;
 1083         struct inode *cancelip;
 1084         struct fs *fs;
 1085         int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
 1086             struct fs *, ufs_lbn_t, int);
 1087         int expungetype;
 1088 {
 1089         int i, error, indiroff;
 1090         ufs_lbn_t lbn, rlbn;
 1091         ufs2_daddr_t len, blkno, numblks, blksperindir;
 1092         struct ufs2_dinode *dip;
 1093         struct thread *td = curthread;
 1094         struct buf *bp;
 1095 
 1096         /*
 1097          * Prepare to expunge the inode. If its inode block has not
 1098          * yet been copied, then allocate and fill the copy.
 1099          */
 1100         lbn = fragstoblks(fs, ino_to_fsba(fs, cancelip->i_number));
 1101         blkno = 0;
 1102         if (lbn < NDADDR) {
 1103                 blkno = VTOI(snapvp)->i_din2->di_db[lbn];
 1104         } else {
 1105                 td->td_pflags |= TDP_COWINPROGRESS;
 1106                 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
 1107                    fs->fs_bsize, KERNCRED, BA_METAONLY, &bp);
 1108                 td->td_pflags &= ~TDP_COWINPROGRESS;
 1109                 if (error)
 1110                         return (error);
 1111                 indiroff = (lbn - NDADDR) % NINDIR(fs);
 1112                 blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
 1113                 bqrelse(bp);
 1114         }
 1115         if (blkno != 0) {
 1116                 if ((error = bread(snapvp, lbn, fs->fs_bsize, KERNCRED, &bp)))
 1117                         return (error);
 1118         } else {
 1119                 error = UFS_BALLOC(snapvp, lblktosize(fs, (off_t)lbn),
 1120                     fs->fs_bsize, KERNCRED, 0, &bp);
 1121                 if (error)
 1122                         return (error);
 1123                 if ((error = readblock(bp, lbn)) != 0)
 1124                         return (error);
 1125         }
 1126         /*
 1127          * Set a snapshot inode to be a zero length file, regular files
 1128          * to be completely unallocated.
 1129          */
 1130         dip = (struct ufs2_dinode *)bp->b_data +
 1131             ino_to_fsbo(fs, cancelip->i_number);
 1132         if (expungetype == BLK_NOCOPY)
 1133                 dip->di_mode = 0;
 1134         dip->di_size = 0;
 1135         dip->di_blocks = 0;
 1136         dip->di_flags &= ~SF_SNAPSHOT;
 1137         bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t));
 1138         bdwrite(bp);
 1139         /*
 1140          * Now go through and expunge all the blocks in the file
 1141          * using the function requested.
 1142          */
 1143         numblks = howmany(cancelip->i_size, fs->fs_bsize);
 1144         if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0],
 1145             &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype)))
 1146                 return (error);
 1147         if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0],
 1148             &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype)))
 1149                 return (error);
 1150         blksperindir = 1;
 1151         lbn = -NDADDR;
 1152         len = numblks - NDADDR;
 1153         rlbn = NDADDR;
 1154         for (i = 0; len > 0 && i < NIADDR; i++) {
 1155                 error = indiracct_ufs2(snapvp, ITOV(cancelip), i,
 1156                     cancelip->i_din2->di_ib[i], lbn, rlbn, len,
 1157                     blksperindir, fs, acctfunc, expungetype);
 1158                 if (error)
 1159                         return (error);
 1160                 blksperindir *= NINDIR(fs);
 1161                 lbn -= blksperindir + 1;
 1162                 len -= blksperindir;
 1163                 rlbn += blksperindir;
 1164         }
 1165         return (0);
 1166 }
 1167 
 1168 /*
 1169  * Descend an indirect block chain for vnode cancelvp accounting for all
 1170  * its indirect blocks in snapvp.
 1171  */ 
 1172 static int
 1173 indiracct_ufs2(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks,
 1174             blksperindir, fs, acctfunc, expungetype)
 1175         struct vnode *snapvp;
 1176         struct vnode *cancelvp;
 1177         int level;
 1178         ufs2_daddr_t blkno;
 1179         ufs_lbn_t lbn;
 1180         ufs_lbn_t rlbn;
 1181         ufs_lbn_t remblks;
 1182         ufs_lbn_t blksperindir;
 1183         struct fs *fs;
 1184         int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
 1185             struct fs *, ufs_lbn_t, int);
 1186         int expungetype;
 1187 {
 1188         int error, num, i;
 1189         ufs_lbn_t subblksperindir;
 1190         struct indir indirs[NIADDR + 2];
 1191         ufs2_daddr_t last, *bap;
 1192         struct buf *bp;
 1193 
 1194         if (blkno == 0) {
 1195                 if (expungetype == BLK_NOCOPY)
 1196                         return (0);
 1197                 panic("indiracct_ufs2: missing indir");
 1198         }
 1199         if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
 1200                 return (error);
 1201         if (lbn != indirs[num - 1 - level].in_lbn || num < 2)
 1202                 panic("indiracct_ufs2: botched params");
 1203         /*
 1204          * We have to expand bread here since it will deadlock looking
 1205          * up the block number for any blocks that are not in the cache.
 1206          */
 1207         bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0, 0);
 1208         bp->b_blkno = fsbtodb(fs, blkno);
 1209         if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
 1210             (error = readblock(bp, fragstoblks(fs, blkno)))) {
 1211                 brelse(bp);
 1212                 return (error);
 1213         }
 1214         /*
 1215          * Account for the block pointers in this indirect block.
 1216          */
 1217         last = howmany(remblks, blksperindir);
 1218         if (last > NINDIR(fs))
 1219                 last = NINDIR(fs);
 1220         MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
 1221         bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
 1222         bqrelse(bp);
 1223         error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs,
 1224             level == 0 ? rlbn : -1, expungetype);
 1225         if (error || level == 0)
 1226                 goto out;
 1227         /*
 1228          * Account for the block pointers in each of the indirect blocks
 1229          * in the levels below us.
 1230          */
 1231         subblksperindir = blksperindir / NINDIR(fs);
 1232         for (lbn++, level--, i = 0; i < last; i++) {
 1233                 error = indiracct_ufs2(snapvp, cancelvp, level, bap[i], lbn,
 1234                     rlbn, remblks, subblksperindir, fs, acctfunc, expungetype);
 1235                 if (error)
 1236                         goto out;
 1237                 rlbn += blksperindir;
 1238                 lbn -= blksperindir;
 1239                 remblks -= blksperindir;
 1240         }
 1241 out:
 1242         FREE(bap, M_DEVBUF);
 1243         return (error);
 1244 }
 1245 
 1246 /*
 1247  * Do both snap accounting and map accounting.
 1248  */
 1249 static int
 1250 fullacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)
 1251         struct vnode *vp;
 1252         ufs2_daddr_t *oldblkp, *lastblkp;
 1253         struct fs *fs;
 1254         ufs_lbn_t lblkno;
 1255         int exptype;    /* BLK_SNAP or BLK_NOCOPY */
 1256 {
 1257         int error;
 1258 
 1259         if ((error = snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype)))
 1260                 return (error);
 1261         return (mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, exptype));
 1262 }
 1263 
 1264 /*
 1265  * Identify a set of blocks allocated in a snapshot inode.
 1266  */
 1267 static int
 1268 snapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
 1269         struct vnode *vp;
 1270         ufs2_daddr_t *oldblkp, *lastblkp;
 1271         struct fs *fs;
 1272         ufs_lbn_t lblkno;
 1273         int expungetype;        /* BLK_SNAP or BLK_NOCOPY */
 1274 {
 1275         struct inode *ip = VTOI(vp);
 1276         ufs2_daddr_t blkno, *blkp;
 1277         ufs_lbn_t lbn;
 1278         struct buf *ibp;
 1279         int error;
 1280 
 1281         for ( ; oldblkp < lastblkp; oldblkp++) {
 1282                 blkno = *oldblkp;
 1283                 if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
 1284                         continue;
 1285                 lbn = fragstoblks(fs, blkno);
 1286                 if (lbn < NDADDR) {
 1287                         blkp = &ip->i_din2->di_db[lbn];
 1288                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1289                 } else {
 1290                         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 1291                             fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
 1292                         if (error)
 1293                                 return (error);
 1294                         blkp = &((ufs2_daddr_t *)(ibp->b_data))
 1295                             [(lbn - NDADDR) % NINDIR(fs)];
 1296                 }
 1297                 /*
 1298                  * If we are expunging a snapshot vnode and we
 1299                  * find a block marked BLK_NOCOPY, then it is
 1300                  * one that has been allocated to this snapshot after
 1301                  * we took our current snapshot and can be ignored.
 1302                  */
 1303                 if (expungetype == BLK_SNAP && *blkp == BLK_NOCOPY) {
 1304                         if (lbn >= NDADDR)
 1305                                 brelse(ibp);
 1306                 } else {
 1307                         if (*blkp != 0)
 1308                                 panic("snapacct_ufs2: bad block");
 1309                         *blkp = expungetype;
 1310                         if (lbn >= NDADDR)
 1311                                 bdwrite(ibp);
 1312                 }
 1313         }
 1314         return (0);
 1315 }
 1316 
 1317 /*
 1318  * Account for a set of blocks allocated in a snapshot inode.
 1319  */
 1320 static int
 1321 mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
 1322         struct vnode *vp;
 1323         ufs2_daddr_t *oldblkp, *lastblkp;
 1324         struct fs *fs;
 1325         ufs_lbn_t lblkno;
 1326         int expungetype;
 1327 {
 1328         ufs2_daddr_t blkno;
 1329         struct inode *ip;
 1330         ino_t inum;
 1331         int acctit;
 1332 
 1333         ip = VTOI(vp);
 1334         inum = ip->i_number;
 1335         if (lblkno == -1)
 1336                 acctit = 0;
 1337         else
 1338                 acctit = 1;
 1339         for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) {
 1340                 blkno = *oldblkp;
 1341                 if (blkno == 0 || blkno == BLK_NOCOPY)
 1342                         continue;
 1343                 if (acctit && expungetype == BLK_SNAP && blkno != BLK_SNAP)
 1344                         *ip->i_snapblklist++ = lblkno;
 1345                 if (blkno == BLK_SNAP)
 1346                         blkno = blkstofrags(fs, lblkno);
 1347                 ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum);
 1348         }
 1349         return (0);
 1350 }
 1351 
 1352 /*
 1353  * Decrement extra reference on snapshot when last name is removed.
 1354  * It will not be freed until the last open reference goes away.
 1355  */
 1356 void
 1357 ffs_snapgone(ip)
 1358         struct inode *ip;
 1359 {
 1360         struct inode *xp;
 1361         struct fs *fs;
 1362         int snaploc;
 1363 
 1364         /*
 1365          * Find snapshot in incore list.
 1366          */
 1367         TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap)
 1368                 if (xp == ip)
 1369                         break;
 1370         if (xp != NULL)
 1371                 vrele(ITOV(ip));
 1372         else if (snapdebug)
 1373                 printf("ffs_snapgone: lost snapshot vnode %d\n",
 1374                     ip->i_number);
 1375         /*
 1376          * Delete snapshot inode from superblock. Keep list dense.
 1377          */
 1378         fs = ip->i_fs;
 1379         for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
 1380                 if (fs->fs_snapinum[snaploc] == ip->i_number)
 1381                         break;
 1382         if (snaploc < FSMAXSNAP) {
 1383                 for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
 1384                         if (fs->fs_snapinum[snaploc] == 0)
 1385                                 break;
 1386                         fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
 1387                 }
 1388                 fs->fs_snapinum[snaploc - 1] = 0;
 1389         }
 1390 }
 1391 
 1392 /*
 1393  * Prepare a snapshot file for being removed.
 1394  */
 1395 void
 1396 ffs_snapremove(vp)
 1397         struct vnode *vp;
 1398 {
              /*
               * vp is the vnode of the snapshot file. The work is done in three
               * steps: detach the inode from the device's list of active
               * snapshots, walk the snapshot's block pointers clearing the
               * BLK_NOCOPY/BLK_SNAP markers and offering claimed blocks to the
               * remaining snapshots, and finally clear SF_SNAPSHOT on the inode.
               * Nothing is returned; UFS_BALLOC failures are skipped silently.
               */
 1399         struct inode *ip;
 1400         struct vnode *devvp;
 1401         struct lock *lkp;
 1402         struct buf *ibp;
 1403         struct fs *fs;
 1404         struct thread *td = curthread;
 1405         ufs2_daddr_t numblks, blkno, dblk, *snapblklist;
 1406         int error, loc, last;
 1407
 1408         ip = VTOI(vp);
 1409         fs = ip->i_fs;
 1410         devvp = ip->i_devvp;
 1411         /*
 1412          * If active, delete from incore list (this snapshot may
 1413          * already have been in the process of being deleted, so
 1414          * would not have been active).
 1415          *
 1416          * Clear copy-on-write flag if last snapshot.
 1417          */
 1418         if (ip->i_nextsnap.tqe_prev != 0) {
 1419                 VI_LOCK(devvp);
                      /*
                       * The devvp interlock is handed to lockmgr() through
                       * LK_INTERLOCK and is taken again right after the call,
                       * before the snapshot list is modified.
                       */
 1420                 lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE,
 1421                     VI_MTX(devvp), td);
 1422                 VI_LOCK(devvp);
 1423                 TAILQ_REMOVE(&devvp->v_rdev->si_snapshots, ip, i_nextsnap);
 1424                 ip->i_nextsnap.tqe_prev = 0;
                      /*
                       * Point the vnode back at its own v_lock and release the
                       * lock it was using until now (remembered in lkp).
                       */
 1425                 lkp = vp->v_vnlock;
 1426                 vp->v_vnlock = &vp->v_lock;
 1427                 lockmgr(lkp, LK_RELEASE, NULL, td);
 1428                 if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) {
 1429                         VI_UNLOCK(devvp);
 1430                 } else {
                              /*
                               * That was the last snapshot: detach the block
                               * hint list and the copy-on-write hook from the
                               * device, then drain, destroy and free the lock
                               * in lkp and free the hint list.
                               */
 1431                         snapblklist = devvp->v_rdev->si_snapblklist;
 1432                         devvp->v_rdev->si_snapblklist = 0;
 1433                         devvp->v_rdev->si_snaplistsize = 0;
 1434                         devvp->v_rdev->si_copyonwrite = 0;
 1435                         devvp->v_vflag &= ~VV_COPYONWRITE;
 1436                         lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td);
 1437                         lockmgr(lkp, LK_RELEASE, NULL, td);
 1438                         lockdestroy(lkp);
 1439                         FREE(lkp, M_UFSMNT);
 1440                         FREE(snapblklist, M_UFSMNT);
 1441                 }
 1442         }
 1443         /*
 1444          * Clear all BLK_NOCOPY fields. Pass any block claims to other
 1445          * snapshots that want them (see ffs_snapblkfree below).
               *
               * Direct blocks first. The scan starts at index 1, so i_db[0]
               * is not examined here. An entry equal to its own logical
               * position (blkstofrags(fs, blkno)) is a claimed block; when
               * ffs_snapblkfree() returns non-zero the block has been taken
               * by another snapshot, so it is dropped from this inode's
               * block count and its pointer is cleared.
 1446          */
 1447         for (blkno = 1; blkno < NDADDR; blkno++) {
 1448                 dblk = DIP(ip, i_db[blkno]);
 1449                 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 1450                         DIP(ip, i_db[blkno]) = 0;
 1451                 else if ((dblk == blkstofrags(fs, blkno) &&
 1452                      ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize,
 1453                      ip->i_number))) {
 1454                         DIP(ip, i_blocks) -= btodb(fs->fs_bsize);
 1455                         DIP(ip, i_db[blkno]) = 0;
 1456                 }
 1457         }
              /*
               * Then the indirect blocks. Each pass asks UFS_BALLOC (with
               * BA_METAONLY) for the buffer ibp covering logical block blkno
               * and scans up to NINDIR(fs) entries of it as ufs1 or ufs2 block
               * pointers, applying the same rules as for the direct blocks.
               * If UFS_BALLOC fails, that indirect block is skipped.
               *
               * NOTE(review): the claimed-block test compares every entry
               * against blkstofrags(fs, blkno), which does not vary with loc,
               * and "last" is derived from fs->fs_size rather than numblks --
               * confirm both are intended.
               */
 1458         numblks = howmany(ip->i_size, fs->fs_bsize);
 1459         for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
 1460                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)blkno),
 1461                     fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
 1462                 if (error)
 1463                         continue;
 1464                 if (fs->fs_size - blkno > NINDIR(fs))
 1465                         last = NINDIR(fs);
 1466                 else
 1467                         last = fs->fs_size - blkno;
 1468                 for (loc = 0; loc < last; loc++) {
 1469                         if (ip->i_ump->um_fstype == UFS1) {
 1470                                 dblk = ((ufs1_daddr_t *)(ibp->b_data))[loc];
 1471                                 if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 1472                                         ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
 1473                                 else if ((dblk == blkstofrags(fs, blkno) &&
 1474                                      ffs_snapblkfree(fs, ip->i_devvp, dblk,
 1475                                      fs->fs_bsize, ip->i_number))) {
 1476                                         ip->i_din1->di_blocks -=
 1477                                             btodb(fs->fs_bsize);
 1478                                         ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0;
 1479                                 }
 1480                                 continue;
 1481                         }
                              /* Not UFS1: entries are ufs2_daddr_t. */
 1482                         dblk = ((ufs2_daddr_t *)(ibp->b_data))[loc];
 1483                         if (dblk == BLK_NOCOPY || dblk == BLK_SNAP)
 1484                                 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
 1485                         else if ((dblk == blkstofrags(fs, blkno) &&
 1486                              ffs_snapblkfree(fs, ip->i_devvp, dblk,
 1487                              fs->fs_bsize, ip->i_number))) {
 1488                                 ip->i_din2->di_blocks -= btodb(fs->fs_bsize);
 1489                                 ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0;
 1490                         }
 1491                 }
                      /* Start an asynchronous write of the updated indirect block. */
 1492                 bawrite(ibp);
 1493         }
 1494         /*
 1495          * Clear snapshot flag and drop reference.
               * The in-core flags are copied to the on-disk inode image and
               * the inode is marked changed so that it gets written back.
 1496          */
 1497         ip->i_flags &= ~SF_SNAPSHOT;
 1498         DIP(ip, i_flags) = ip->i_flags;
 1499         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1500 }
 1501 
 1502 /*
 1503  * Notification that a block is being freed. Return zero if the free
 1504  * should be allowed to proceed. Return non-zero if the snapshot file
 1505  * wants to claim the block. The block will be claimed if it is an
 1506  * uncopied part of one of the snapshots. It will be freed if it is
 1507  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 1508  * If a fragment is being freed, then all snapshots that care about
 1509  * it must make a copy since a snapshot file can only claim full sized
 1510  * blocks. Note that if more than one snapshot file maps the block,
 1511  * we can pick one at random to claim it. Since none of the snapshots
 1512  * can change, we are assured that they will all see the same unmodified
 1513  * image. When deleting a snapshot file (see ffs_snapremove above), we
 1514  * must push any of these claimed blocks to one of the other snapshots
 1515  * that maps it. These claimed blocks are easily identified as they will
 1516  * have a block number equal to their logical block number within the
 1517  * snapshot. A copied block can never have this property because they
 1518  * must always have been allocated from a BLK_NOCOPY location.
       *
       * Arguments: fs is the filesystem, devvp its device vnode (whose
       * v_rdev->si_snapshots list is walked), bno the fragment address of
       * the block being freed, size the number of bytes being freed, and
       * inum the inode number of the file giving up the block (used only
       * in DEBUG output).
       *
       * Returns 1 as soon as a snapshot has taken over a full-sized block;
       * otherwise returns the error, if any, from UFS_BALLOC or readblock
       * (0 when nothing failed).
 1519  */
 1520 int
 1521 ffs_snapblkfree(fs, devvp, bno, size, inum)
 1522         struct fs *fs;
 1523         struct vnode *devvp;
 1524         ufs2_daddr_t bno;
 1525         long size;
 1526         ino_t inum;
 1527 {
 1528         struct buf *ibp, *cbp, *savedcbp = 0;
 1529         struct thread *td = curthread;
 1530         struct inode *ip;
 1531         struct vnode *vp = NULL;
 1532         ufs_lbn_t lbn;
 1533         ufs2_daddr_t blkno;
 1534         int indiroff = 0, snapshot_locked = 0, error = 0, claimedblk = 0;
 1535         struct snaphead *snaphead;
 1536
              /*
               * lbn is the logical block inside each snapshot file that
               * corresponds to bno. snapshot_locked records whether the lock
               * behind vp->v_vnlock has already been obtained; once it is set
               * the lockmgr() calls below are skipped. Whenever a lock attempt
               * fails, the scan of the snapshot list restarts at "retry".
               */
 1537         lbn = fragstoblks(fs, bno);
 1538 retry:
 1539         VI_LOCK(devvp);
 1540         snaphead = &devvp->v_rdev->si_snapshots;
 1541         TAILQ_FOREACH(ip, snaphead, i_nextsnap) {
 1542                 vp = ITOV(ip);
 1543                 /*
 1544                  * Lookup block being written.
                       * Direct blocks are read straight from the inode. For an
                       * indirect block the lock is taken first and the
                       * indirect block is fetched into ibp with BA_METAONLY;
                       * TDP_COWINPROGRESS is set on the thread for the
                       * duration of the UFS_BALLOC call.
 1545                  */
 1546                 if (lbn < NDADDR) {
 1547                         blkno = DIP(ip, i_db[lbn]);
 1548                 } else {
 1549                         if (snapshot_locked == 0 &&
 1550                             lockmgr(vp->v_vnlock,
 1551                               LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
 1552                               VI_MTX(devvp), td) != 0)
 1553                                 goto retry;
 1554                         snapshot_locked = 1;
 1555                         td->td_pflags |= TDP_COWINPROGRESS;
 1556                         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 1557                             fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
 1558                         td->td_pflags &= ~TDP_COWINPROGRESS;
 1559                         if (error)
 1560                                 break;
 1561                         indiroff = (lbn - NDADDR) % NINDIR(fs);
 1562                         if (ip->i_ump->um_fstype == UFS1)
 1563                                 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
 1564                         else
 1565                                 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
 1566                 }
 1567                 /*
 1568                  * Check to see if block needs to be copied.
 1569                  */
 1570                 if (blkno == 0) {
 1571                         /*
 1572                          * A block that we map is being freed. If it has not
 1573                          * been claimed yet, we will claim or copy it (below).
 1574                          */
 1575                         claimedblk = 1;
 1576                 } else if (blkno == BLK_SNAP) {
 1577                         /*
 1578                          * No previous snapshot claimed the block,
 1579                          * so it will be freed and become a BLK_NOCOPY
 1580                          * (don't care) for us.
 1581                          */
 1582                         if (claimedblk)
 1583                                 panic("snapblkfree: inconsistent block type");
                              /*
                               * Try for the lock without sleeping. On failure
                               * give back ibp (if one was fetched), wait on
                               * the vnode lock with LK_SLEEPFAIL and rescan.
                               */
 1584                         if (snapshot_locked == 0 &&
 1585                             lockmgr(vp->v_vnlock,
 1586                               LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
 1587                               VI_MTX(devvp), td) != 0) {
 1588                                 if (lbn >= NDADDR)
 1589                                         bqrelse(ibp);
 1590                                 vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL, td);
 1591                                 goto retry;
 1592                         }
 1593                         snapshot_locked = 1;
 1594                         if (lbn < NDADDR) {
 1595                                 DIP(ip, i_db[lbn]) = BLK_NOCOPY;
 1596                                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1597                         } else if (ip->i_ump->um_fstype == UFS1) {
 1598                                 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] =
 1599                                     BLK_NOCOPY;
 1600                                 bdwrite(ibp);
 1601                         } else {
 1602                                 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] =
 1603                                     BLK_NOCOPY;
 1604                                 bdwrite(ibp);
 1605                         }
 1606                         continue;
 1607                 } else /* BLK_NOCOPY or default */ {
 1608                         /*
 1609                          * If the snapshot has already copied the block
 1610                          * (default), or does not care about the block,
 1611                          * it is not needed.
 1612                          */
 1613                         if (lbn >= NDADDR)
 1614                                 bqrelse(ibp);
 1615                         continue;
 1616                 }
 1617                 /*
 1618                  * If this is a full size block, we will just grab it
 1619                  * and assign it to the snapshot inode. Otherwise we
 1620                  * will proceed to copy it. See explanation for this
 1621                  * routine as to why only a single snapshot needs to
 1622                  * claim this block.
 1623                  */
 1624                 if (snapshot_locked == 0 &&
 1625                     lockmgr(vp->v_vnlock,
 1626                       LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT,
 1627                       VI_MTX(devvp), td) != 0) {
 1628                         if (lbn >= NDADDR)
 1629                                 bqrelse(ibp);
 1630                         vn_lock(vp, LK_EXCLUSIVE | LK_SLEEPFAIL, td);
 1631                         goto retry;
 1632                 }
 1633                 snapshot_locked = 1;
 1634                 if (size == fs->fs_bsize) {
 1635 #ifdef DEBUG
 1636                         if (snapdebug)
 1637                                 printf("%s %d lbn %jd from inum %d\n",
 1638                                     "Grabonremove: snapino", ip->i_number,
 1639                                     (intmax_t)lbn, inum);
 1640 #endif
                              /*
                               * Claim: record bno at the snapshot's logical
                               * block lbn, charge the block to the snapshot
                               * inode, unlock and report the claim.
                               */
 1641                         if (lbn < NDADDR) {
 1642                                 DIP(ip, i_db[lbn]) = bno;
 1643                         } else if (ip->i_ump->um_fstype == UFS1) {
 1644                                 ((ufs1_daddr_t *)(ibp->b_data))[indiroff] = bno;
 1645                                 bdwrite(ibp);
 1646                         } else {
 1647                                 ((ufs2_daddr_t *)(ibp->b_data))[indiroff] = bno;
 1648                                 bdwrite(ibp);
 1649                         }
 1650                         DIP(ip, i_blocks) += btodb(size);
 1651                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
 1652                         VOP_UNLOCK(vp, 0, td);
 1653                         return (1);
 1654                 }
 1655                 if (lbn >= NDADDR)
 1656                         bqrelse(ibp);
 1657                 /*
 1658                  * Allocate the block into which to do the copy. Note that this
 1659                  * allocation will never require any additional allocations for
 1660                  * the snapshot inode.
 1661                  */
 1662                 td->td_pflags |= TDP_COWINPROGRESS;
 1663                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 1664                     fs->fs_bsize, KERNCRED, 0, &cbp);
 1665                 td->td_pflags &= ~TDP_COWINPROGRESS;
 1666                 if (error)
 1667                         break;
 1668 #ifdef DEBUG
 1669                 if (snapdebug)
 1670                         printf("%s%d lbn %jd %s %d size %ld to blkno %jd\n",
 1671                             "Copyonremove: snapino ", ip->i_number,
 1672                             (intmax_t)lbn, "for inum", inum, size,
 1673                             (intmax_t)cbp->b_blkno);
 1674 #endif
 1675                 /*
 1676                  * If we have already read the old block contents, then
 1677                  * simply copy them to the new block. Note that we need
 1678                  * to synchronously write snapshots that have not been
 1679                  * unlinked, and hence will be visible after a crash,
 1680                  * to ensure their integrity.
 1681                  */
 1682                 if (savedcbp != 0) {
 1683                         bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
 1684                         bawrite(cbp);
 1685                         if (dopersistence && ip->i_effnlink > 0)
 1686                                 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 1687                         continue;
 1688                 }
 1689                 /*
 1690                  * Otherwise, read the old block contents into the buffer.
                       * If the read fails the copy block is zero-filled and
                       * written anyway, and the scan stops with the read error.
 1691                  */
 1692                 if ((error = readblock(cbp, lbn)) != 0) {
 1693                         bzero(cbp->b_data, fs->fs_bsize);
 1694                         bawrite(cbp);
 1695                         if (dopersistence && ip->i_effnlink > 0)
 1696                                 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 1697                         break;
 1698                 }
                      /* Keep the first copy around as the source for later ones. */
 1699                 savedcbp = cbp;
 1700         }
 1701         /*
 1702          * Note that we need to synchronously write snapshots that
 1703          * have not been unlinked, and hence will be visible after
 1704          * a crash, to ensure their integrity.
 1705          */
 1706         if (savedcbp) {
 1707                 vp = savedcbp->b_vp;
 1708                 bawrite(savedcbp);
 1709                 if (dopersistence && VTOI(vp)->i_effnlink > 0)
 1710                         (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 1711         }
 1712         /*
 1713          * If we have been unable to allocate a block in which to do
 1714          * the copy, then return non-zero so that the fragment will
 1715          * not be freed. Although space will be lost, the snapshot
 1716          * will stay consistent.
               *
               * Exactly one of the two locks is still held at this point:
               * the vnode lock if snapshot_locked was set, otherwise the
               * devvp interlock taken at "retry".
 1717          */
 1718         if (snapshot_locked)
 1719                 VOP_UNLOCK(vp, 0, td);
 1720         else
 1721                 VI_UNLOCK(devvp);
 1722         return (error);
 1723 }
 1724 
 1725 /*
 1726  * Associate snapshot files when mounting.
       *
       * Walks the snapshot inode numbers recorded in the superblock
       * (fs_snapinum[]), drops entries that are not snapshots or are in
       * the old format, switches every remaining snapshot vnode over to
       * the lock shared by all snapshots of the filesystem and links its
       * inode onto the device's si_snapshots list. Afterwards the block
       * hint list stored at the end of the last activated snapshot is
       * read in and copy-on-write is enabled on the device vnode.
       *
       * mp is the mount point being set up. Nothing is returned; failures
       * are reported with printf() only.
 1727  */
 1728 void
 1729 ffs_snapshot_mount(mp)
 1730         struct mount *mp;
 1731 {
 1732         struct ufsmount *ump = VFSTOUFS(mp);
 1733         struct vnode *devvp = ump->um_devvp;
 1734         struct fs *fs = ump->um_fs;
 1735         struct thread *td = curthread;
 1736         struct snaphead *snaphead;
 1737         struct vnode *vp, *lastvp;
 1738         struct inode *ip, *xp;
 1739         struct uio auio;
 1740         struct iovec aiov;
 1741         void *snapblklist;
 1742         char *reason;
 1743         daddr_t snaplistsize;
 1744         int error, snaploc, loc;
 1745
 1746         /*
 1747          * XXX The following needs to be set before UFS_TRUNCATE or
 1748          * VOP_READ can be called.
 1749          */
 1750         mp->mnt_stat.f_iosize = fs->fs_bsize;
 1751         /*
 1752          * Process each snapshot listed in the superblock.
               *
               * vp is only scratch space inside the loop: it is left NULL
               * or stale when a slot fails VFS_VGET or is rejected. lastvp
               * remembers the last snapshot that was actually linked onto
               * the active list, so that a bad final slot cannot hide the
               * snapshots activated before it.
 1753          */
 1754         vp = NULL;
              lastvp = NULL;
 1755         snaphead = &devvp->v_rdev->si_snapshots;
 1756         for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
 1757                 if (fs->fs_snapinum[snaploc] == 0)
 1758                         break;
 1759                 if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc],
 1760                     LK_EXCLUSIVE, &vp)) != 0){
 1761                         printf("ffs_snapshot_mount: vget failed %d\n", error);
 1762                         continue;
 1763                 }
 1764                 ip = VTOI(vp);
 1765                 if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size ==
 1766                     lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
 1767                         if ((ip->i_flags & SF_SNAPSHOT) == 0) {
 1768                                 reason = "non-snapshot";
 1769                         } else {
 1770                                 reason = "old format snapshot";
 1771                                 (void)UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
 1772                                 (void)VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 1773                         }
 1774                         printf("ffs_snapshot_mount: %s inode %d\n",
 1775                             reason, fs->fs_snapinum[snaploc]);
 1776                         vput(vp);
 1777                         vp = NULL;
                              /*
                               * Close the gap in fs_snapinum[] and revisit the
                               * same slot on the next iteration.
                               */
 1778                         for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
 1779                                 if (fs->fs_snapinum[loc] == 0)
 1780                                         break;
 1781                                 fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
 1782                         }
 1783                         fs->fs_snapinum[loc - 1] = 0;
 1784                         snaploc--;
 1785                         continue;
 1786                 }
 1787                 /*
 1788                  * If there already exist snapshots on this filesystem, grab a
 1789                  * reference to their shared lock. If this is the first snapshot
 1790                  * on this filesystem, we need to allocate a lock for the
 1791                  * snapshots to share. In either case, acquire the snapshot
 1792                  * lock and give up our original private lock.
 1793                  */
 1794                 VI_LOCK(devvp);
 1795                 if ((xp = TAILQ_FIRST(snaphead)) != NULL) {
 1796                         VI_LOCK(vp);
 1797                         vp->v_vnlock = ITOV(xp)->v_vnlock;
 1798                         VI_UNLOCK(devvp);
 1799                 } else {
 1800                         struct lock *lkp;
 1801
 1802                         VI_UNLOCK(devvp);
 1803                         MALLOC(lkp, struct lock *, sizeof(struct lock),
 1804                             M_UFSMNT, M_WAITOK);
 1805                         lockinit(lkp, PVFS, "snaplk", VLKTIMEOUT,
 1806                             LK_CANRECURSE | LK_NOPAUSE);
 1807                         VI_LOCK(vp);
 1808                         vp->v_vnlock = lkp;
 1809                 }
                      /*
                       * The vnode interlock taken above is passed to vn_lock()
                       * through LK_INTERLOCK. Once the new lock is held, move
                       * over anyone queued on the private lock and release it.
                       */
 1810                 vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY, td);
 1811                 transferlockers(&vp->v_lock, vp->v_vnlock);
 1812                 lockmgr(&vp->v_lock, LK_RELEASE, NULL, td);
 1813                 /*
 1814                  * Link it onto the active snapshot list.
 1815                  */
 1816                 VI_LOCK(devvp);
 1817                 if (ip->i_nextsnap.tqe_prev != 0)
 1818                         panic("ffs_snapshot_mount: %d already on list",
 1819                             ip->i_number);
 1820                 else
 1821                         TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap);
 1822                 vp->v_vflag |= VV_SYSTEM;
 1823                 VI_UNLOCK(devvp);
                      lastvp = vp;
 1824                 VOP_UNLOCK(vp, 0, td);
 1825         }
              /*
               * Continue with the last snapshot that was really activated,
               * not with whatever the final loop iteration left in vp.
               */
              vp = lastvp;
 1826         /*
 1827          * No usable snapshots found.
 1828          */
 1829         if (vp == NULL)
 1830                 return;
 1831         /*
 1832          * Allocate the space for the block hints list. We always want to
 1833          * use the list from the newest snapshot.
               *
               * The first read fetches the list length, stored in the
               * snapshot file at the offset given by
               * lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)).
               *
               * NOTE(review): snaplistsize comes straight from disk and is
               * used unchecked to size the allocation below -- confirm that
               * it is validated elsewhere.
 1834          */
 1835         auio.uio_iov = &aiov;
 1836         auio.uio_iovcnt = 1;
 1837         aiov.iov_base = (void *)&snaplistsize;
 1838         aiov.iov_len = sizeof(snaplistsize);
 1839         auio.uio_resid = aiov.iov_len;
 1840         auio.uio_offset =
 1841             lblktosize(fs, howmany(fs->fs_size, fs->fs_frag));
 1842         auio.uio_segflg = UIO_SYSSPACE;
 1843         auio.uio_rw = UIO_READ;
 1844         auio.uio_td = td;
 1845         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1846         if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
 1847                 printf("ffs_snapshot_mount: read_1 failed %d\n", error);
 1848                 VOP_UNLOCK(vp, 0, td);
 1849                 return;
 1850         }
 1851         MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t),
 1852             M_UFSMNT, M_WAITOK);
              /*
               * Second read: step the offset back over the length word so
               * that the in-core list starts with it, then read
               * snaplistsize entries in one go.
               */
 1853         auio.uio_iovcnt = 1;
 1854         aiov.iov_base = snapblklist;
 1855         aiov.iov_len = snaplistsize * sizeof (daddr_t);
 1856         auio.uio_resid = aiov.iov_len;
 1857         auio.uio_offset -= sizeof(snaplistsize);
 1858         if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) {
 1859                 printf("ffs_snapshot_mount: read_2 failed %d\n", error);
 1860                 VOP_UNLOCK(vp, 0, td);
 1861                 FREE(snapblklist, M_UFSMNT);
 1862                 return;
 1863         }
 1864         VOP_UNLOCK(vp, 0, td);
              /*
               * Publish the hint list and the copy-on-write hook on the
               * device under the devvp interlock.
               */
 1865         VI_LOCK(devvp);
 1866         ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount");
 1867         devvp->v_rdev->si_snaplistsize = snaplistsize;
 1868         devvp->v_rdev->si_snapblklist = (daddr_t *)snapblklist;
 1869         devvp->v_rdev->si_copyonwrite = ffs_copyonwrite;
 1870         devvp->v_vflag |= VV_COPYONWRITE;
 1871         VI_UNLOCK(devvp);
 1872 }
 1873 
 1874 /*
 1875  * Disassociate snapshot files when unmounting.
       *
       * Empties the device's list of active snapshots, pointing each
       * snapshot vnode back at its own v_lock, then frees the block hint
       * list and the lock the snapshots had been using and turns
       * copy-on-write off on the device vnode.
       *
       * mp is the mount point being torn down. Nothing is returned.
 1876  */
 1877 void
 1878 ffs_snapshot_unmount(mp)
 1879         struct mount *mp;
 1880 {
 1881         struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
 1882         struct snaphead *snaphead = &devvp->v_rdev->si_snapshots;
 1883         struct lock *lkp = NULL;
 1884         struct inode *xp;
 1885         struct vnode *vp;
 1886
 1887         VI_LOCK(devvp);
              /*
               * Pop snapshots off the head of the list until it is empty.
               * lkp ends up holding the v_vnlock of the last vnode processed
               * and stays NULL if the list was empty to begin with.
               */
 1888         while ((xp = TAILQ_FIRST(snaphead)) != 0) {
 1889                 vp = ITOV(xp);
 1890                 lkp = vp->v_vnlock;
 1891                 vp->v_vnlock = &vp->v_lock;
 1892                 TAILQ_REMOVE(snaphead, xp, i_nextsnap);
 1893                 xp->i_nextsnap.tqe_prev = 0;
                      /*
                       * Snapshots that still have links get their vnode
                       * reference dropped; the interlock is released around
                       * the vrele() call and taken again afterwards.
                       */
 1894                 if (xp->i_effnlink > 0) {
 1895                         VI_UNLOCK(devvp);
 1896                         vrele(vp);
 1897                         VI_LOCK(devvp);
 1898                 }
 1899         }
 1900         if (devvp->v_rdev->si_snapblklist != NULL) {
 1901                 FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT);
 1902                 devvp->v_rdev->si_snapblklist = NULL;
 1903                 devvp->v_rdev->si_snaplistsize = 0;
 1904         }
 1905         if (lkp != NULL) {
 1906                 lockdestroy(lkp);
 1907                 FREE(lkp, M_UFSMNT);
 1908         }
 1909         ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
 1910         devvp->v_rdev->si_copyonwrite = 0;
 1911         devvp->v_vflag &= ~VV_COPYONWRITE;
 1912         VI_UNLOCK(devvp);
 1913 }
 1914 
 1915 /*
 1916  * Check for need to copy block that is about to be written,
 1917  * copying the block if necessary.
       *
       * devvp is the device vnode whose snapshot list is consulted and bp
       * the buffer about to be written; bp->b_blkno selects the filesystem
       * block that is checked.
       *
       * Returns 0 when there are no snapshots, when the block is found in
       * the preallocated list, or when every copy that was needed succeeded;
       * otherwise returns the error from UFS_BALLOC or readblock.
       * Panics if the calling thread already has TDP_COWINPROGRESS set.
 1918  */
 1919 static int
 1920 ffs_copyonwrite(devvp, bp)
 1921         struct vnode *devvp;
 1922         struct buf *bp;
 1923 {
 1924         struct snaphead *snaphead;
 1925         struct buf *ibp, *cbp, *savedcbp = 0;
 1926         struct thread *td = curthread;
 1927         struct fs *fs;
 1928         struct inode *ip;
 1929         struct vnode *vp = 0;
 1930         ufs2_daddr_t lbn, blkno, *snapblklist;
 1931         int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0;
 1932
 1933         if (td->td_pflags & TDP_COWINPROGRESS)
 1934                 panic("ffs_copyonwrite: recursive call");
 1935         /*
 1936          * First check to see if it is in the preallocated list.
 1937          * By doing this check we avoid several potential deadlocks.
 1938          */
 1939         VI_LOCK(devvp);
 1940         snaphead = &devvp->v_rdev->si_snapshots;
 1941         ip = TAILQ_FIRST(snaphead);
              /*
               * The snapshot list can be emptied under this same interlock
               * (see ffs_snapremove and ffs_snapshot_unmount). With no
               * snapshot left there is nothing to copy, and ip must not be
               * dereferenced.
               */
              if (ip == NULL) {
                      VI_UNLOCK(devvp);
                      return (0);
              }
 1942         fs = ip->i_fs;
 1943         lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
              /*
               * Binary search of the hint list for lbn. The search starts at
               * index 1; entry 0 is never compared.
               */
 1944         snapblklist = devvp->v_rdev->si_snapblklist;
 1945         upper = devvp->v_rdev->si_snaplistsize - 1;
 1946         lower = 1;
 1947         while (lower <= upper) {
 1948                 mid = (lower + upper) / 2;
 1949                 if (snapblklist[mid] == lbn)
 1950                         break;
 1951                 if (snapblklist[mid] < lbn)
 1952                         lower = mid + 1;
 1953                 else
 1954                         upper = mid - 1;
 1955         }
              /* lower <= upper only when the loop was left through the break. */
 1956         if (lower <= upper) {
 1957                 VI_UNLOCK(devvp);
 1958                 return (0);
 1959         }
 1960         /*
 1961          * Not in the precomputed list, so check the snapshots.
               * The devvp interlock is held on every entry to "retry".
 1962          */
 1963 retry:
 1964         TAILQ_FOREACH(ip, snaphead, i_nextsnap) {
 1965                 vp = ITOV(ip);
 1966                 /*
 1967                  * We ensure that everything of our own that needs to be
 1968                  * copied will be done at the time that ffs_snapshot is
 1969                  * called. Thus we can skip the check here which can
 1970                  * deadlock in doing the lookup in UFS_BALLOC.
 1971                  */
 1972                 if (bp->b_vp == vp)
 1973                         continue;
 1974                 /*
 1975                  * Check to see if block needs to be copied. We do not have
 1976                  * to hold the snapshot lock while doing this lookup as it
 1977                  * will never require any additional allocations for the
 1978                  * snapshot inode.
 1979                  */
 1980                 if (lbn < NDADDR) {
 1981                         blkno = DIP(ip, i_db[lbn]);
 1982                 } else {
                              /*
                               * If the lock attempt fails, take the interlock
                               * again and rescan the list from the start.
                               */
 1983                         if (snapshot_locked == 0 &&
 1984                             lockmgr(vp->v_vnlock,
 1985                               LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
 1986                               VI_MTX(devvp), td) != 0) {
 1987                                 VI_LOCK(devvp);
 1988                                 goto retry;
 1989                         }
 1990                         snapshot_locked = 1;
 1991                         td->td_pflags |= TDP_COWINPROGRESS;
 1992                         error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 1993                            fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
 1994                         td->td_pflags &= ~TDP_COWINPROGRESS;
 1995                         if (error)
 1996                                 break;
 1997                         indiroff = (lbn - NDADDR) % NINDIR(fs);
 1998                         if (ip->i_ump->um_fstype == UFS1)
 1999                                 blkno=((ufs1_daddr_t *)(ibp->b_data))[indiroff];
 2000                         else
 2001                                 blkno=((ufs2_daddr_t *)(ibp->b_data))[indiroff];
 2002                         bqrelse(ibp);
 2003                 }
 2004 #ifdef DIAGNOSTIC
 2005                 if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
 2006                         panic("ffs_copyonwrite: bad copy block");
 2007 #endif
                      /* Only a zero entry means this snapshot still needs a copy. */
 2008                 if (blkno != 0)
 2009                         continue;
 2010                 /*
 2011                  * Allocate the block into which to do the copy. Since
 2012                  * multiple processes may all try to copy the same block,
 2013                  * we have to recheck our need to do a copy if we sleep
 2014                  * waiting for the lock.
 2015                  *
 2016                  * Because all snapshots on a filesystem share a single
 2017                  * lock, we ensure that we will never be in competition
 2018                  * with another process to allocate a block.
 2019                  */
 2020                 if (snapshot_locked == 0 &&
 2021                     lockmgr(vp->v_vnlock,
 2022                       LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL,
 2023                       VI_MTX(devvp), td) != 0) {
 2024                         VI_LOCK(devvp);
 2025                         goto retry;
 2026                 }
 2027                 snapshot_locked = 1;
 2028                 td->td_pflags |= TDP_COWINPROGRESS;
 2029                 error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
 2030                     fs->fs_bsize, KERNCRED, 0, &cbp);
 2031                 td->td_pflags &= ~TDP_COWINPROGRESS;
 2032                 if (error)
 2033                         break;
 2034 #ifdef DEBUG
 2035                 if (snapdebug) {
 2036                         printf("Copyonwrite: snapino %d lbn %jd for ",
 2037                             ip->i_number, (intmax_t)lbn);
 2038                         if (bp->b_vp == devvp)
 2039                                 printf("fs metadata");
 2040                         else
 2041                                 printf("inum %d", VTOI(bp->b_vp)->i_number);
 2042                         printf(" lblkno %jd to blkno %jd\n",
 2043                             (intmax_t)bp->b_lblkno, (intmax_t)cbp->b_blkno);
 2044                 }
 2045 #endif
 2046                 /*
 2047                  * If we have already read the old block contents, then
 2048                  * simply copy them to the new block. Note that we need
 2049                  * to synchronously write snapshots that have not been
 2050                  * unlinked, and hence will be visible after a crash,
 2051                  * to ensure their integrity.
 2052                  */
 2053                 if (savedcbp != 0) {
 2054                         bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
 2055                         bawrite(cbp);
 2056                         if (dopersistence && ip->i_effnlink > 0)
 2057                                 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 2058                         continue;
 2059                 }
 2060                 /*
 2061                  * Otherwise, read the old block contents into the buffer.
                       * If the read fails the copy block is zero-filled and
                       * written anyway, and the scan stops with the read error.
 2062                  */
 2063                 if ((error = readblock(cbp, lbn)) != 0) {
 2064                         bzero(cbp->b_data, fs->fs_bsize);
 2065                         bawrite(cbp);
 2066                         if (dopersistence && ip->i_effnlink > 0)
 2067                                 (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 2068                         break;
 2069                 }
 2070                 savedcbp = cbp;
 2071         }
 2072         /*
 2073          * Note that we need to synchronously write snapshots that
 2074          * have not been unlinked, and hence will be visible after
 2075          * a crash, to ensure their integrity.
 2076          */
 2077         if (savedcbp) {
 2078                 vp = savedcbp->b_vp;
 2079                 bawrite(savedcbp);
 2080                 if (dopersistence && VTOI(vp)->i_effnlink > 0)
 2081                         (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, td);
 2082         }
              /*
               * Release whichever lock is still held: the vnode lock if it
               * was obtained during the scan, otherwise the devvp interlock.
               */
 2083         if (snapshot_locked)
 2084                 VOP_UNLOCK(vp, 0, td);
 2085         else
 2086                 VI_UNLOCK(devvp);
 2087         return (error);
 2088 }
 2089 
 2090 /*
 2091  * Read the specified block into the given buffer.
 2092  * Much of this boiler-plate comes from bwrite().
       *
       * bp supplies both the destination (b_data, b_bcount bytes) and, via
       * b_vp, the inode whose filesystem and device are used. lbn is the
       * filesystem block number to read; it is converted to a byte offset
       * on the device. Returns whatever physio() returns.
 2093  */
 2094 static int
 2095 readblock(bp, lbn)
 2096         struct buf *bp;
 2097         ufs2_daddr_t lbn;
 2098 {
 2099         struct uio auio;
 2100         struct iovec aiov;
 2101         struct thread *td = curthread;
 2102         struct inode *ip = VTOI(bp->b_vp);
 2103
              /* One kernel-space segment covering the whole buffer. */
 2104         aiov.iov_base = bp->b_data;
 2105         aiov.iov_len = bp->b_bcount;
 2106         auio.uio_iov = &aiov;
 2107         auio.uio_iovcnt = 1;
              /* block -> fragments -> device blocks -> bytes */
 2108         auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
 2109         auio.uio_resid = bp->b_bcount;
 2110         auio.uio_rw = UIO_READ;
 2111         auio.uio_segflg = UIO_SYSSPACE;
 2112         auio.uio_td = td;
              /* Read from the inode's device (i_devvp->v_rdev). */
 2113         return (physio(ip->i_devvp->v_rdev, &auio, 0));
 2114 }
Cache object: b331614e97d93af43a3a9e84e335c991
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/ufs/ffs/ffs_snapshot.c

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_snapshot.c