The following corrects a bug in snapshots. The bug causes snapshots to fail to copy certain blocks which in turn causes background fsck to panic the kernel with "freeing free block". It can also lead to filesystem deadlock. Though a more extensive change than you probably want at this point, I think it is important to include. ~Kirk =-=-=-=-=-= Only the most recent snapshot contains the complete list of blocks that were copied in all of the earlier snapshots, thus its precomputed list must be used in the copyonwrite test. Using incomplete lists may lead to deadlock. Also do not include the blocks used for the indirect pointers in the indirect pointers as this may lead to inconsistent snapshots. Sponsored by: DARPA & NAI Labs. Approved by: re =-=-=-=-=-= Index: ufs/ufs/inode.h =================================================================== RCS file: /usr/ncvs/src/sys/ufs/ufs/inode.h,v retrieving revision 1.41 diff -c -r1.41 inode.h *** ufs/ufs/inode.h 2002/10/09 07:28:35 1.41 --- ufs/ufs/inode.h 2002/12/12 14:36:19 *************** *** 63,70 **** struct inode { LIST_ENTRY(inode) i_hash;/* Hash chain. */ TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ - daddr_t i_snaplistsize; /* size of snapblklist. */ - daddr_t *i_snapblklist; /* list of known snapshot blocks. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct ufsmount *i_ump;/* Ufsmount point associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ --- 63,68 ---- Index: sys/conf.h =================================================================== RCS file: /usr/ncvs/src/sys/sys/conf.h,v retrieving revision 1.150 diff -c -r1.150 conf.h *** sys/conf.h 2002/10/24 19:38:56 1.150 --- sys/conf.h 2002/12/12 14:36:53 *************** *** 72,79 **** LIST_HEAD(, cdev) si_children; LIST_ENTRY(cdev) si_siblings; dev_t si_parent; - struct snaphead si_snapshots; - int (*si_copyonwrite)(struct vnode *, struct buf *); u_int si_inode; char si_name[SPECNAMELEN + 1]; void *si_drv1, *si_drv2; --- 72,77 ---- *************** *** 92,97 **** --- 90,99 ---- struct mount *__sid_mountpoint; int __sid_bsize_phys; /* min physical block size */ int __sid_bsize_best; /* optimal block size */ + struct snaphead __sid_snapshots; + daddr_t __sid_snaplistsize; /* size of snapblklist. */ + daddr_t *__sid_snapblklist; /* known snapshot blocks. */ + int (*__sid_copyonwrite)(struct vnode *, struct buf *); } __si_disk; } __si_u; }; *************** *** 101,106 **** --- 103,112 ---- #define si_mountpoint __si_u.__si_disk.__sid_mountpoint #define si_bsize_phys __si_u.__si_disk.__sid_bsize_phys #define si_bsize_best __si_u.__si_disk.__sid_bsize_best + #define si_snapshots __si_u.__si_disk.__sid_snapshots + #define si_snaplistsize __si_u.__si_disk.__sid_snaplistsize + #define si_snapblklist __si_u.__si_disk.__sid_snapblklist + #define si_copyonwrite __si_u.__si_disk.__sid_copyonwrite /* * Special device management Index: ufs/ffs/ffs_snapshot.c =================================================================== RCS file: /usr/ncvs/src/sys/ufs/ffs/ffs_snapshot.c,v retrieving revision 1.53 diff -c -r1.53 ffs_snapshot.c *** ufs/ffs/ffs_snapshot.c 2002/12/03 18:19:27 1.53 --- ufs/ffs/ffs_snapshot.c 2002/12/13 08:14:03 *************** *** 125,134 **** int flag = mp->mnt_flag; struct timespec starttime = {0, 0}, endtime; char saved_nice = 0; ! long redo = 0; int32_t *lp; void *space; ! daddr_t *listhd; struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs; struct snaphead *snaphead; struct thread *td = curthread; --- 125,134 ---- int flag = mp->mnt_flag; struct timespec starttime = {0, 0}, endtime; char saved_nice = 0; ! long redo = 0, snaplistsize; int32_t *lp; void *space; ! daddr_t *snapblklist; struct fs *copy_fs = NULL, *fs = VFSTOUFS(mp)->um_fs; struct snaphead *snaphead; struct thread *td = curthread; *************** *** 532,547 **** } /* * Allocate the space for the list of preallocated snapshot blocks. */ ! ip->i_snaplistsize = fragstoblks(fs, dbtofsb(fs, DIP(ip,i_blocks))) + 1; ! MALLOC(listhd, daddr_t *, ip->i_snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK); ! ip->i_snapblklist = listhd; ! *ip->i_snapblklist++ = ip->i_snaplistsize; /* * Expunge the blocks used by the snapshots from the set of * blocks marked as used in the snapshot bitmaps. Also, collect ! * the list of allocated blocks in i_snapblklist. */ if (ip->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); --- 532,549 ---- } /* * Allocate the space for the list of preallocated snapshot blocks. + * The i_offset field is borrowed to pass the value of snapblklist + * down into the expunge functions. */ ! snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + ! FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; ! MALLOC(snapblklist, daddr_t *, snaplistsize * sizeof(daddr_t), M_UFSMNT, M_WAITOK); ! ((daddr_t *)(ip->i_offset)) = &snapblklist[1]; /* * Expunge the blocks used by the snapshots from the set of * blocks marked as used in the snapshot bitmaps. Also, collect ! * the list of allocated blocks in i_offset. */ if (ip->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); *************** *** 549,568 **** error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); if (error) { fs->fs_snapinum[snaploc] = 0; ! FREE(listhd, M_UFSMNT); goto done; } /* * Write out the list of allocated blocks to the end of the snapshot. */ - if (ip->i_snapblklist - listhd != ip->i_snaplistsize) - printf("Snaplist mismatch, got %jd should be %jd\n", - (intmax_t)(ip->i_snapblklist - listhd), - (intmax_t)ip->i_snaplistsize); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; ! aiov.iov_base = (void *)listhd; ! aiov.iov_len = ip->i_snaplistsize * sizeof(daddr_t); auio.uio_resid = aiov.iov_len;; auio.uio_offset = ip->i_size; auio.uio_segflg = UIO_SYSSPACE; --- 551,569 ---- error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); if (error) { fs->fs_snapinum[snaploc] = 0; ! FREE(snapblklist, M_UFSMNT); goto done; } + snaplistsize = ((daddr_t *)(ip->i_offset)) - snapblklist; + snapblklist[0] = snaplistsize; + ip->i_offset = 0; /* * Write out the list of allocated blocks to the end of the snapshot. */ auio.uio_iov = &aiov; auio.uio_iovcnt = 1; ! aiov.iov_base = (void *)snapblklist; ! aiov.iov_len = snaplistsize * sizeof(daddr_t); auio.uio_resid = aiov.iov_len;; auio.uio_offset = ip->i_size; auio.uio_segflg = UIO_SYSSPACE; *************** *** 570,579 **** auio.uio_td = td; if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { fs->fs_snapinum[snaploc] = 0; ! FREE(listhd, M_UFSMNT); goto done; } - ip->i_snapblklist = listhd; /* * Write the superblock and its summary information * to the snapshot. --- 571,579 ---- auio.uio_td = td; if ((error = VOP_WRITE(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { fs->fs_snapinum[snaploc] = 0; ! FREE(snapblklist, M_UFSMNT); goto done; } /* * Write the superblock and its summary information * to the snapshot. *************** *** 586,599 **** if (error) { brelse(nbp); fs->fs_snapinum[snaploc] = 0; ! FREE(listhd, M_UFSMNT); ! ip->i_snapblklist = NULL; goto done; } bcopy(space, nbp->b_data, fs->fs_bsize); space = (char *)space + fs->fs_bsize; bawrite(nbp); } done: free(copy_fs->fs_csp, M_UFSMNT); bawrite(sbp); --- 586,607 ---- if (error) { brelse(nbp); fs->fs_snapinum[snaploc] = 0; ! FREE(snapblklist, M_UFSMNT); goto done; } bcopy(space, nbp->b_data, fs->fs_bsize); space = (char *)space + fs->fs_bsize; bawrite(nbp); } + /* + * As this is the newest list, it is the most inclusive, so + * should replace the previous list. + */ + VI_LOCK(devvp); + FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT); + devvp->v_rdev->si_snapblklist = snapblklist; + devvp->v_rdev->si_snaplistsize = snaplistsize; + VI_UNLOCK(devvp); done: free(copy_fs->fs_csp, M_UFSMNT); bawrite(sbp); *************** *** 747,771 **** struct thread *td = curthread; struct buf *bp; - numblks = howmany(cancelip->i_size, fs->fs_bsize); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], - &cancelip->i_din1->di_ib[NIADDR], fs, 0, expungetype))) - return (error); - blksperindir = 1; - lbn = -NDADDR; - len = numblks - NDADDR; - rlbn = NDADDR; - for (i = 0; len > 0 && i < NIADDR; i++) { - error = indiracct_ufs1(snapvp, ITOV(cancelip), i, - cancelip->i_din1->di_ib[i], lbn, rlbn, len, - blksperindir, fs, acctfunc, expungetype); - if (error) - return (error); - blksperindir *= NINDIR(fs); - lbn -= blksperindir + 1; - len -= blksperindir; - rlbn += blksperindir; - } /* * Prepare to expunge the inode. If its inode block has not * yet been copied, then allocate and fill the copy. --- 755,760 ---- *************** *** 809,814 **** --- 798,829 ---- dip->di_flags &= ~SF_SNAPSHOT; bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs1_daddr_t)); bdwrite(bp); + /* + * Now go through and expunge all the blocks in the file + * using the function requested. + */ + numblks = howmany(cancelip->i_size, fs->fs_bsize); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_db[0], + &cancelip->i_din1->di_db[NDADDR], fs, 0, expungetype))) + return (error); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din1->di_ib[0], + &cancelip->i_din1->di_ib[NIADDR], fs, -1, expungetype))) + return (error); + blksperindir = 1; + lbn = -NDADDR; + len = numblks - NDADDR; + rlbn = NDADDR; + for (i = 0; len > 0 && i < NIADDR; i++) { + error = indiracct_ufs1(snapvp, ITOV(cancelip), i, + cancelip->i_din1->di_ib[i], lbn, rlbn, len, + blksperindir, fs, acctfunc, expungetype); + if (error) + return (error); + blksperindir *= NINDIR(fs); + lbn -= blksperindir + 1; + len -= blksperindir; + rlbn += blksperindir; + } return (0); } *************** *** 862,868 **** MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); bqrelse(bp); ! error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype); if (error || level == 0) goto out; /* --- 877,884 ---- MALLOC(bap, ufs1_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); bqrelse(bp); ! error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, ! level == 0 ? rlbn : -1, expungetype); if (error || level == 0) goto out; /* *************** *** 970,975 **** --- 986,997 ---- struct inode *ip; ino_t inum; + /* + * We only care about the leaf block numbers, not the + * meta-block numbers. + */ + if (lblkno == -1) + return (0); ip = VTOI(vp); inum = ip->i_number; for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { *************** *** 977,983 **** if (blkno == 0 || blkno == BLK_NOCOPY) continue; if (expungetype == BLK_SNAP && blkno != BLK_SNAP) ! *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); --- 999,1005 ---- if (blkno == 0 || blkno == BLK_NOCOPY) continue; if (expungetype == BLK_SNAP && blkno != BLK_SNAP) ! *((daddr_t *)(ip->i_offset))++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); *************** *** 1009,1033 **** struct thread *td = curthread; struct buf *bp; - numblks = howmany(cancelip->i_size, fs->fs_bsize); - if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], - &cancelip->i_din2->di_ib[NIADDR], fs, 0, expungetype))) - return (error); - blksperindir = 1; - lbn = -NDADDR; - len = numblks - NDADDR; - rlbn = NDADDR; - for (i = 0; len > 0 && i < NIADDR; i++) { - error = indiracct_ufs2(snapvp, ITOV(cancelip), i, - cancelip->i_din2->di_ib[i], lbn, rlbn, len, - blksperindir, fs, acctfunc, expungetype); - if (error) - return (error); - blksperindir *= NINDIR(fs); - lbn -= blksperindir + 1; - len -= blksperindir; - rlbn += blksperindir; - } /* * Prepare to expunge the inode. If its inode block has not * yet been copied, then allocate and fill the copy. --- 1031,1036 ---- *************** *** 1071,1076 **** --- 1074,1105 ---- dip->di_flags &= ~SF_SNAPSHOT; bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs2_daddr_t)); bdwrite(bp); + /* + * Now go through and expunge all the blocks in the file + * using the function requested. + */ + numblks = howmany(cancelip->i_size, fs->fs_bsize); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_db[0], + &cancelip->i_din2->di_db[NDADDR], fs, 0, expungetype))) + return (error); + if ((error = (*acctfunc)(snapvp, &cancelip->i_din2->di_ib[0], + &cancelip->i_din2->di_ib[NIADDR], fs, -1, expungetype))) + return (error); + blksperindir = 1; + lbn = -NDADDR; + len = numblks - NDADDR; + rlbn = NDADDR; + for (i = 0; len > 0 && i < NIADDR; i++) { + error = indiracct_ufs2(snapvp, ITOV(cancelip), i, + cancelip->i_din2->di_ib[i], lbn, rlbn, len, + blksperindir, fs, acctfunc, expungetype); + if (error) + return (error); + blksperindir *= NINDIR(fs); + lbn -= blksperindir + 1; + len -= blksperindir; + rlbn += blksperindir; + } return (0); } *************** *** 1124,1130 **** MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); bqrelse(bp); ! error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, rlbn, expungetype); if (error || level == 0) goto out; /* --- 1153,1160 ---- MALLOC(bap, ufs2_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK); bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize); bqrelse(bp); ! error = (*acctfunc)(snapvp, &bap[0], &bap[last], fs, ! level == 0 ? rlbn : -1, expungetype); if (error || level == 0) goto out; /* *************** *** 1232,1237 **** --- 1262,1273 ---- struct inode *ip; ino_t inum; + /* + * We only care about the leaf block numbers, not the + * meta-block numbers. + */ + if (lblkno == -1) + return (0); ip = VTOI(vp); inum = ip->i_number; for ( ; oldblkp < lastblkp; oldblkp++, lblkno++) { *************** *** 1239,1245 **** if (blkno == 0 || blkno == BLK_NOCOPY) continue; if (expungetype == BLK_SNAP && blkno != BLK_SNAP) ! *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); --- 1275,1281 ---- if (blkno == 0 || blkno == BLK_NOCOPY) continue; if (expungetype == BLK_SNAP && blkno != BLK_SNAP) ! *((daddr_t *)(ip->i_offset))++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); ffs_blkfree(fs, vp, blkno, fs->fs_bsize, inum); *************** *** 1265,1275 **** TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap) if (xp == ip) break; ! if (xp == 0) printf("ffs_snapgone: lost snapshot vnode %d\n", ip->i_number); - else - vrele(ITOV(ip)); /* * Delete snapshot inode from superblock. Keep list dense. */ --- 1301,1311 ---- TAILQ_FOREACH(xp, &ip->i_devvp->v_rdev->si_snapshots, i_nextsnap) if (xp == ip) break; ! if (xp != NULL) ! vrele(ITOV(ip)); ! else if (snapdebug) printf("ffs_snapgone: lost snapshot vnode %d\n", ip->i_number); /* * Delete snapshot inode from superblock. Keep list dense. */ *************** *** 1300,1306 **** struct buf *ibp; struct fs *fs; struct thread *td = curthread; ! ufs2_daddr_t numblks, blkno, dblk; int error, loc, last; ip = VTOI(vp); --- 1336,1342 ---- struct buf *ibp; struct fs *fs; struct thread *td = curthread; ! ufs2_daddr_t numblks, blkno, dblk, *snapblklist; int error, loc, last; ip = VTOI(vp); *************** *** 1326,1347 **** if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) { VI_UNLOCK(devvp); } else { devvp->v_rdev->si_copyonwrite = 0; devvp->v_vflag &= ~VV_COPYONWRITE; lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td); lockmgr(lkp, LK_RELEASE, NULL, td); lockdestroy(lkp); FREE(lkp, M_UFSMNT); } } /* - * Get rid of its hints list. - */ - if (ip->i_snapblklist != NULL) { - FREE(ip->i_snapblklist, M_UFSMNT); - ip->i_snapblklist = NULL; - } - /* * Clear all BLK_NOCOPY fields. Pass any block claims to other * snapshots that want them (see ffs_snapblkfree below). */ --- 1362,1380 ---- if (TAILQ_FIRST(&devvp->v_rdev->si_snapshots) != 0) { VI_UNLOCK(devvp); } else { + snapblklist = devvp->v_rdev->si_snapblklist; + devvp->v_rdev->si_snapblklist = 0; + devvp->v_rdev->si_snaplistsize = 0; devvp->v_rdev->si_copyonwrite = 0; devvp->v_vflag &= ~VV_COPYONWRITE; lockmgr(lkp, LK_DRAIN|LK_INTERLOCK, VI_MTX(devvp), td); lockmgr(lkp, LK_RELEASE, NULL, td); lockdestroy(lkp); FREE(lkp, M_UFSMNT); + FREE(snapblklist, M_UFSMNT); } } /* * Clear all BLK_NOCOPY fields. Pass any block claims to other * snapshots that want them (see ffs_snapblkfree below). */ *************** *** 1448,1465 **** blkno = DIP(ip, i_db[lbn]); } else { if (snapshot_locked == 0 && ! lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp), td) != 0) goto retry; td->td_proc->p_flag |= P_COWINPROGRESS; error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); td->td_proc->p_flag &= ~P_COWINPROGRESS; - if (snapshot_locked == 0) { - VI_LOCK(devvp); - lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); - } if (error) break; indiroff = (lbn - NDADDR) % NINDIR(fs); --- 1481,1495 ---- blkno = DIP(ip, i_db[lbn]); } else { if (snapshot_locked == 0 && ! lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp), td) != 0) goto retry; + snapshot_locked = 1; td->td_proc->p_flag |= P_COWINPROGRESS; error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); td->td_proc->p_flag &= ~P_COWINPROGRESS; if (error) break; indiroff = (lbn - NDADDR) % NINDIR(fs); *************** *** 1642,1649 **** struct inode *ip, *xp; struct uio auio; struct iovec aiov; ! void *listhd; char *reason; int error, snaploc, loc; /* --- 1672,1680 ---- struct inode *ip, *xp; struct uio auio; struct iovec aiov; ! void *snapblklist; char *reason; + daddr_t snaplistsize; int error, snaploc, loc; /* *************** *** 1654,1663 **** /* * Process each snapshot listed in the superblock. */ snaphead = &devvp->v_rdev->si_snapshots; for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { if (fs->fs_snapinum[snaploc] == 0) ! return; if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], LK_EXCLUSIVE, &vp)) != 0){ printf("ffs_snapshot_mount: vget failed %d\n", error); --- 1685,1695 ---- /* * Process each snapshot listed in the superblock. */ + vp = NULL; snaphead = &devvp->v_rdev->si_snapshots; for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) { if (fs->fs_snapinum[snaploc] == 0) ! break; if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], LK_EXCLUSIVE, &vp)) != 0){ printf("ffs_snapshot_mount: vget failed %d\n", error); *************** *** 1676,1681 **** --- 1708,1714 ---- printf("ffs_snapshot_mount: %s inode %d\n", reason, fs->fs_snapinum[snaploc]); vput(vp); + vp = NULL; for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) { if (fs->fs_snapinum[loc] == 0) break; *************** *** 1686,1721 **** continue; } /* - * Allocate the space for the block hints list. - */ - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (void *)&ip->i_snaplistsize; - aiov.iov_len = sizeof(ip->i_snaplistsize); - auio.uio_resid = aiov.iov_len; - auio.uio_offset = - lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_td = td; - if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { - printf("ffs_snapshot_mount: read_1 failed %d\n", error); - continue; - } - MALLOC(listhd, void *, ip->i_snaplistsize * sizeof(daddr_t), - M_UFSMNT, M_WAITOK); - auio.uio_iovcnt = 1; - aiov.iov_base = listhd; - aiov.iov_len = ip->i_snaplistsize * sizeof (daddr_t); - auio.uio_resid = aiov.iov_len; - auio.uio_offset -= sizeof(ip->i_snaplistsize); - if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { - printf("ffs_snapshot_mount: read_2 failed %d\n", error); - FREE(listhd, M_UFSMNT); - continue; - } - ip->i_snapblklist = (daddr_t *)listhd; - /* * If there already exist snapshots on this filesystem, grab a * reference to their shared lock. If this is the first snapshot * on this filesystem, we need to allocate a lock for the --- 1719,1724 ---- *************** *** 1751,1762 **** else TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); vp->v_vflag |= VV_SYSTEM; - devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; - ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); - devvp->v_vflag |= VV_COPYONWRITE; VI_UNLOCK(devvp); VOP_UNLOCK(vp, 0, td); } } /* --- 1754,1808 ---- else TAILQ_INSERT_TAIL(snaphead, ip, i_nextsnap); vp->v_vflag |= VV_SYSTEM; VI_UNLOCK(devvp); VOP_UNLOCK(vp, 0, td); } + /* + * No usable snapshots found. + */ + if (vp == NULL) + return; + /* + * Allocate the space for the block hints list. We always want to + * use the list from the newest snapshot. + */ + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (void *)&snaplistsize; + aiov.iov_len = sizeof(snaplistsize); + auio.uio_resid = aiov.iov_len; + auio.uio_offset = + lblktosize(fs, howmany(fs->fs_size, fs->fs_frag)); + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = td; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { + printf("ffs_snapshot_mount: read_1 failed %d\n", error); + VOP_UNLOCK(vp, 0, td); + return; + } + MALLOC(snapblklist, void *, snaplistsize * sizeof(daddr_t), + M_UFSMNT, M_WAITOK); + auio.uio_iovcnt = 1; + aiov.iov_base = snapblklist; + aiov.iov_len = snaplistsize * sizeof (daddr_t); + auio.uio_resid = aiov.iov_len; + auio.uio_offset -= sizeof(snaplistsize); + if ((error = VOP_READ(vp, &auio, IO_UNIT, td->td_ucred)) != 0) { + printf("ffs_snapshot_mount: read_2 failed %d\n", error); + VOP_UNLOCK(vp, 0, td); + FREE(snapblklist, M_UFSMNT); + return; + } + VOP_UNLOCK(vp, 0, td); + VI_LOCK(devvp); + ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_mount"); + devvp->v_rdev->si_snaplistsize = snaplistsize; + devvp->v_rdev->si_snapblklist = (daddr_t *)snapblklist; + devvp->v_rdev->si_copyonwrite = ffs_copyonwrite; + devvp->v_vflag |= VV_COPYONWRITE; + VI_UNLOCK(devvp); } /* *************** *** 1778,1787 **** lkp = vp->v_vnlock; vp->v_vnlock = &vp->v_lock; TAILQ_REMOVE(snaphead, xp, i_nextsnap); - if (xp->i_snapblklist != NULL) { - FREE(xp->i_snapblklist, M_UFSMNT); - xp->i_snapblklist = NULL; - } xp->i_nextsnap.tqe_prev = 0; if (xp->i_effnlink > 0) { VI_UNLOCK(devvp); --- 1824,1829 ---- *************** *** 1789,1794 **** --- 1831,1841 ---- VI_LOCK(devvp); } } + if (devvp->v_rdev->si_snapblklist != NULL) { + FREE(devvp->v_rdev->si_snapblklist, M_UFSMNT); + devvp->v_rdev->si_snapblklist = NULL; + devvp->v_rdev->si_snaplistsize = 0; + } if (lkp != NULL) { lockdestroy(lkp); FREE(lkp, M_UFSMNT); *************** *** 1814,1829 **** struct fs *fs; struct inode *ip; struct vnode *vp = 0; ! ufs2_daddr_t lbn, blkno; int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0; if (td->td_proc->p_flag & P_COWINPROGRESS) panic("ffs_copyonwrite: recursive call"); VI_LOCK(devvp); snaphead = &devvp->v_rdev->si_snapshots; ip = TAILQ_FIRST(snaphead); fs = ip->i_fs; lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); retry: TAILQ_FOREACH(ip, snaphead, i_nextsnap) { vp = ITOV(ip); --- 1861,1899 ---- struct fs *fs; struct inode *ip; struct vnode *vp = 0; ! ufs2_daddr_t lbn, blkno, *snapblklist; int lower, upper, mid, indiroff, snapshot_locked = 0, error = 0; if (td->td_proc->p_flag & P_COWINPROGRESS) panic("ffs_copyonwrite: recursive call"); + /* + * First check to see if it is in the preallocated list. + * By doing this check we avoid several potential deadlocks. + */ VI_LOCK(devvp); snaphead = &devvp->v_rdev->si_snapshots; ip = TAILQ_FIRST(snaphead); fs = ip->i_fs; lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); + snapblklist = devvp->v_rdev->si_snapblklist; + upper = devvp->v_rdev->si_snaplistsize - 1; + lower = 1; + while (lower <= upper) { + mid = (lower + upper) / 2; + if (snapblklist[mid] == lbn) + break; + if (snapblklist[mid] < lbn) + lower = mid + 1; + else + upper = mid - 1; + } + if (lower <= upper) { + VI_UNLOCK(devvp); + return (0); + } + /* + * Not in the precomputed list, so check the snapshots. + */ retry: TAILQ_FOREACH(ip, snaphead, i_nextsnap) { vp = ITOV(ip); *************** *** 1836,1858 **** if (bp->b_vp == vp) continue; /* - * First check to see if it is in the preallocated list. - * By doing this check we avoid several potential deadlocks. - */ - lower = 1; - upper = ip->i_snaplistsize - 1; - while (lower <= upper) { - mid = (lower + upper) / 2; - if (ip->i_snapblklist[mid] == lbn) - break; - if (ip->i_snapblklist[mid] < lbn) - lower = mid + 1; - else - upper = mid - 1; - } - if (lower <= upper) - continue; - /* * Check to see if block needs to be copied. We do not have * to hold the snapshot lock while doing this lookup as it * will never require any additional allocations for the --- 1906,1911 ---- *************** *** 1862,1881 **** blkno = DIP(ip, i_db[lbn]); } else { if (snapshot_locked == 0 && ! lockmgr(&vp->v_lock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp), td) != 0) { VI_LOCK(devvp); goto retry; } td->td_proc->p_flag |= P_COWINPROGRESS; error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); td->td_proc->p_flag &= ~P_COWINPROGRESS; - if (snapshot_locked == 0) { - VI_LOCK(devvp); - lockmgr(&vp->v_lock, LK_RELEASE, NULL, td); - } if (error) break; indiroff = (lbn - NDADDR) % NINDIR(fs); --- 1915,1931 ---- blkno = DIP(ip, i_db[lbn]); } else { if (snapshot_locked == 0 && ! lockmgr(vp->v_vnlock, LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, VI_MTX(devvp), td) != 0) { VI_LOCK(devvp); goto retry; } + snapshot_locked = 1; td->td_proc->p_flag |= P_COWINPROGRESS; error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp); td->td_proc->p_flag &= ~P_COWINPROGRESS; if (error) break; indiroff = (lbn - NDADDR) % NINDIR(fs);