The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/fss.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: fss.c,v 1.13.2.4 2006/04/21 12:08:50 tron Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Juergen Hannken-Illjes.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the NetBSD
   21  *      Foundation, Inc. and its contributors.
   22  * 4. Neither the name of The NetBSD Foundation nor the names of its
   23  *    contributors may be used to endorse or promote products derived
   24  *    from this software without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36  * POSSIBILITY OF SUCH DAMAGE.
   37  */
   38 
   39 /*
   40  * File system snapshot disk driver.
   41  *
   42  * Block/character interface to the snapshot of a mounted file system.
   43  */
   44 
   45 #include <sys/cdefs.h>
   46 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.13.2.4 2006/04/21 12:08:50 tron Exp $");
   47 
   48 #include "fss.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/namei.h>
   53 #include <sys/proc.h>
   54 #include <sys/errno.h>
   55 #include <sys/buf.h>
   56 #include <sys/malloc.h>
   57 #include <sys/ioctl.h>
   58 #include <sys/disklabel.h>
   59 #include <sys/device.h>
   60 #include <sys/disk.h>
   61 #include <sys/stat.h>
   62 #include <sys/mount.h>
   63 #include <sys/vnode.h>
   64 #include <sys/file.h>
   65 #include <sys/uio.h>
   66 #include <sys/conf.h>
   67 #include <sys/kthread.h>
   68 
   69 #include <miscfs/specfs/specdev.h>
   70 
   71 #include <dev/fssvar.h>
   72 
   73 #include <machine/stdarg.h>
   74 
   75 #ifdef DEBUG
   76 #define FSS_STATISTICS
   77 #endif
   78 
   79 #ifdef FSS_STATISTICS
   80 struct fss_stat {
   81         u_int64_t       cow_calls;
   82         u_int64_t       cow_copied;
   83         u_int64_t       cow_cache_full;
   84         u_int64_t       indir_read;
   85         u_int64_t       indir_write;
   86 };
   87 
   88 static struct fss_stat fss_stat[NFSS];
   89 
   90 #define FSS_STAT_INC(sc, field) \
   91                         do { \
   92                                 fss_stat[sc->sc_unit].field++; \
   93                         } while (0)
   94 #define FSS_STAT_SET(sc, field, value) \
   95                         do { \
   96                                 fss_stat[sc->sc_unit].field = value; \
   97                         } while (0)
   98 #define FSS_STAT_ADD(sc, field, value) \
   99                         do { \
  100                                 fss_stat[sc->sc_unit].field += value; \
  101                         } while (0)
  102 #define FSS_STAT_VAL(sc, field) fss_stat[sc->sc_unit].field
  103 #define FSS_STAT_CLEAR(sc) \
  104                         do { \
  105                                 memset(&fss_stat[sc->sc_unit], 0, \
  106                                     sizeof(struct fss_stat)); \
  107                         } while (0)
  108 #else /* FSS_STATISTICS */
  109 #define FSS_STAT_INC(sc, field)
  110 #define FSS_STAT_SET(sc, field, value)
  111 #define FSS_STAT_ADD(sc, field, value)
  112 #define FSS_STAT_CLEAR(sc)
  113 #endif /* FSS_STATISTICS */
  114 
  115 static struct fss_softc fss_softc[NFSS];
  116 
  117 void fssattach(int);
  118 
  119 dev_type_open(fss_open);
  120 dev_type_close(fss_close);
  121 dev_type_read(fss_read);
  122 dev_type_write(fss_write);
  123 dev_type_ioctl(fss_ioctl);
  124 dev_type_strategy(fss_strategy);
  125 dev_type_dump(fss_dump);
  126 dev_type_size(fss_size);
  127 
  128 static int fss_copy_on_write(void *, struct buf *);
  129 static inline void fss_error(struct fss_softc *, const char *, ...);
  130 static int fss_create_files(struct fss_softc *, struct fss_set *,
  131     off_t *, struct proc *);
  132 static int fss_create_snapshot(struct fss_softc *, struct fss_set *,
  133     struct proc *);
  134 static int fss_delete_snapshot(struct fss_softc *, struct proc *);
  135 static int fss_softc_alloc(struct fss_softc *);
  136 static void fss_softc_free(struct fss_softc *);
  137 static void fss_cluster_iodone(struct buf *);
  138 static void fss_read_cluster(struct fss_softc *, u_int32_t);
  139 static int fss_write_cluster(struct fss_cache *, u_int32_t);
  140 static void fss_bs_thread(void *);
  141 static int fss_bmap(struct fss_softc *, off_t, int,
  142     struct vnode **, daddr_t *, int *);
  143 static int fss_bs_io(struct fss_softc *, fss_io_type,
  144     u_int32_t, off_t, int, caddr_t);
  145 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t);
  146 
  147 const struct bdevsw fss_bdevsw = {
  148         fss_open, fss_close, fss_strategy, fss_ioctl,
  149         fss_dump, fss_size, D_DISK
  150 };
  151 
  152 const struct cdevsw fss_cdevsw = {
  153         fss_open, fss_close, fss_read, fss_write, fss_ioctl,
  154         nostop, notty, nopoll, nommap, nokqfilter, D_DISK
  155 };
  156 
  157 void
  158 fssattach(int num)
  159 {
  160         int i;
  161         struct fss_softc *sc;
  162 
  163         for (i = 0; i < NFSS; i++) {
  164                 sc = &fss_softc[i];
  165                 sc->sc_unit = i;
  166                 sc->sc_bdev = NODEV;
  167                 simple_lock_init(&sc->sc_slock);
  168                 lockinit(&sc->sc_lock, PRIBIO, "fsslock", 0, 0);
  169                 bufq_alloc(&sc->sc_bufq, BUFQ_FCFS|BUFQ_SORT_RAWBLOCK);
  170         }
  171 }
  172 
  173 int
  174 fss_open(dev_t dev, int flags, int mode, struct proc *p)
  175 {
  176         int s, mflag;
  177         struct fss_softc *sc;
  178 
  179         mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
  180 
  181         if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL)
  182                 return ENODEV;
  183 
  184         FSS_LOCK(sc, s);
  185 
  186         sc->sc_flags |= mflag;
  187 
  188         FSS_UNLOCK(sc, s);
  189 
  190         return 0;
  191 }
  192 
  193 int
  194 fss_close(dev_t dev, int flags, int mode, struct proc *p)
  195 {
  196         int s, mflag, error;
  197         struct fss_softc *sc;
  198 
  199         mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
  200 
  201         if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL)
  202                 return ENODEV;
  203 
  204         FSS_LOCK(sc, s); 
  205 
  206         if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag) {
  207                 if ((sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0 &&
  208                     (sc->sc_flags & FSS_ACTIVE) != 0) {
  209                         FSS_UNLOCK(sc, s);
  210                         error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, p);
  211                         if (error)
  212                                 return error;
  213                         FSS_LOCK(sc, s);
  214                 }
  215                 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE;
  216         }
  217 
  218         sc->sc_flags &= ~mflag;
  219 
  220         FSS_UNLOCK(sc, s);
  221 
  222         return 0;
  223 }
  224 
  225 void
  226 fss_strategy(struct buf *bp)
  227 {
  228         int s;
  229         struct fss_softc *sc;
  230 
  231         sc = FSS_DEV_TO_SOFTC(bp->b_dev);
  232 
  233         FSS_LOCK(sc, s);
  234 
  235         if ((bp->b_flags & B_READ) != B_READ ||
  236             sc == NULL || !FSS_ISVALID(sc)) {
  237 
  238                 FSS_UNLOCK(sc, s);
  239 
  240                 bp->b_error = (sc == NULL ? ENODEV : EROFS);
  241                 bp->b_flags |= B_ERROR;
  242                 bp->b_resid = bp->b_bcount;
  243                 biodone(bp);
  244                 return;
  245         }
  246 
  247         bp->b_rawblkno = bp->b_blkno;
  248         BUFQ_PUT(&sc->sc_bufq, bp);
  249         wakeup(&sc->sc_bs_proc);
  250 
  251         FSS_UNLOCK(sc, s);
  252 }
  253 
  254 int
  255 fss_read(dev_t dev, struct uio *uio, int flags)
  256 {
  257         return physio(fss_strategy, NULL, dev, B_READ, minphys, uio);
  258 }
  259 
  260 int
  261 fss_write(dev_t dev, struct uio *uio, int flags)
  262 {
  263         return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio);
  264 }
  265 
  266 int
  267 fss_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
  268 {
  269         int error;
  270         struct fss_softc *sc;
  271         struct fss_set *fss = (struct fss_set *)data;
  272         struct fss_get *fsg = (struct fss_get *)data;
  273 
  274         if ((sc = FSS_DEV_TO_SOFTC(dev)) == NULL)
  275                 return ENODEV;
  276 
  277         switch (cmd) {
  278         case FSSIOCSET:
  279                 lockmgr(&sc->sc_lock, LK_EXCLUSIVE, NULL);
  280                 if ((flag & FWRITE) == 0)
  281                         error = EPERM;
  282                 else if ((sc->sc_flags & FSS_ACTIVE) != 0)
  283                         error = EBUSY;
  284                 else
  285                         error = fss_create_snapshot(sc, fss, p);
  286                 lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
  287                 break;
  288 
  289         case FSSIOCCLR:
  290                 lockmgr(&sc->sc_lock, LK_EXCLUSIVE, NULL);
  291                 if ((flag & FWRITE) == 0)
  292                         error = EPERM;
  293                 else if ((sc->sc_flags & FSS_ACTIVE) == 0)
  294                         error = ENXIO;
  295                 else
  296                         error = fss_delete_snapshot(sc, p);
  297                 lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
  298                 break;
  299 
  300         case FSSIOCGET:
  301                 lockmgr(&sc->sc_lock, LK_EXCLUSIVE, NULL);
  302                 switch (sc->sc_flags & (FSS_PERSISTENT | FSS_ACTIVE)) {
  303                 case FSS_ACTIVE:
  304                         memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
  305                         fsg->fsg_csize = FSS_CLSIZE(sc);
  306                         fsg->fsg_time = sc->sc_time;
  307                         fsg->fsg_mount_size = sc->sc_clcount;
  308                         fsg->fsg_bs_size = sc->sc_clnext;
  309                         error = 0;
  310                         break;
  311                 case FSS_PERSISTENT | FSS_ACTIVE:
  312                         memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
  313                         fsg->fsg_csize = 0;
  314                         fsg->fsg_time = sc->sc_time;
  315                         fsg->fsg_mount_size = 0;
  316                         fsg->fsg_bs_size = 0;
  317                         error = 0;
  318                         break;
  319                 default:
  320                         error = ENXIO;
  321                         break;
  322                 }
  323                 lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
  324                 break;
  325 
  326         case FSSIOFSET:
  327                 sc->sc_uflags = *(int *)data;
  328                 error = 0;
  329                 break;
  330 
  331         case FSSIOFGET:
  332                 *(int *)data = sc->sc_uflags;
  333                 error = 0;
  334                 break;
  335 
  336         default:
  337                 error = EINVAL;
  338                 break;
  339         }
  340 
  341         return error;
  342 }
  343 
  344 int
  345 fss_size(dev_t dev)
  346 {
  347         return -1;
  348 }
  349 
  350 int
  351 fss_dump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
  352 {
  353         return EROFS;
  354 }
  355 
  356 /*
  357  * An error occurred reading or writing the snapshot or backing store.
  358  * If it is the first error log to console.
  359  * The caller holds the simplelock.
  360  */
  361 static inline void
  362 fss_error(struct fss_softc *sc, const char *fmt, ...)
  363 {
  364         va_list ap;
  365 
  366         if ((sc->sc_flags & (FSS_ACTIVE|FSS_ERROR)) == FSS_ACTIVE) {
  367                 va_start(ap, fmt);
  368                 printf("fss%d: snapshot invalid: ", sc->sc_unit);
  369                 vprintf(fmt, ap);
  370                 printf("\n");
  371                 va_end(ap);
  372         }
  373         if ((sc->sc_flags & FSS_ACTIVE) == FSS_ACTIVE)
  374                 sc->sc_flags |= FSS_ERROR;
  375 }
  376 
  377 /*
  378  * Allocate the variable sized parts of the softc and
  379  * fork the kernel thread.
  380  *
  381  * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size
  382  * must be initialized.
  383  */
  384 static int
  385 fss_softc_alloc(struct fss_softc *sc)
  386 {
  387         int i, len, error;
  388 
  389         len = (sc->sc_clcount+NBBY-1)/NBBY;
  390         sc->sc_copied = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL);
  391         if (sc->sc_copied == NULL)
  392                 return(ENOMEM);
  393 
  394         len = sc->sc_cache_size*sizeof(struct fss_cache);
  395         sc->sc_cache = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL);
  396         if (sc->sc_cache == NULL)
  397                 return(ENOMEM);
  398 
  399         len = FSS_CLSIZE(sc);
  400         for (i = 0; i < sc->sc_cache_size; i++) {
  401                 sc->sc_cache[i].fc_type = FSS_CACHE_FREE;
  402                 sc->sc_cache[i].fc_softc = sc;
  403                 sc->sc_cache[i].fc_xfercount = 0;
  404                 sc->sc_cache[i].fc_data = malloc(len, M_TEMP,
  405                     M_WAITOK|M_CANFAIL);
  406                 if (sc->sc_cache[i].fc_data == NULL)
  407                         return(ENOMEM);
  408         }
  409 
  410         len = (sc->sc_indir_size+NBBY-1)/NBBY;
  411         sc->sc_indir_valid = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL);
  412         if (sc->sc_indir_valid == NULL)
  413                 return(ENOMEM);
  414 
  415         len = FSS_CLSIZE(sc);
  416         sc->sc_indir_data = malloc(len, M_TEMP, M_ZERO|M_WAITOK|M_CANFAIL);
  417         if (sc->sc_indir_data == NULL)
  418                 return(ENOMEM);
  419 
  420         if ((error = kthread_create1(fss_bs_thread, sc, &sc->sc_bs_proc,
  421             "fssbs%d", sc->sc_unit)) != 0)
  422                 return error;
  423 
  424         sc->sc_flags |= FSS_BS_THREAD;
  425         return 0;
  426 }
  427 
  428 /*
  429  * Free the variable sized parts of the softc.
  430  */
  431 static void
  432 fss_softc_free(struct fss_softc *sc)
  433 {
  434         int s, i;
  435 
  436         if ((sc->sc_flags & FSS_BS_THREAD) != 0) {
  437                 FSS_LOCK(sc, s);
  438                 sc->sc_flags &= ~FSS_BS_THREAD;
  439                 wakeup(&sc->sc_bs_proc);
  440                 while (sc->sc_bs_proc != NULL)
  441                         ltsleep(&sc->sc_bs_proc, PRIBIO, "fssthread", 0,
  442                             &sc->sc_slock);
  443                 FSS_UNLOCK(sc, s);
  444         }
  445 
  446         if (sc->sc_copied != NULL)
  447                 free(sc->sc_copied, M_TEMP);
  448         sc->sc_copied = NULL;
  449 
  450         if (sc->sc_cache != NULL) {
  451                 for (i = 0; i < sc->sc_cache_size; i++)
  452                         if (sc->sc_cache[i].fc_data != NULL)
  453                                 free(sc->sc_cache[i].fc_data, M_TEMP);
  454                 free(sc->sc_cache, M_TEMP);
  455         }
  456         sc->sc_cache = NULL;
  457 
  458         if (sc->sc_indir_valid != NULL)
  459                 free(sc->sc_indir_valid, M_TEMP);
  460         sc->sc_indir_valid = NULL;
  461 
  462         if (sc->sc_indir_data != NULL)
  463                 free(sc->sc_indir_data, M_TEMP);
  464         sc->sc_indir_data = NULL;
  465 }
  466 
  467 /*
  468  * Check if an unmount is ok. If forced, set this snapshot into ERROR state.
  469  */
  470 int
  471 fss_umount_hook(struct mount *mp, int forced)
  472 {
  473         int i, s;
  474 
  475         for (i = 0; i < NFSS; i++) {
  476                 FSS_LOCK(&fss_softc[i], s);
  477                 if ((fss_softc[i].sc_flags & FSS_ACTIVE) != 0 &&
  478                     fss_softc[i].sc_mount == mp) {
  479                         if (forced)
  480                                 fss_error(&fss_softc[i], "forced unmount");
  481                         else {
  482                                 FSS_UNLOCK(&fss_softc[i], s);
  483                                 return EBUSY;
  484                         }
  485                 }
  486                 FSS_UNLOCK(&fss_softc[i], s);
  487         }
  488 
  489         return 0;
  490 }
  491 
  492 /*
  493  * A buffer is written to the snapshotted block device. Copy to
  494  * backing store if needed.
  495  */
  496 static int
  497 fss_copy_on_write(void *v, struct buf *bp)
  498 {
  499         int s;
  500         u_int32_t cl, ch, c;
  501         struct fss_softc *sc = v;
  502 
  503         FSS_LOCK(sc, s);
  504         if (!FSS_ISVALID(sc)) {
  505                 FSS_UNLOCK(sc, s);
  506                 return 0;
  507         }
  508 
  509         FSS_UNLOCK(sc, s);
  510 
  511         FSS_STAT_INC(sc, cow_calls);
  512 
  513         cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
  514         ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
  515 
  516         for (c = cl; c <= ch; c++)
  517                 fss_read_cluster(sc, c);
  518 
  519         return 0;
  520 }
  521 
  522 /*
  523  * Lookup and open needed files.
  524  *
  525  * For file system internal snapshot initializes sc_mntname, sc_mount,
  526  * sc_bs_vp and sc_time.
  527  *
  528  * Otherwise returns dev and size of the underlying block device.
  529  * Initializes sc_mntname, sc_mount_vp, sc_bdev, sc_bs_vp and sc_mount
  530  */
  531 static int
  532 fss_create_files(struct fss_softc *sc, struct fss_set *fss,
  533     off_t *bsize, struct proc *p)
  534 {
  535         int error, bits, fsbsize;
  536         struct timespec ts;
  537         struct partinfo dpart;
  538         struct vattr va;
  539         struct nameidata nd;
  540 
  541         /*
  542          * Get the mounted file system.
  543          */
  544 
  545         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_mount, p);
  546         if ((error = namei(&nd)) != 0)
  547                 return error;
  548 
  549         if ((nd.ni_vp->v_flag & VROOT) != VROOT) {
  550                 vrele(nd.ni_vp);
  551                 return EINVAL;
  552         }
  553 
  554         sc->sc_mount = nd.ni_vp->v_mount;
  555         memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN);
  556 
  557         vrele(nd.ni_vp);
  558 
  559         /*
  560          * Check for file system internal snapshot.
  561          */
  562 
  563         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, p);
  564         if ((error = namei(&nd)) != 0)
  565                 return error;
  566 
  567         if (nd.ni_vp->v_type == VREG && nd.ni_vp->v_mount == sc->sc_mount) {
  568                 vrele(nd.ni_vp);
  569                 sc->sc_flags |= FSS_PERSISTENT;
  570 
  571                 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, p);
  572                 if ((error = vn_open(&nd, FREAD, 0)) != 0)
  573                         return error;
  574                 sc->sc_bs_vp = nd.ni_vp;
  575 
  576                 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
  577                 bits = sizeof(sc->sc_bs_bshift)*NBBY;
  578                 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits;
  579                     sc->sc_bs_bshift++)
  580                         if (FSS_FSBSIZE(sc) == fsbsize)
  581                                 break;
  582                 if (sc->sc_bs_bshift >= bits) {
  583                         VOP_UNLOCK(sc->sc_bs_vp, 0);
  584                         return EINVAL;
  585                 }
  586 
  587                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
  588                 sc->sc_clshift = 0;
  589 
  590                 error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts);
  591                 TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts);
  592 
  593                 VOP_UNLOCK(sc->sc_bs_vp, 0);
  594 
  595                 return error;
  596         }
  597         vrele(nd.ni_vp);
  598 
  599         /*
  600          * Get the block device it is mounted on.
  601          */
  602 
  603         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
  604             sc->sc_mount->mnt_stat.f_mntfromname, p);
  605         if ((error = namei(&nd)) != 0)
  606                 return error;
  607 
  608         if (nd.ni_vp->v_type != VBLK) {
  609                 vrele(nd.ni_vp);
  610                 return EINVAL;
  611         }
  612 
  613         error = VOP_IOCTL(nd.ni_vp, DIOCGPART, &dpart, FREAD, p->p_ucred, p);
  614         if (error) {
  615                 vrele(nd.ni_vp);
  616                 return error;
  617         }
  618 
  619         sc->sc_mount_vp = nd.ni_vp;
  620         sc->sc_bdev = nd.ni_vp->v_rdev;
  621         *bsize = (off_t)dpart.disklab->d_secsize*dpart.part->p_size;
  622         vrele(nd.ni_vp);
  623 
  624         /*
  625          * Get the backing store
  626          */
  627 
  628         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fss->fss_bstore, p);
  629         if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0)
  630                 return error;
  631         VOP_UNLOCK(nd.ni_vp, 0);
  632 
  633         sc->sc_bs_vp = nd.ni_vp;
  634 
  635         if (nd.ni_vp->v_type != VREG && nd.ni_vp->v_type != VCHR)
  636                 return EINVAL;
  637 
  638         if (sc->sc_bs_vp->v_type == VREG) {
  639                 error = VOP_GETATTR(sc->sc_bs_vp, &va, p->p_ucred, p);
  640                 if (error != 0)
  641                         return error;
  642                 sc->sc_bs_size = va.va_size;
  643                 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
  644                 if (fsbsize & (fsbsize-1))      /* No power of two */
  645                         return EINVAL;
  646                 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32;
  647                     sc->sc_bs_bshift++)
  648                         if (FSS_FSBSIZE(sc) == fsbsize)
  649                                 break;
  650                 if (sc->sc_bs_bshift >= 32)
  651                         return EINVAL;
  652                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
  653                 sc->sc_flags |= FSS_BS_ALLOC;
  654         } else {
  655                 sc->sc_bs_bshift = DEV_BSHIFT;
  656                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
  657                 sc->sc_flags &= ~FSS_BS_ALLOC;
  658         }
  659 
  660         /*
  661          * As all IO to from/to the backing store goes through
  662          * VOP_STRATEGY() clean the buffer cache to prevent
  663          * cache incoherencies.
  664          */
  665         if ((error = vinvalbuf(sc->sc_bs_vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
  666                 return error;
  667 
  668         return 0;
  669 }
  670 
  671 /*
  672  * Create a snapshot.
  673  */
  674 static int
  675 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct proc *p)
  676 {
  677         int len, error;
  678         u_int32_t csize;
  679         off_t bsize;
  680 
  681         /*
  682          * Open needed files.
  683          */
  684         if ((error = fss_create_files(sc, fss, &bsize, p)) != 0)
  685                 goto bad;
  686 
  687         if (sc->sc_flags & FSS_PERSISTENT) {
  688                 fss_softc_alloc(sc);
  689                 sc->sc_flags |= FSS_ACTIVE;
  690                 return 0;
  691         }
  692 
  693         /*
  694          * Set cluster size. Must be a power of two and
  695          * a multiple of backing store block size.
  696          */
  697         if (fss->fss_csize <= 0)
  698                 csize = MAXPHYS;
  699         else
  700                 csize = fss->fss_csize;
  701         if (bsize/csize > FSS_CLUSTER_MAX)
  702                 csize = bsize/FSS_CLUSTER_MAX+1;
  703 
  704         for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32;
  705             sc->sc_clshift++)
  706                 if (FSS_CLSIZE(sc) >= csize)
  707                         break;
  708         if (sc->sc_clshift >= 32) {
  709                 error = EINVAL;
  710                 goto bad;
  711         }
  712         sc->sc_clmask = FSS_CLSIZE(sc)-1;
  713 
  714         /*
  715          * Set number of cache slots.
  716          */
  717         if (FSS_CLSIZE(sc) <= 8192)
  718                 sc->sc_cache_size = 32;
  719         else if (FSS_CLSIZE(sc) <= 65536)
  720                 sc->sc_cache_size = 8;
  721         else
  722                 sc->sc_cache_size = 4;
  723 
  724         /*
  725          * Set number of clusters and size of last cluster.
  726          */
  727         sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1;
  728         sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1;
  729 
  730         /*
  731          * Set size of indirect table.
  732          */
  733         len = sc->sc_clcount*sizeof(u_int32_t);
  734         sc->sc_indir_size = FSS_BTOCL(sc, len)+1;
  735         sc->sc_clnext = sc->sc_indir_size;
  736         sc->sc_indir_cur = 0;
  737 
  738         if ((error = fss_softc_alloc(sc)) != 0)
  739                 goto bad;
  740 
  741         /*
  742          * Activate the snapshot.
  743          */
  744 
  745         if ((error = vfs_write_suspend(sc->sc_mount, PUSER|PCATCH, 0)) != 0)
  746                 goto bad;
  747 
  748         microtime(&sc->sc_time);
  749 
  750         if (error == 0)
  751                 error = vn_cow_establish(sc->sc_mount_vp,
  752                     fss_copy_on_write, sc);
  753         if (error == 0)
  754                 sc->sc_flags |= FSS_ACTIVE;
  755 
  756         vfs_write_resume(sc->sc_mount);
  757 
  758         if (error != 0)
  759                 goto bad;
  760 
  761 #ifdef DEBUG
  762         printf("fss%d: %s snapshot active\n", sc->sc_unit, sc->sc_mntname);
  763         printf("fss%d: %u clusters of %u, %u cache slots, %u indir clusters\n",
  764             sc->sc_unit, sc->sc_clcount, FSS_CLSIZE(sc),
  765             sc->sc_cache_size, sc->sc_indir_size);
  766 #endif
  767 
  768         return 0;
  769 
  770 bad:
  771         fss_softc_free(sc);
  772         if (sc->sc_bs_vp != NULL) {
  773                 if (sc->sc_flags & FSS_PERSISTENT)
  774                         vn_close(sc->sc_bs_vp, FREAD, p->p_ucred, p);
  775                 else
  776                         vn_close(sc->sc_bs_vp, FREAD|FWRITE, p->p_ucred, p);
  777         }
  778         sc->sc_bs_vp = NULL;
  779 
  780         return error;
  781 }
  782 
  783 /*
  784  * Delete a snapshot.
  785  */
  786 static int
  787 fss_delete_snapshot(struct fss_softc *sc, struct proc *p)
  788 {
  789         int s;
  790 
  791         if ((sc->sc_flags & FSS_PERSISTENT) == 0)
  792                 vn_cow_disestablish(sc->sc_mount_vp, fss_copy_on_write, sc);
  793 
  794         FSS_LOCK(sc, s);
  795         sc->sc_flags &= ~(FSS_ACTIVE|FSS_ERROR);
  796         sc->sc_mount = NULL;
  797         sc->sc_bdev = NODEV;
  798         FSS_UNLOCK(sc, s);
  799 
  800         fss_softc_free(sc);
  801         if (sc->sc_flags & FSS_PERSISTENT)
  802                 vn_close(sc->sc_bs_vp, FREAD, p->p_ucred, p);
  803         else
  804                 vn_close(sc->sc_bs_vp, FREAD|FWRITE, p->p_ucred, p);
  805         sc->sc_bs_vp = NULL;
  806         sc->sc_flags &= ~(FSS_PERSISTENT|FSS_BS_ALLOC);
  807 
  808         FSS_STAT_CLEAR(sc);
  809 
  810         return 0;
  811 }
  812 
  813 /*
  814  * Get the block address and number of contiguous blocks.
  815  * If the file contains a hole, try to allocate.
  816  * Backing store is locked by caller.
       *
       * sc:    snapshot whose backing store vnode (sc_bs_vp) is mapped.
       * start: byte offset into the backing store; converted to a file
       *        system block with FSS_BTOFSB() for VOP_BMAP().
       * len:   number of bytes to allocate, starting at `start', when the
       *        first mapping attempt does not yield a block.
       * vpp, bnp, runp: handed unchanged to VOP_BMAP(), which fills them in.
       *
       * Returns 0 or an errno.  ENOSPC is returned when start+len reaches
       * sc_bs_size, or when a non-persistent snapshot still maps to
       * (daddr_t)-1 after a successful VOP_BMAP().
  817  */
  818 static int
  819 fss_bmap(struct fss_softc *sc, off_t start, int len,
  820     struct vnode **vpp, daddr_t *bnp, int *runp)
  821 {
  822         int l, s, error;
  823         struct buf *bp, **bpp;
  824 
              /*
               * On a soft dependency mount VOP_BALLOC() is asked to hand
               * back its buffer through bpp; otherwise it gets NULL.
               */
  825         if ((sc->sc_bs_vp->v_mount->mnt_flag & MNT_SOFTDEP) != 0)
  826                 bpp = &bp;
  827         else
  828                 bpp = NULL;
  829 
              /*
               * Done if the block is already mapped, or if allocating is
               * not permitted for this snapshot (FSS_BS_ALLOC clear).
               */
  830         error = VOP_BMAP(sc->sc_bs_vp, FSS_BTOFSB(sc, start), vpp, bnp, runp);
  831         if ((error == 0 && *bnp != (daddr_t)-1) ||
  832             (sc->sc_flags & FSS_BS_ALLOC) == 0)
  833                 goto out;
  834 
              /* Refuse to allocate at or beyond the backing store size. */
  835         if (start+len >= sc->sc_bs_size) {
  836                 error = ENOSPC;
  837                 goto out;
  838         }
  839 
              /* Allocate the range one file system block at a time. */
  840         for (l = 0; l < len; l += FSS_FSBSIZE(sc)) {
  841                 error = VOP_BALLOC(sc->sc_bs_vp, start+l, FSS_FSBSIZE(sc),
  842                     sc->sc_bs_proc->p_ucred, 0, bpp);
  843                 if (error)
  844                         goto out;
  845 
  846                 if (bpp == NULL)
  847                         continue;
  848 
                      /*
                       * A buffer was returned: if it carries dependencies
                       * run the bioops start/complete hooks on it, then
                       * mark it invalid and release it.
                       */
  849                 s = splbio();
  850                 simple_lock(&bp->b_interlock);
  851 
  852                 if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start)
  853                         (*bioops.io_start)(bp);
  854                 if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
  855                         (*bioops.io_complete)(bp);
  856 
  857                 bp->b_flags |= B_INVAL;
  858                 simple_unlock(&bp->b_interlock);
  859                 splx(s);
  860 
  861                 brelse(bp);
  862         }
  863 
              /* Map again now that the blocks have been allocated. */
  864         error = VOP_BMAP(sc->sc_bs_vp, FSS_BTOFSB(sc, start), vpp, bnp, runp);
  865 
  866 out:
  867 
              /* Only a persistent snapshot may see an unmapped block (hole). */
  868         if ((sc->sc_flags & FSS_PERSISTENT) == 0 &&
  869             error == 0 && *bnp == (daddr_t)-1)
  870                 error = ENOSPC;
  871 
  872         return error;
  873 }
  874 
  875 /*
  876  * A read from the snapshotted block device has completed.
       *
       * This is the b_iodone callback of the bufs issued by
       * fss_read_cluster().  fss_write_cluster() installs the same callback
       * for its backing store writes, so the "fs read ..." messages below
       * are also emitted for failed writes.
       *
       * bp->b_private is the cache slot (struct fss_cache) the transfer
       * belongs to.  The buf is returned to bufpool here.
  877  */
  878 static void
  879 fss_cluster_iodone(struct buf *bp)
  880 {
  881         int s;
  882         struct fss_cache *scp = bp->b_private;
  883 
  884         FSS_LOCK(scp->fc_softc, s);
  885 
              /* Report failed transfers through fss_error(). */
  886         if (bp->b_flags & B_EINTR)
  887                 fss_error(scp->fc_softc, "fs read interrupted");
  888         if (bp->b_flags & B_ERROR)
  889                 fss_error(scp->fc_softc, "fs read error %d", bp->b_error);
  890 
              /* Only the write path attaches a vnode (bgetvp); detach it. */
  891         if (bp->b_vp != NULL)
  892                 brelvp(bp);
  893 
              /* Wake the issuer once the slot has no transfers outstanding. */
  894         if (--scp->fc_xfercount == 0)
  895                 wakeup(&scp->fc_data);
  896 
  897         FSS_UNLOCK(scp->fc_softc, s);
  898 
  899         s = splbio();
  900         pool_put(&bufpool, bp);
  901         splx(s);
  902 }
  903 
  904 /*
  905  * Read a cluster from the snapshotted block device to the cache.
       *
       * sc: the snapshot.
       * cl: number of the cluster to copy.
       *
       * Returns without doing anything if the cluster is already marked in
       * sc_copied or the snapshot is not valid.  Otherwise the call sleeps
       * until a free cache slot is available, reads the cluster into it,
       * marks the slot FSS_CACHE_VALID, sets the cluster's bit in sc_copied
       * and wakes the channel &sc->sc_bs_proc (the one fss_bs_thread()
       * sleeps on).
  906  */
  907 static void
  908 fss_read_cluster(struct fss_softc *sc, u_int32_t cl)
  909 {
  910         int s, todo, len;
  911         caddr_t addr;
  912         daddr_t dblk;
  913         struct buf *bp;
  914         struct fss_cache *scp, *scl;
  915 
  916         /*
  917          * Get a free cache slot.
  918          */
  919         scl = sc->sc_cache+sc->sc_cache_size;
  920 
  921         FSS_LOCK(sc, s);
  922 
              /* Every sleep below jumps back here to re-check all conditions. */
  923 restart:
  924         if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) {
  925                 FSS_UNLOCK(sc, s);
  926                 return;
  927         }
  928 
              /*
               * If a non-free slot already holds this cluster, sleep on its
               * fc_type (woken when fss_bs_thread() frees the slot) and
               * start over.
               */
  929         for (scp = sc->sc_cache; scp < scl; scp++)
  930                 if (scp->fc_type != FSS_CACHE_FREE &&
  931                     scp->fc_cluster == cl) {
  932                         ltsleep(&scp->fc_type, PRIBIO, "cowwait2", 0,
  933                             &sc->sc_slock);
  934                         goto restart;
  935                 }
  936 
              /* Claim the first free slot for this cluster. */
  937         for (scp = sc->sc_cache; scp < scl; scp++)
  938                 if (scp->fc_type == FSS_CACHE_FREE) {
  939                         scp->fc_type = FSS_CACHE_BUSY;
  940                         scp->fc_cluster = cl;
  941                         break;
  942                 }
              /* No free slot: count it, sleep on sc_cache and start over. */
  943         if (scp >= scl) {
  944                 FSS_STAT_INC(sc, cow_cache_full);
  945                 ltsleep(&sc->sc_cache, PRIBIO, "cowwait3", 0, &sc->sc_slock);
  946                 goto restart;
  947         }
  948 
  949         FSS_UNLOCK(sc, s);
  950 
  951         /*
  952          * Start the read.
  953          */
  954         FSS_STAT_INC(sc, cow_copied);
  955 
  956         dblk = btodb(FSS_CLTOB(sc, cl));
  957         addr = scp->fc_data;
              /*
               * For the last cluster only sc_clresid bytes are read; the
               * rest of the slot's data area is zero filled.
               */
  958         if (cl == sc->sc_clcount-1) {
  959                 todo = sc->sc_clresid;
  960                 memset(addr+todo, 0, FSS_CLSIZE(sc)-todo);
  961         } else
  962                 todo = FSS_CLSIZE(sc);
              /* Issue the read in pieces of at most MAXPHYS bytes. */
  963         while (todo > 0) {
  964                 len = todo;
  965                 if (len > MAXPHYS)
  966                         len = MAXPHYS;
  967 
  968                 s = splbio();
  969                 bp = pool_get(&bufpool, PR_WAITOK);
  970                 splx(s);
  971 
                      /* B_CALL: fss_cluster_iodone() runs at completion. */
  972                 BUF_INIT(bp);
  973                 bp->b_flags = B_READ|B_CALL;
  974                 bp->b_bcount = len;
  975                 bp->b_bufsize = bp->b_bcount;
  976                 bp->b_error = 0;
  977                 bp->b_data = addr;
  978                 bp->b_blkno = bp->b_rawblkno = dblk;
  979                 bp->b_proc = NULL;
  980                 bp->b_dev = sc->sc_bdev;
  981                 bp->b_vp = NULLVP;
  982                 bp->b_private = scp;
  983                 bp->b_iodone = fss_cluster_iodone;
  984 
  985                 DEV_STRATEGY(bp);
  986 
                      /*
                       * NOTE(review): the counter is incremented after the
                       * request has been issued.  If the completion runs
                       * first it decrements the counter below zero until
                       * this increment; the wait loop below only tests
                       * for > 0.
                       */
  987                 FSS_LOCK(sc, s);
  988                 scp->fc_xfercount++;
  989                 FSS_UNLOCK(sc, s);
  990 
  991                 dblk += btodb(len);
  992                 addr += len;
  993                 todo -= len;
  994         }
  995 
  996         /*
  997          * Wait for all read requests to complete.
  998          */
  999         FSS_LOCK(sc, s);
 1000         while (scp->fc_xfercount > 0)
 1001                 ltsleep(&scp->fc_data, PRIBIO, "cowwait", 0, &sc->sc_slock);
 1002 
              /* Publish the slot and record the cluster as copied. */
 1003         scp->fc_type = FSS_CACHE_VALID;
 1004         setbit(sc->sc_copied, scp->fc_cluster);
 1005         FSS_UNLOCK(sc, s);
 1006 
 1007         wakeup(&sc->sc_bs_proc);
 1008 }
 1009 
 1010 /*
 1011  * Write a cluster from the cache to the backing store.
       *
       * scp: cache slot whose fc_data (FSS_CLSIZE bytes) is written.
       * cl:  destination cluster on the backing store; the byte position
       *      is FSS_CLTOB(sc, cl).
       *
       * The backing store vnode is locked for the duration of the call.
       * The writes are issued asynchronously with fss_cluster_iodone() as
       * completion callback and the function waits until fc_xfercount
       * drops to zero.
       *
       * Returns 0 or the error from VOP_PUTPAGES() / fss_bmap().  Errors of
       * the individual writes are not part of the return value; they are
       * reported by the callback through fss_error().
 1012  */
 1013 static int
 1014 fss_write_cluster(struct fss_cache *scp, u_int32_t cl)
 1015 {
 1016         int s, error, todo, len, nra;
 1017         daddr_t nbn;
 1018         caddr_t addr;
 1019         off_t pos;
 1020         struct buf *bp;
 1021         struct vnode *vp;
 1022         struct fss_softc *sc;
 1023 
 1024         error = 0;
 1025         sc = scp->fc_softc;
 1026 
 1027         pos = FSS_CLTOB(sc, cl);
 1028         addr = scp->fc_data;
 1029         todo = FSS_CLSIZE(sc);
 1030 
              /*
               * Clean and free the pages covering the target range.
               * NOTE(review): v_interlock is taken here and not released in
               * this function; presumably VOP_PUTPAGES() drops it - confirm.
               */
 1031         vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);
 1032         simple_lock(&sc->sc_bs_vp->v_interlock);
 1033         error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(pos),
 1034             round_page(pos+todo), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
 1035 
 1036         while (error == 0 && todo > 0) {
 1037                 error = fss_bmap(sc, pos, todo, &vp, &nbn, &nra);
 1038                 if (error)
 1039                         break;
 1040 
                      /*
                       * Bytes from pos to the end of the contiguous run
                       * (nra+1 file system blocks), limited to what is left.
                       */
 1041                 len = FSS_FSBTOB(sc, nra+1)-FSS_FSBOFF(sc, pos);
 1042                 if (len > todo)
 1043                         len = todo;
 1044 
 1045                 s = splbio();
 1046                 bp = pool_get(&bufpool, PR_WAITOK);
 1047                 splx(s);
 1048 
                      /* B_READ is not set, so this buf describes a write. */
 1049                 BUF_INIT(bp);
 1050                 bp->b_flags = B_CALL;
 1051                 bp->b_bcount = len;
 1052                 bp->b_bufsize = bp->b_bcount;
 1053                 bp->b_error = 0;
 1054                 bp->b_data = addr;
 1055                 bp->b_blkno = bp->b_rawblkno = nbn+btodb(FSS_FSBOFF(sc, pos));
 1056                 bp->b_proc = NULL;
 1057                 bp->b_vp = NULLVP;
 1058                 bp->b_private = scp;
 1059                 bp->b_iodone = fss_cluster_iodone;
                      /* Attach the vnode returned by fss_bmap(); count the write. */
 1060                 bgetvp(vp, bp);
 1061                 bp->b_vp->v_numoutput++;
 1062 
 1063                 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 1064                 VOP_STRATEGY(vp, bp);
 1065 
                      /* One more transfer outstanding on this cache slot. */
 1066                 FSS_LOCK(sc, s);
 1067                 scp->fc_xfercount++;
 1068                 FSS_UNLOCK(sc, s);
 1069 
 1070                 pos += len;
 1071                 addr += len;
 1072                 todo -= len;
 1073         }
 1074 
 1075         /*
 1076          * Wait for all write requests to complete.
               * This is done on the error path too, so requests that were
               * already issued are finished before the vnode is unlocked.
 1077          */
 1078         FSS_LOCK(sc, s);
 1079         while (scp->fc_xfercount > 0)
 1080                 ltsleep(&scp->fc_data, PRIBIO, "bswwait", 0, &sc->sc_slock);
 1081         FSS_UNLOCK(sc, s);
 1082 
 1083         VOP_UNLOCK(sc->sc_bs_vp, 0);
 1084 
 1085         return error;
 1086 }
 1087 
 1088 /*
 1089  * Read/write clusters from/to backing store.
 1090  * For persistent snapshots must be called with cl == 0. off is the
 1091  * offset into the snapshot.
       *
       * sc:   the snapshot.
       * rw:   FSS_READ selects a read, anything else a write.
       * cl:   cluster number on the backing store.
       * off:  byte offset added to the start of cluster cl.
       * len:  number of bytes to transfer.
       * data: buffer to read into / write from.
       *
       * The transfer is synchronous: each piece is issued with
       * VOP_STRATEGY() and waited for with biowait().  The backing store
       * vnode is locked for the duration of the call.
       *
       * Returns 0 or the first error from VOP_PUTPAGES(), fss_bmap() or
       * biowait().
 1092  */
 1093 static int
 1094 fss_bs_io(struct fss_softc *sc, fss_io_type rw,
 1095     u_int32_t cl, off_t off, int len, caddr_t data)
 1096 {
 1097         int s, error, todo, count, nra;
 1098         off_t pos;
 1099         daddr_t nbn;
 1100         struct buf *bp;
 1101         struct vnode *vp;
 1102 
 1103         todo = len;
 1104         pos = FSS_CLTOB(sc, cl)+off;
 1105         error = 0;
 1106 
              /*
               * Clean and free the pages covering the range first.
               * NOTE(review): v_interlock is taken here and not released in
               * this function; presumably VOP_PUTPAGES() drops it - confirm.
               */
 1107         vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);
 1108         simple_lock(&sc->sc_bs_vp->v_interlock);
 1109         error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(pos),
 1110             round_page(pos+todo), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE);
 1111 
 1112         while (error == 0 && todo > 0) {
 1113                 error = fss_bmap(sc, pos, todo, &vp, &nbn, &nra);
 1114                 if (error)
 1115                         break;
 1116 
                      /*
                       * Bytes from pos to the end of the contiguous run
                       * (nra+1 file system blocks), limited to what is left.
                       */
 1117                 count = FSS_FSBTOB(sc, nra+1)-FSS_FSBOFF(sc, pos);
 1118                 if (count > todo)
 1119                         count = todo;
 1120 
                      /*
                       * Unmapped block (hole): no I/O, the caller's buffer
                       * is zero filled instead.
                       * NOTE(review): this branch does not look at rw, so a
                       * write that reaches it would zero the source buffer;
                       * presumably only reads get here - confirm.
                       */
 1121                 if (nbn == (daddr_t)-1) {
 1122                         bzero(data, count);
 1123                         todo -= count;
 1124                         data += count;
 1125                         pos += count;
 1126                         continue;
 1127                 }
 1128 
 1129                 s = splbio();
 1130                 bp = pool_get(&bufpool, PR_WAITOK);
 1131                 splx(s);
 1132 
 1133                 BUF_INIT(bp);
 1134                 bp->b_flags = (rw == FSS_READ ? B_READ : 0);
 1135                 bp->b_bcount = count;
 1136                 bp->b_bufsize = bp->b_bcount;
 1137                 bp->b_error = 0;
 1138                 bp->b_data = data;
 1139                 bp->b_blkno = bp->b_rawblkno = nbn+btodb(FSS_FSBOFF(sc, pos));
 1140                 bp->b_proc = NULL;
 1141                 bp->b_vp = NULLVP;
 1142                 bgetvp(vp, bp);
                      /* Writes are counted in the vnode's v_numoutput. */
 1143                 if ((bp->b_flags & B_READ) == 0)
 1144                         bp->b_vp->v_numoutput++;
 1145 
                      /*
                       * Writes, and reads of clusters below sc_indir_size,
                       * are issued with time critical priority.
                       */
 1146                 if ((bp->b_flags & B_READ) == 0 || cl < sc->sc_indir_size)
 1147                         BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
 1148                 VOP_STRATEGY(vp, bp);
 1149 
 1150                 error = biowait(bp);
 1151 
 1152                 if (bp->b_vp != NULL)
 1153                         brelvp(bp);
 1154 
 1155                 s = splbio();
 1156                 pool_put(&bufpool, bp);
 1157                 splx(s);
 1158 
 1159                 if (error)
 1160                         break;
 1161 
 1162                 todo -= count;
 1163                 data += count;
 1164                 pos += count;
 1165         }
 1166 
 1167         VOP_UNLOCK(sc->sc_bs_vp, 0);
 1168 
 1169         return error;
 1170 }
 1171 
 1172 /*
 1173  * Get a pointer to the indirect slot for this cluster.
       *
       * One indirect cluster at a time is kept in sc_indir_data;
       * sc_indir_cur is its number.  Each indirect cluster holds
       * FSS_CLSIZE(sc)/sizeof(u_int32_t) slots.  If the requested slot lives
       * in a different indirect cluster, the current one is written back
       * when dirty and the needed one is read from the backing store, or
       * zero filled if it has never been written (bit clear in
       * sc_indir_valid).
       *
       * Returns a pointer into sc_indir_data, or NULL if the backing store
       * I/O failed.
       *
       * NOTE(review): sc_indir_cur is switched before the read is
       * attempted.  If that read fails, a later call for the same indirect
       * cluster takes the fast path and returns a slot from a buffer that
       * was not loaded successfully - confirm callers cope with this.
 1174  */
 1175 static u_int32_t *
 1176 fss_bs_indir(struct fss_softc *sc, u_int32_t cl)
 1177 {
 1178         u_int32_t icl;
 1179         int ioff;
 1180 
              /* Indirect cluster number and slot index within it. */
 1181         icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t));
 1182         ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t));
 1183 
              /* Fast path: the wanted indirect cluster is already loaded. */
 1184         if (sc->sc_indir_cur == icl)
 1185                 return &sc->sc_indir_data[ioff];
 1186 
              /*
               * Write back the loaded indirect cluster if it was modified.
               * On failure nothing is changed, so it stays loaded and dirty.
               */
 1187         if (sc->sc_indir_dirty) {
 1188                 FSS_STAT_INC(sc, indir_write);
 1189                 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0,
 1190                     FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0)
 1191                         return NULL;
 1192                 setbit(sc->sc_indir_valid, sc->sc_indir_cur);
 1193         }
 1194 
 1195         sc->sc_indir_dirty = 0;
 1196         sc->sc_indir_cur = icl;
 1197 
              /* Load the new indirect cluster, or start from all zeroes. */
 1198         if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) {
 1199                 FSS_STAT_INC(sc, indir_read);
 1200                 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0,
 1201                     FSS_CLSIZE(sc), (caddr_t)sc->sc_indir_data) != 0)
 1202                         return NULL;
 1203         } else
 1204                 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc));
 1205 
 1206         return &sc->sc_indir_data[ioff];
 1207 }
 1208 
 1209 /*
 1210  * The kernel thread (one for every active snapshot).
 1211  *
 1212  * After wakeup it cleans the cache and runs the I/O requests.
       *
       * arg is the snapshot's struct fss_softc.
       *
       * Each pass of the main loop does the following with the snapshot
       * lock held (it is dropped around all blocking I/O):
       *  - sleep on &sc->sc_bs_proc if the previous pass neither freed a
       *    cache slot nor handled a request;
       *  - exit the thread if FSS_BS_THREAD has been cleared;
       *  - persistent snapshot: serve one queued request directly from the
       *    backing store;
       *  - otherwise: write every FSS_CACHE_VALID cache slot to the next
       *    backing store cluster (sc_clnext) and free it, then serve one
       *    queued request by reading the snapshotted device and replacing
       *    the clusters that have already been copied.
 1213  */
 1214 static void
 1215 fss_bs_thread(void *arg)
 1216 {
 1217         int error, len, nfreed, nio, s;
 1218         long off;
 1219         caddr_t addr;
 1220         u_int32_t c, cl, ch, *indirp;
 1221         struct buf *bp, *nbp;
 1222         struct fss_softc *sc;
 1223         struct fss_cache *scp, *scl;
 1224 
 1225         sc = arg;
 1226 
 1227         scl = sc->sc_cache+sc->sc_cache_size;
 1228 
              /* nbp is reused for every read from the snapshotted device. */
 1229         s = splbio();
 1230         nbp = pool_get(&bufpool, PR_WAITOK);
 1231         splx(s);
 1232 
 1233         nfreed = nio = 1;               /* Dont sleep the first time */
 1234 
 1235         FSS_LOCK(sc, s);
 1236 
 1237         for (;;) {
 1238                 if (nfreed == 0 && nio == 0)
 1239                         ltsleep(&sc->sc_bs_proc, PVM-1, "fssbs", 0,
 1240                             &sc->sc_slock);
 1241 
                      /*
                       * Asked to terminate: clear sc_bs_proc, wake anyone
                       * sleeping on it, give back nbp and exit.
                       * NOTE(review): the loop has no other way out, so
                       * kthread_exit() presumably does not return - confirm.
                       */
 1242                 if ((sc->sc_flags & FSS_BS_THREAD) == 0) {
 1243                         sc->sc_bs_proc = NULL;
 1244                         wakeup(&sc->sc_bs_proc);
 1245 
 1246                         FSS_UNLOCK(sc, s);
 1247 
 1248                         s = splbio();
 1249                         pool_put(&bufpool, nbp);
 1250                         splx(s);
 1251 #ifdef FSS_STATISTICS
 1252                         if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
 1253                                 printf("fss%d: cow called %" PRId64 " times,"
 1254                                     " copied %" PRId64 " clusters,"
 1255                                     " cache full %" PRId64 " times\n",
 1256                                     sc->sc_unit,
 1257                                     FSS_STAT_VAL(sc, cow_calls),
 1258                                     FSS_STAT_VAL(sc, cow_copied),
 1259                                     FSS_STAT_VAL(sc, cow_cache_full));
 1260                                 printf("fss%d: %" PRId64 " indir reads,"
 1261                                     " %" PRId64 " indir writes\n",
 1262                                     sc->sc_unit,
 1263                                     FSS_STAT_VAL(sc, indir_read),
 1264                                     FSS_STAT_VAL(sc, indir_write));
 1265                         }
 1266 #endif /* FSS_STATISTICS */
 1267                         kthread_exit(0);
 1268                 }
 1269 
 1270                 /*
 1271                  * Process I/O requests (persistent)
                       *
                       * The request is read straight from the backing store
                       * with cl == 0 and the request's byte offset.
 1272                  */
 1273 
 1274                 if (sc->sc_flags & FSS_PERSISTENT) {
 1275                         nfreed = nio = 0;
 1276 
 1277                         if ((bp = BUFQ_GET(&sc->sc_bufq)) == NULL)
 1278                                 continue;
 1279 
 1280                         nio++;
 1281 
 1282                         if (FSS_ISVALID(sc)) {
 1283                                 FSS_UNLOCK(sc, s);
 1284 
 1285                                 error = fss_bs_io(sc, FSS_READ, 0,
 1286                                     dbtob(bp->b_blkno), bp->b_bcount,
 1287                                     bp->b_data);
 1288 
 1289                                 FSS_LOCK(sc, s);
 1290                         } else
 1291                                 error = ENXIO;
 1292 
 1293                         if (error) {
 1294                                 bp->b_error = error;
 1295                                 bp->b_flags |= B_ERROR;
 1296                                 bp->b_resid = bp->b_bcount;
 1297                         }
 1298                         biodone(bp);
 1299 
 1300                         continue;
 1301                 }
 1302 
 1303                 /*
 1304                  * Clean the cache
                       *
                       * Each valid slot is written to backing store cluster
                       * sc_clnext.  On success that cluster number is stored
                       * in the indirect slot of the slot's cluster and
                       * sc_clnext advances.  The cache slot is freed whether
                       * or not the write succeeded.
 1305                  */
 1306                 nfreed = 0;
 1307                 for (scp = sc->sc_cache; scp < scl; scp++) {
 1308                         if (scp->fc_type != FSS_CACHE_VALID)
 1309                                 continue;
 1310 
 1311                         FSS_UNLOCK(sc, s);
 1312 
 1313                         indirp = fss_bs_indir(sc, scp->fc_cluster);
 1314                         if (indirp != NULL) {
 1315                                 error = fss_write_cluster(scp, sc->sc_clnext);
 1316                         } else
 1317                                 error = EIO;
 1318 
 1319                         FSS_LOCK(sc, s);
 1320 
                              /* indirp is non-NULL whenever error is 0 here. */
 1321                         if (error == 0) {
 1322                                 *indirp = sc->sc_clnext++;
 1323                                 sc->sc_indir_dirty = 1;
 1324                         } else
 1325                                 fss_error(sc, "write bs error %d", error);
 1326 
                              /* Wake fss_read_cluster() callers waiting for this slot. */
 1327                         scp->fc_type = FSS_CACHE_FREE;
 1328                         nfreed++;
 1329                         wakeup(&scp->fc_type);
 1330                 }
 1331 
                      /* Wake fss_read_cluster() callers waiting for any free slot. */
 1332                 if (nfreed)
 1333                         wakeup(&sc->sc_cache);
 1334 
 1335                 /*
 1336                  * Process I/O requests
 1337                  */
 1338                 nio = 0;
 1339 
 1340                 if ((bp = BUFQ_GET(&sc->sc_bufq)) == NULL)
 1341                         continue;
 1342 
 1343                 nio++;
 1344 
 1345                 if (!FSS_ISVALID(sc)) {
 1346                         bp->b_error = ENXIO;
 1347                         bp->b_flags |= B_ERROR;
 1348                         bp->b_resid = bp->b_bcount;
 1349                         biodone(bp);
 1350                         continue;
 1351                 }
 1352 
 1353                 /*
 1354                  * First read from the snapshotted block device.
 1355                  * XXX Split to only read those parts that have not
 1356                  * been saved to backing store?
 1357                  */
 1358 
 1359                 FSS_UNLOCK(sc, s);
 1360 
                      /* nbp reads directly into the request's own data buffer. */
 1361                 BUF_INIT(nbp);
 1362                 nbp->b_flags = B_READ;
 1363                 nbp->b_bcount = bp->b_bcount;
 1364                 nbp->b_bufsize = bp->b_bcount;
 1365                 nbp->b_error = 0;
 1366                 nbp->b_data = bp->b_data;
 1367                 nbp->b_blkno = nbp->b_rawblkno = bp->b_blkno;
 1368                 nbp->b_proc = bp->b_proc;
 1369                 nbp->b_dev = sc->sc_bdev;
 1370                 nbp->b_vp = NULLVP;
 1371 
 1372                 DEV_STRATEGY(nbp);
 1373 
                      /* Device read failed: fail the request with the same error. */
 1374                 if (biowait(nbp) != 0) {
 1375                         bp->b_resid = bp->b_bcount;
 1376                         bp->b_error = nbp->b_error;
 1377                         bp->b_flags |= B_ERROR;
 1378                         biodone(bp);
 1379                         FSS_LOCK(sc, s);
 1380                         continue;
 1381                 }
 1382 
                      /*
                       * cl/ch: first and last cluster touched by the request;
                       * off: offset of the request within cluster cl.
                       * b_resid counts down as clusters are handled.
                       */
 1383                 cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
 1384                 off = FSS_CLOFF(sc, dbtob(bp->b_blkno));
 1385                 ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
 1386                 bp->b_resid = bp->b_bcount;
 1387                 addr = bp->b_data;
 1388 
 1389                 FSS_LOCK(sc, s);
 1390 
 1391                 /*
 1392                  * Replace those parts that have been saved to backing store.
 1393                  */
 1394 
 1395                 for (c = cl; c <= ch;
 1396                     c++, off = 0, bp->b_resid -= len, addr += len) {
 1397                         len = FSS_CLSIZE(sc)-off;
 1398                         if (len > bp->b_resid)
 1399                                 len = bp->b_resid;
 1400 
                              /* Not copied: the data just read from the device stands. */
 1401                         if (isclr(sc->sc_copied, c))
 1402                                 continue;
 1403 
 1404                         FSS_UNLOCK(sc, s);
 1405 
 1406                         indirp = fss_bs_indir(sc, c);
 1407 
 1408                         FSS_LOCK(sc, s);
 1409 
                              /*
                               * NOTE(review): a NULL indirp (indirect cluster
                               * I/O failed) is handled like "not on backing
                               * store" rather than as an error - confirm this
                               * is intended.
                               */
 1410                         if (indirp == NULL || *indirp == 0) {
 1411                                 /*
 1412                                  * Not on backing store. Either in cache
 1413                                  * or hole in the snapshotted block device.
 1414                                  */
 1415                                 for (scp = sc->sc_cache; scp < scl; scp++)
 1416                                         if (scp->fc_type == FSS_CACHE_VALID &&
 1417                                             scp->fc_cluster == c)
 1418                                                 break;
 1419                                 if (scp < scl)
 1420                                         memcpy(addr, scp->fc_data+off, len);
 1421                                 else
 1422                                         memset(addr, 0, len);
 1423                                 continue;
 1424                         }
 1425                         /*
 1426                          * Read from backing store.
 1427                          */
 1428 
 1429                         FSS_UNLOCK(sc, s);
 1430 
                              /*
                               * On failure the whole request is failed; the
                               * break skips the loop's b_resid update, so
                               * b_resid stays at b_bcount.
                               */
 1431                         if ((error = fss_bs_io(sc, FSS_READ, *indirp,
 1432                             off, len, addr)) != 0) {
 1433                                 bp->b_resid = bp->b_bcount;
 1434                                 bp->b_error = error;
 1435                                 bp->b_flags |= B_ERROR;
 1436                                 FSS_LOCK(sc, s);
 1437                                 break;
 1438                         }
 1439 
 1440                         FSS_LOCK(sc, s);
 1441 
 1442                 }
 1443 
 1444                 biodone(bp);
 1445         }
 1446 }

Cache object: 5792f16b8fcba99a750c5c94135e8590


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.