The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/fss.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: fss.c,v 1.113 2022/09/24 23:18:54 thorpej Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Juergen Hannken-Illjes.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * File system snapshot disk driver.
   34  *
   35  * Block/character interface to the snapshot of a mounted file system.
   36  */
   37 
   38 #include <sys/cdefs.h>
   39 __KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.113 2022/09/24 23:18:54 thorpej Exp $");
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/namei.h>
   44 #include <sys/proc.h>
   45 #include <sys/errno.h>
   46 #include <sys/kmem.h>
   47 #include <sys/buf.h>
   48 #include <sys/ioctl.h>
   49 #include <sys/disklabel.h>
   50 #include <sys/device.h>
   51 #include <sys/disk.h>
   52 #include <sys/stat.h>
   53 #include <sys/mount.h>
   54 #include <sys/vnode.h>
   55 #include <sys/file.h>
   56 #include <sys/uio.h>
   57 #include <sys/conf.h>
   58 #include <sys/kthread.h>
   59 #include <sys/fstrans.h>
   60 #include <sys/vfs_syscalls.h>           /* For do_sys_unlink(). */
   61 
   62 #include <miscfs/specfs/specdev.h>
   63 
   64 #include <dev/fssvar.h>
   65 
   66 #include <uvm/uvm.h>
   67 
   68 #include "ioconf.h"
   69 
       /* Device-switch entry points, declared via the dev_type_* macros. */
    70 dev_type_open(fss_open);
    71 dev_type_close(fss_close);
    72 dev_type_read(fss_read);
    73 dev_type_write(fss_write);
    74 dev_type_ioctl(fss_ioctl);
    75 dev_type_strategy(fss_strategy);
    76 dev_type_dump(fss_dump);
    77 dev_type_size(fss_size);
    78 
       /* Internal helpers; definitions below and in the remainder of the file. */
    79 static void fss_unmount_hook(struct mount *);
    80 static int fss_copy_on_write(void *, struct buf *, bool);
    81 static inline void fss_error(struct fss_softc *, const char *);
    82 static int fss_create_files(struct fss_softc *, struct fss_set *,
    83     off_t *, struct lwp *);
    84 static int fss_create_snapshot(struct fss_softc *, struct fss_set *,
    85     struct lwp *);
    86 static int fss_delete_snapshot(struct fss_softc *, struct lwp *);
    87 static int fss_softc_alloc(struct fss_softc *);
    88 static void fss_softc_free(struct fss_softc *);
    89 static int fss_read_cluster(struct fss_softc *, u_int32_t);
    90 static void fss_bs_thread(void *);
    91 static int fss_bs_io(struct fss_softc *, fss_io_type,
    92     u_int32_t, off_t, int, void *, size_t *);
    93 static u_int32_t *fss_bs_indir(struct fss_softc *, u_int32_t);
    94 
       /* Module-global state; fss_creating is protected by fss_device_lock. */
    95 static kmutex_t fss_device_lock;        /* Protect all units. */
    96 static kcondvar_t fss_device_cv;        /* Serialize snapshot creation. */
    97 static bool fss_creating = false;       /* Currently creating a snapshot. */
    98 static int fss_num_attached = 0;        /* Number of attached devices. */
       /* Unmount hook so active snapshots are invalidated on unmount. */
    99 static struct vfs_hooks fss_vfs_hooks = {
   100         .vh_unmount = fss_unmount_hook
   101 };
  102 
       /* Block device switch: a read-only, MP-safe disk-like device. */
   103 const struct bdevsw fss_bdevsw = {
   104         .d_open = fss_open,
   105         .d_close = fss_close,
   106         .d_strategy = fss_strategy,
   107         .d_ioctl = fss_ioctl,
   108         .d_dump = fss_dump,
   109         .d_psize = fss_size,
   110         .d_discard = nodiscard,
   111         .d_flag = D_DISK | D_MPSAFE
   112 };
  113 
       /* Character device switch; raw I/O goes through fss_read/fss_write. */
   114 const struct cdevsw fss_cdevsw = {
   115         .d_open = fss_open,
   116         .d_close = fss_close,
   117         .d_read = fss_read,
   118         .d_write = fss_write,
   119         .d_ioctl = fss_ioctl,
   120         .d_stop = nostop,
   121         .d_tty = notty,
   122         .d_poll = nopoll,
   123         .d_mmap = nommap,
   124         .d_kqfilter = nokqfilter,
   125         .d_discard = nodiscard,
   126         .d_flag = D_DISK | D_MPSAFE
   127 };
  128 
       /* Autoconf(9) glue: fss is a pseudo-device attached on demand. */
   129 static int fss_match(device_t, cfdata_t, void *);
   130 static void fss_attach(device_t, device_t, void *);
   131 static int fss_detach(device_t, int);
   132 
   133 CFATTACH_DECL_NEW(fss, sizeof(struct fss_softc),
   134     fss_match, fss_attach, fss_detach, NULL);
  135 
       /*
        * Driver initialization, called once at boot/module load: set up the
        * global lock and condvar, then register the autoconf attachment.
        */
   136 void
   137 fssattach(int num)
   138 {
   139 
   140         mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE);
   141         cv_init(&fss_device_cv, "snapwait");
   142         if (config_cfattach_attach(fss_cd.cd_name, &fss_ca))
   143                 aprint_error("%s: unable to register\n", fss_cd.cd_name);
   144 }
  145 
       /* Pseudo-device: every configuration matches. */
   146 static int
   147 fss_match(device_t self, cfdata_t cfdata, void *aux)
   148 {
   149         return 1;
   150 }
  151 
       /*
        * Attach a unit: initialize per-unit lock, condvars, buffer queue and
        * disk structure.  The first attached unit also registers the VFS
        * unmount hook.
        */
   152 static void
   153 fss_attach(device_t parent, device_t self, void *aux)
   154 {
   155         struct fss_softc *sc = device_private(self);
   156 
   157         sc->sc_dev = self;
   158         sc->sc_bdev = NODEV;
   159         mutex_init(&sc->sc_slock, MUTEX_DEFAULT, IPL_NONE);
   160         cv_init(&sc->sc_work_cv, "fssbs");
   161         cv_init(&sc->sc_cache_cv, "cowwait");
   162         bufq_alloc(&sc->sc_bufq, "fcfs", 0);
   163         sc->sc_dkdev = kmem_zalloc(sizeof(*sc->sc_dkdev), KM_SLEEP);
   164         sc->sc_dkdev->dk_info = NULL;
   165         disk_init(sc->sc_dkdev, device_xname(self), NULL);
   166         if (!pmf_device_register(self, NULL, NULL))
   167                 aprint_error_dev(self, "couldn't establish power handler\n");
   168 
       /* Register the unmount hook exactly once, for the first unit. */
   169         if (fss_num_attached++ == 0)
   170                 vfs_hooks_attach(&fss_vfs_hooks);
   171 }
  172 
       /*
        * Detach a unit.  Fails with EBUSY while a snapshot is configured;
        * otherwise tears down everything fss_attach() created, in reverse
        * order.  The last detached unit removes the VFS unmount hook.
        */
   173 static int
   174 fss_detach(device_t self, int flags)
   175 {
   176         struct fss_softc *sc = device_private(self);
   177 
   178         mutex_enter(&sc->sc_slock);
   179         if (sc->sc_state != FSS_IDLE) {
   180                 mutex_exit(&sc->sc_slock);
   181                 return EBUSY;
   182         }
   183         mutex_exit(&sc->sc_slock);
   184 
   185         if (--fss_num_attached == 0)
   186                 vfs_hooks_detach(&fss_vfs_hooks);
   187 
   188         pmf_device_deregister(self);
   189         mutex_destroy(&sc->sc_slock);
   190         cv_destroy(&sc->sc_work_cv);
   191         cv_destroy(&sc->sc_cache_cv);
   192         bufq_drain(sc->sc_bufq);
   193         bufq_free(sc->sc_bufq);
   194         disk_destroy(sc->sc_dkdev);
   195         kmem_free(sc->sc_dkdev, sizeof(*sc->sc_dkdev));
   196 
   197         return 0;
   198 }
  199 
       /*
        * Open the block or character device.  The unit is created on first
        * open by attaching a pseudo-device; the matching FSS_[BC]DEV_OPEN
        * flag records which flavour is open.
        */
   200 int
   201 fss_open(dev_t dev, int flags, int mode, struct lwp *l)
   202 {
   203         int mflag;
   204         cfdata_t cf;
   205         struct fss_softc *sc;
   206 
   207         mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
   208 
   209         mutex_enter(&fss_device_lock);
   210 
   211         sc = device_lookup_private(&fss_cd, minor(dev));
   212         if (sc == NULL) {
       /* First open of this unit: attach a pseudo-device on the fly. */
   213                 cf = kmem_zalloc(sizeof(*cf), KM_SLEEP);
   214                 cf->cf_name = fss_cd.cd_name;
   215                 cf->cf_atname = fss_cd.cd_name;
   216                 cf->cf_unit = minor(dev);
   217                 cf->cf_fstate = FSTATE_STAR;
   218                 sc = device_private(config_attach_pseudo(cf));
   219                 if (sc == NULL) {
       /* NOTE(review): cf appears to leak on this failure path -- confirm. */
   220                         mutex_exit(&fss_device_lock);
   221                         return ENOMEM;
   222                 }
   223                 sc->sc_state = FSS_IDLE;
   224         }
   225 
   226         mutex_enter(&sc->sc_slock);
   227 
   228         sc->sc_flags |= mflag;
   229 
   230         mutex_exit(&sc->sc_slock);
   231         mutex_exit(&fss_device_lock);
   232 
   233         return 0;
   234 }
  235 
       /*
        * Close the block or character device.  On last close of an IDLE unit
        * the pseudo-device is detached and its cfdata freed.  If a snapshot
        * is still configured with FSS_UNCONFIG_ON_CLOSE set, it is first
        * unconfigured via a recursive FSSIOCCLR ioctl, then close restarts.
        */
   236 int
   237 fss_close(dev_t dev, int flags, int mode, struct lwp *l)
   238 {
   239         int mflag, error;
   240         cfdata_t cf;
   241         struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev));
   242 
   243         if (sc == NULL)
   244                 return ENXIO;
   245 
   246         mflag = (mode == S_IFCHR ? FSS_CDEV_OPEN : FSS_BDEV_OPEN);
   247         error = 0;
   248 
   249         mutex_enter(&fss_device_lock);
   250 restart:
   251         mutex_enter(&sc->sc_slock);
       /* Not the last open flavour: just clear our flag and return. */
   252         if ((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) != mflag) {
   253                 sc->sc_flags &= ~mflag;
   254                 mutex_exit(&sc->sc_slock);
   255                 mutex_exit(&fss_device_lock);
   256                 return 0;
   257         }
       /* Last close with auto-unconfigure requested: clear the snapshot. */
   258         if (sc->sc_state != FSS_IDLE &&
   259             (sc->sc_uflags & FSS_UNCONFIG_ON_CLOSE) != 0) {
   260                 sc->sc_uflags &= ~FSS_UNCONFIG_ON_CLOSE;
   261                 mutex_exit(&sc->sc_slock);
   262                 error = fss_ioctl(dev, FSSIOCCLR, NULL, FWRITE, l);
   263                 goto restart;
   264         }
       /* Snapshot still configured: keep the unit attached. */
   265         if (sc->sc_state != FSS_IDLE) {
   266                 mutex_exit(&sc->sc_slock);
   267                 mutex_exit(&fss_device_lock);
   268                 return error;
   269         }
   270 
   271         KASSERT(sc->sc_state == FSS_IDLE);
   272         KASSERT((sc->sc_flags & (FSS_CDEV_OPEN|FSS_BDEV_OPEN)) == mflag);
   273         mutex_exit(&sc->sc_slock);
       /* Detach the pseudo-device and free the cfdata allocated in open. */
   274         cf = device_cfdata(sc->sc_dev);
   275         error = config_detach(sc->sc_dev, DETACH_QUIET);
   276         if (! error)
   277                 kmem_free(cf, sizeof(*cf));
   278         mutex_exit(&fss_device_lock);
   279 
   280         return error;
   281 }
  282 
       /*
        * Queue a read request for the backing-store thread; the snapshot is
        * read-only, so writes fail with EROFS and anything else requires the
        * unit to be in FSS_ACTIVE state.
        */
   283 void
   284 fss_strategy(struct buf *bp)
   285 {
   286         const bool write = ((bp->b_flags & B_READ) != B_READ);
   287         struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev));
   288 
   289         if (sc == NULL) {
   290                 bp->b_error = ENXIO;
   291                 goto done;
   292         }
   293 
   294         mutex_enter(&sc->sc_slock);
   295 
   296         if (write || sc->sc_state != FSS_ACTIVE) {
   297                 bp->b_error = (write ? EROFS : ENXIO);
   298                 goto done;
   299         }
   300         /* Check bounds for non-persistent snapshots. */
   301         if ((sc->sc_flags & FSS_PERSISTENT) == 0 &&
   302             bounds_check_with_mediasize(bp, DEV_BSIZE,
   303             btodb(FSS_CLTOB(sc, sc->sc_clcount - 1) + sc->sc_clresid)) <= 0)
   304                 goto done;
   305 
       /* Hand the request to fss_bs_thread and wake it. */
   306         bp->b_rawblkno = bp->b_blkno;
   307         bufq_put(sc->sc_bufq, bp);
   308         cv_signal(&sc->sc_work_cv);
   309 
   310         mutex_exit(&sc->sc_slock);
   311         return;
   312 
   313 done:
       /* sc_slock is held here whenever sc was found above. */
   314         if (sc != NULL)
   315                 mutex_exit(&sc->sc_slock);
   316         bp->b_resid = bp->b_bcount;
   317         biodone(bp);
   318 }
  319 
       /* Raw character-device read: route through physio to fss_strategy. */
   320 int
   321 fss_read(dev_t dev, struct uio *uio, int flags)
   322 {
   323         return physio(fss_strategy, NULL, dev, B_READ, minphys, uio);
   324 }
  325 
       /*
        * Raw character-device write; fss_strategy rejects writes with EROFS,
        * so this always fails after queueing.
        */
   326 int
   327 fss_write(dev_t dev, struct uio *uio, int flags)
   328 {
   329         return physio(fss_strategy, NULL, dev, B_WRITE, minphys, uio);
   330 }
  331 
  332 int
  333 fss_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
  334 {
  335         int error = 0;
  336         struct fss_softc *sc = device_lookup_private(&fss_cd, minor(dev));
  337         struct fss_set _fss;
  338         struct fss_set *fss = (struct fss_set *)data;
  339         struct fss_set50 *fss50 = (struct fss_set50 *)data;
  340         struct fss_get *fsg = (struct fss_get *)data;
  341 #ifndef _LP64
  342         struct fss_get50 *fsg50 = (struct fss_get50 *)data;
  343 #endif
  344 
  345         if (sc == NULL)
  346                 return ENXIO;
  347 
  348         switch (cmd) {
  349         case FSSIOCSET50:
  350                 fss = &_fss;
  351                 fss->fss_mount = fss50->fss_mount;
  352                 fss->fss_bstore = fss50->fss_bstore;
  353                 fss->fss_csize = fss50->fss_csize;
  354                 fss->fss_flags = 0;
  355                 /* Fall through */
  356         case FSSIOCSET:
  357                 mutex_enter(&sc->sc_slock);
  358                 if ((flag & FWRITE) == 0)
  359                         error = EPERM;
  360                 if (error == 0 && sc->sc_state != FSS_IDLE) {
  361                         error = EBUSY;
  362                 } else {
  363                         sc->sc_state = FSS_CREATING;
  364                         copyinstr(fss->fss_mount, sc->sc_mntname,
  365                             sizeof(sc->sc_mntname), NULL);
  366                         memset(&sc->sc_time, 0, sizeof(sc->sc_time));
  367                         sc->sc_clshift = 0;
  368                 }
  369                 mutex_exit(&sc->sc_slock);
  370                 if (error)
  371                         break;
  372 
  373                 /*
  374                  * Serialize snapshot creation.
  375                  */
  376                 mutex_enter(&fss_device_lock);
  377                 while (fss_creating) {
  378                         error = cv_wait_sig(&fss_device_cv, &fss_device_lock);
  379                         if (error) {
  380                                 mutex_enter(&sc->sc_slock);
  381                                 KASSERT(sc->sc_state == FSS_CREATING);
  382                                 sc->sc_state = FSS_IDLE;
  383                                 mutex_exit(&sc->sc_slock);
  384                                 mutex_exit(&fss_device_lock);
  385                                 break;
  386                         }
  387                 }
  388                 fss_creating = true;
  389                 mutex_exit(&fss_device_lock);
  390 
  391                 error = fss_create_snapshot(sc, fss, l);
  392                 mutex_enter(&sc->sc_slock);
  393                 if (error == 0) {
  394                         KASSERT(sc->sc_state == FSS_ACTIVE);
  395                         sc->sc_uflags = fss->fss_flags;
  396                 } else {
  397                         KASSERT(sc->sc_state == FSS_CREATING);
  398                         sc->sc_state = FSS_IDLE;
  399                 }
  400                 mutex_exit(&sc->sc_slock);
  401 
  402                 mutex_enter(&fss_device_lock);
  403                 fss_creating = false;
  404                 cv_broadcast(&fss_device_cv);
  405                 mutex_exit(&fss_device_lock);
  406 
  407                 break;
  408 
  409         case FSSIOCCLR:
  410                 mutex_enter(&sc->sc_slock);
  411                 if ((flag & FWRITE) == 0) {
  412                         error = EPERM;
  413                 } else if (sc->sc_state != FSS_ACTIVE) {
  414                         error = EBUSY;
  415                 } else {
  416                         sc->sc_state = FSS_DESTROYING;
  417                 }
  418                 mutex_exit(&sc->sc_slock);
  419                 if (error)
  420                         break;
  421 
  422                 error = fss_delete_snapshot(sc, l);
  423                 mutex_enter(&sc->sc_slock);
  424                 if (error)
  425                         fss_error(sc, "Failed to delete snapshot");
  426                 else
  427                         KASSERT(sc->sc_state == FSS_IDLE);
  428                 mutex_exit(&sc->sc_slock);
  429                 break;
  430 
  431 #ifndef _LP64
  432         case FSSIOCGET50:
  433                 mutex_enter(&sc->sc_slock);
  434                 if (sc->sc_state == FSS_IDLE) {
  435                         error = ENXIO;
  436                 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
  437                         memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN);
  438                         fsg50->fsg_csize = FSS_CLSIZE(sc);
  439                         timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time);
  440                         fsg50->fsg_mount_size = sc->sc_clcount;
  441                         fsg50->fsg_bs_size = sc->sc_clnext;
  442                         error = 0;
  443                 } else {
  444                         memcpy(fsg50->fsg_mount, sc->sc_mntname, MNAMELEN);
  445                         fsg50->fsg_csize = 0;
  446                         timeval_to_timeval50(&sc->sc_time, &fsg50->fsg_time);
  447                         fsg50->fsg_mount_size = 0;
  448                         fsg50->fsg_bs_size = 0;
  449                         error = 0;
  450                 }
  451                 mutex_exit(&sc->sc_slock);
  452                 break;
  453 #endif /* _LP64 */
  454 
  455         case FSSIOCGET:
  456                 mutex_enter(&sc->sc_slock);
  457                 if (sc->sc_state == FSS_IDLE) {
  458                         error = ENXIO;
  459                 } else if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
  460                         memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
  461                         fsg->fsg_csize = FSS_CLSIZE(sc);
  462                         fsg->fsg_time = sc->sc_time;
  463                         fsg->fsg_mount_size = sc->sc_clcount;
  464                         fsg->fsg_bs_size = sc->sc_clnext;
  465                         error = 0;
  466                 } else {
  467                         memcpy(fsg->fsg_mount, sc->sc_mntname, MNAMELEN);
  468                         fsg->fsg_csize = 0;
  469                         fsg->fsg_time = sc->sc_time;
  470                         fsg->fsg_mount_size = 0;
  471                         fsg->fsg_bs_size = 0;
  472                         error = 0;
  473                 }
  474                 mutex_exit(&sc->sc_slock);
  475                 break;
  476 
  477         case FSSIOFSET:
  478                 mutex_enter(&sc->sc_slock);
  479                 sc->sc_uflags = *(int *)data;
  480                 mutex_exit(&sc->sc_slock);
  481                 error = 0;
  482                 break;
  483 
  484         case FSSIOFGET:
  485                 mutex_enter(&sc->sc_slock);
  486                 *(int *)data = sc->sc_uflags;
  487                 mutex_exit(&sc->sc_slock);
  488                 error = 0;
  489                 break;
  490 
  491         default:
  492                 error = EINVAL;
  493                 break;
  494         }
  495 
  496         return error;
  497 }
  498 
       /* Partition size for dumping: snapshots cannot be dumped to. */
   499 int
   500 fss_size(dev_t dev)
   501 {
   502         return -1;
   503 }
  504 
       /* Crash dump entry point: the snapshot device is read-only. */
   505 int
   506 fss_dump(dev_t dev, daddr_t blkno, void *va,
   507     size_t size)
   508 {
   509         return EROFS;
   510 }
  511 
  512 /*
  513  * An error occurred reading or writing the snapshot or backing store.
  514  * If it is the first error log to console and disestablish cow handler.
  515  * The caller holds the mutex.
  516  */
   517 static inline void
   518 fss_error(struct fss_softc *sc, const char *msg)
   519 {
   520 
   521         KASSERT(mutex_owned(&sc->sc_slock));
   522 
       /* Only the first error is logged and acted upon. */
   523         if ((sc->sc_flags & FSS_ERROR))
   524                 return;
   525 
   526         aprint_error_dev(sc->sc_dev, "snapshot invalid: %s\n", msg);
   527         if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
       /*
        * fscow_disestablish() may sleep, so the caller's sc_slock is
        * dropped around the call and retaken afterwards.
        */
   528                 mutex_exit(&sc->sc_slock);
   529                 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc);
   530                 mutex_enter(&sc->sc_slock);
   531         }
   532         sc->sc_flags |= FSS_ERROR;
  534 
  535 /*
  536  * Allocate the variable sized parts of the softc and
  537  * fork the kernel thread.
  538  *
  539  * The fields sc_clcount, sc_clshift, sc_cache_size and sc_indir_size
  540  * must be initialized.
  541  */
   542 static int
   543 fss_softc_alloc(struct fss_softc *sc)
   544 {
   545         int i, error;
   546 
       /*
        * Persistent (file-system internal) snapshots need none of the
        * copy-on-write bookkeeping; only the backing-store thread.
        */
   547         if ((sc->sc_flags & FSS_PERSISTENT) == 0) {
       /* One bit per cluster: has it been copied to backing store yet? */
   548                 sc->sc_copied =
   549                     kmem_zalloc(howmany(sc->sc_clcount, NBBY), KM_SLEEP);
   550                 sc->sc_cache = kmem_alloc(sc->sc_cache_size *
   551                     sizeof(struct fss_cache), KM_SLEEP);
   552                 for (i = 0; i < sc->sc_cache_size; i++) {
   553                         sc->sc_cache[i].fc_type = FSS_CACHE_FREE;
   554                         sc->sc_cache[i].fc_data =
   555                             kmem_alloc(FSS_CLSIZE(sc), KM_SLEEP);
   556                         cv_init(&sc->sc_cache[i].fc_state_cv, "cowwait1");
   557                 }
   558 
       /* Indirect-block validity bitmap plus one cluster-sized buffer. */
   559                 sc->sc_indir_valid =
   560                     kmem_zalloc(howmany(sc->sc_indir_size, NBBY), KM_SLEEP);
   561                 sc->sc_indir_data = kmem_zalloc(FSS_CLSIZE(sc), KM_SLEEP);
   562         } else {
   563                 sc->sc_copied = NULL;
   564                 sc->sc_cache = NULL;
   565                 sc->sc_indir_valid = NULL;
   566                 sc->sc_indir_data = NULL;
   567         }
   568 
       /* Start the backing-store worker; flag first so it keeps running. */
   569         sc->sc_flags |= FSS_BS_THREAD;
   570         if ((error = kthread_create(PRI_BIO, KTHREAD_MUSTJOIN, NULL,
   571             fss_bs_thread, sc, &sc->sc_bs_lwp,
   572             "%s", device_xname(sc->sc_dev))) != 0) {
   573                 sc->sc_flags &= ~FSS_BS_THREAD;
   574                 return error;
   575         }
   576 
   577         disk_attach(sc->sc_dkdev);
   578 
   579         return 0;
   580 }
  581 
  582 /*
  583  * Free the variable sized parts of the softc.
  584  */
   585 static void
   586 fss_softc_free(struct fss_softc *sc)
   587 {
   588         int i;
   589 
       /* Stop the backing-store thread first; it is woken and joined. */
   590         if ((sc->sc_flags & FSS_BS_THREAD) != 0) {
   591                 mutex_enter(&sc->sc_slock);
   592                 sc->sc_flags &= ~FSS_BS_THREAD;
   593                 cv_signal(&sc->sc_work_cv);
   594                 mutex_exit(&sc->sc_slock);
   595                 kthread_join(sc->sc_bs_lwp);
   596 
   597                 disk_detach(sc->sc_dkdev);
   598         }
   599 
       /* Release everything fss_softc_alloc() allocated; NULL the pointers
          so this function is safe to call more than once. */
   600         if (sc->sc_copied != NULL)
   601                 kmem_free(sc->sc_copied, howmany(sc->sc_clcount, NBBY));
   602         sc->sc_copied = NULL;
   603 
   604         if (sc->sc_cache != NULL) {
   605                 for (i = 0; i < sc->sc_cache_size; i++)
   606                         if (sc->sc_cache[i].fc_data != NULL) {
   607                                 cv_destroy(&sc->sc_cache[i].fc_state_cv);
   608                                 kmem_free(sc->sc_cache[i].fc_data,
   609                                     FSS_CLSIZE(sc));
   610                         }
   611                 kmem_free(sc->sc_cache,
   612                     sc->sc_cache_size*sizeof(struct fss_cache));
   613         }
   614         sc->sc_cache = NULL;
   615 
   616         if (sc->sc_indir_valid != NULL)
   617                 kmem_free(sc->sc_indir_valid, howmany(sc->sc_indir_size, NBBY));
   618         sc->sc_indir_valid = NULL;
   619 
   620         if (sc->sc_indir_data != NULL)
   621                 kmem_free(sc->sc_indir_data, FSS_CLSIZE(sc));
   622         sc->sc_indir_data = NULL;
   623 }
  624 
  625 /*
  626  * Set all active snapshots on this file system into ERROR state.
  627  */
   628 static void
   629 fss_unmount_hook(struct mount *mp)
   630 {
   631         int i;
   632         struct fss_softc *sc;
   633 
       /* Walk every unit; fss_device_lock keeps the unit list stable. */
   634         mutex_enter(&fss_device_lock);
   635         for (i = 0; i < fss_cd.cd_ndevs; i++) {
   636                 if ((sc = device_lookup_private(&fss_cd, i)) == NULL)
   637                         continue;
   638                 mutex_enter(&sc->sc_slock);
   639                 if (sc->sc_state != FSS_IDLE && sc->sc_mount == mp)
   640                         fss_error(sc, "forced by unmount");
   641                 mutex_exit(&sc->sc_slock);
   642         }
   643         mutex_exit(&fss_device_lock);
   644 }
  645 
  646 /*
  647  * A buffer is written to the snapshotted block device. Copy to
  648  * backing store if needed.
  649  */
   650 static int
   651 fss_copy_on_write(void *v, struct buf *bp, bool data_valid)
   652 {
   653         int error;
   654         u_int32_t cl, ch, c;
   655         struct fss_softc *sc = v;
   656 
   657         mutex_enter(&sc->sc_slock);
       /* Snapshot gone or in error: nothing to preserve. */
   658         if (sc->sc_state != FSS_ACTIVE) {
   659                 mutex_exit(&sc->sc_slock);
   660                 return 0;
   661         }
   662 
       /* Cluster range [cl, ch] touched by this write. */
   663         cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
   664         ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
   665         error = 0;
       /*
        * The pagedaemon must not sleep for memory in fss_read_cluster();
        * if any cluster in range is still uncopied, fail with ENOMEM
        * instead of deadlocking.
        */
   666         if (curlwp == uvm.pagedaemon_lwp) {
   667                 for (c = cl; c <= ch; c++)
   668                         if (isclr(sc->sc_copied, c)) {
   669                                 error = ENOMEM;
   670                                 break;
   671                         }
   672         }
   673         mutex_exit(&sc->sc_slock);
   674 
       /* Copy each affected cluster to the backing store before the write. */
   675         if (error == 0)
   676                 for (c = cl; c <= ch; c++) {
   677                         error = fss_read_cluster(sc, c);
   678                         if (error)
   679                                 break;
   680                 }
   681 
   682         return error;
   683 }
  684 
  685 /*
  686  * Lookup and open needed files.
  687  *
  688  * For file system internal snapshot initializes sc_mntname, sc_mount,
  689  * sc_bs_vp and sc_time.
  690  *
  691  * Otherwise returns dev and size of the underlying block device.
  692  * Initializes sc_mntname, sc_mount, sc_bdev, sc_bs_vp and sc_mount
  693  */
   694 static int
   695 fss_create_files(struct fss_softc *sc, struct fss_set *fss,
   696     off_t *bsize, struct lwp *l)
   697 {
   698         int error, bits, fsbsize;
   699         uint64_t numsec;
   700         unsigned int secsize;
   701         struct timespec ts;
   702         /* distinguish lookup 1 from lookup 2 to reduce mistakes */
   703         struct pathbuf *pb2;
   704         struct vnode *vp, *vp2;
   705 
   706         /*
   707          * Get the mounted file system.
   708          */
   709 
   710         error = namei_simple_user(fss->fss_mount,
   711                                 NSM_FOLLOW_NOEMULROOT, &vp);
   712         if (error != 0)
   713                 return error;
   714 
       /* fss_mount must name the root of a mounted file system. */
   715         if ((vp->v_vflag & VV_ROOT) != VV_ROOT) {
   716                 vrele(vp);
   717                 return EINVAL;
   718         }
   719 
   720         sc->sc_mount = vp->v_mount;
   721         memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN);
   722 
   723         vrele(vp);
   724 
   725         /*
   726          * Check for file system internal snapshot.
   727          */
   728 
   729         error = namei_simple_user(fss->fss_bstore,
   730                                 NSM_FOLLOW_NOEMULROOT, &vp);
   731         if (error != 0)
   732                 return error;
   733 
       /*
        * Backing store is a regular file on the snapshotted file system
        * itself: use a persistent, file-system internal snapshot.
        */
   734         if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) {
   735                 sc->sc_flags |= FSS_PERSISTENT;
   736                 sc->sc_bs_vp = vp;
   737 
       /* Derive sc_bs_bshift so FSS_FSBSIZE(sc) matches the fs block size. */
   738                 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
   739                 bits = sizeof(sc->sc_bs_bshift)*NBBY;
   740                 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits;
   741                     sc->sc_bs_bshift++)
   742                         if (FSS_FSBSIZE(sc) == fsbsize)
   743                                 break;
   744                 if (sc->sc_bs_bshift >= bits)
   745                         return EINVAL;
   746 
   747                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
   748                 sc->sc_clshift = 0;
   749 
       /* Optionally unlink the backing file; it stays open via sc_bs_vp. */
   750                 if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
   751                         error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
   752                         if (error)
   753                                 return error;
   754                 }
   755                 error = vn_lock(vp, LK_EXCLUSIVE);
   756                 if (error != 0)
   757                         return error;
       /* Let the file system take the snapshot; record its timestamp. */
   758                 error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts);
   759                 TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts);
   760 
   761                 VOP_UNLOCK(sc->sc_bs_vp);
   762 
   763                 return error;
   764         }
   765         vrele(vp);
   766 
   767         /*
   768          * Get the block device it is mounted on and its size.
   769          */
   770 
   771         error = spec_node_lookup_by_mount(sc->sc_mount, &vp);
   772         if (error)
   773                 return error;
   774         sc->sc_bdev = vp->v_rdev;
   775 
   776         error = getdisksize(vp, &numsec, &secsize);
   777         vrele(vp);
   778         if (error)
   779                 return error;
   780 
   781         *bsize = (off_t)numsec*secsize;
   782 
   783         /*
   784          * Get the backing store
   785          */
   786 
   787         error = pathbuf_copyin(fss->fss_bstore, &pb2);
   788         if (error) {
   789                 return error;
   790         }
   791         error = vn_open(NULL, pb2, 0, FREAD|FWRITE, 0, &vp2, NULL, NULL);
   792         if (error != 0) {
   793                 pathbuf_destroy(pb2);
   794                 return error;
   795         }
   796         VOP_UNLOCK(vp2);
   797 
   798         sc->sc_bs_vp = vp2;
   799 
       /* Backing store must be a regular file or a character device. */
   800         if (vp2->v_type != VREG && vp2->v_type != VCHR) {
   801                 vrele(vp2);
   802                 pathbuf_destroy(pb2);
   803                 return EINVAL;
   804         }
   805         pathbuf_destroy(pb2);
   806 
   807         if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
   808                 error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
   809                 if (error)
   810                         return error;
   811         }
       /*
        * Choose the backing-store block shift: for a regular file, match
        * the file system's iosize (must be a power of two); for a device,
        * use DEV_BSHIFT.
        */
   812         if (sc->sc_bs_vp->v_type == VREG) {
   813                 fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
   814                 if (fsbsize & (fsbsize-1))      /* No power of two */
   815                         return EINVAL;
   816                 for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32;
   817                     sc->sc_bs_bshift++)
   818                         if (FSS_FSBSIZE(sc) == fsbsize)
   819                                 break;
   820                 if (sc->sc_bs_bshift >= 32)
   821                         return EINVAL;
   822                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
   823         } else {
   824                 sc->sc_bs_bshift = DEV_BSHIFT;
   825                 sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
   826         }
   827 
   828         return 0;
   829 }
  830 
  831 /*
  832  * Create a snapshot.
  833  */
/*
 * Open the needed files and activate the snapshot.
 *
 * sc    - softc of the snapshot device being configured.
 * fss   - user-supplied setup parameters (device, backing store, csize).
 * l     - the requesting lwp (its credentials close the backing store
 *         on the error path).
 *
 * Returns 0 on success or an errno; on failure everything acquired
 * here is released again and sc->sc_bs_vp is reset to NULL.
 */
static int
fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l)
{
        int len, error;
        u_int32_t csize;
        off_t bsize;

        bsize = 0;      /* XXX gcc */

        /*
         * Open needed files.
         */
        if ((error = fss_create_files(sc, fss, &bsize, l)) != 0)
                goto bad;

        /*
         * A persistent snapshot needs no cluster geometry or COW hook:
         * the on-disk snapshot file is already complete.
         */
        if (sc->sc_flags & FSS_PERSISTENT) {
                fss_softc_alloc(sc);
                mutex_enter(&sc->sc_slock);
                sc->sc_state = FSS_ACTIVE;
                mutex_exit(&sc->sc_slock);
                return 0;
        }

        /*
         * Set cluster size. Must be a power of two and
         * a multiple of backing store block size.
         */
        if (fss->fss_csize <= 0)
                csize = MAXPHYS;
        else
                csize = fss->fss_csize;
        /* Grow the cluster size so the cluster count stays representable. */
        if (bsize/csize > FSS_CLUSTER_MAX)
                csize = bsize/FSS_CLUSTER_MAX+1;

        /* Round csize up to the next power of two >= the bs block size. */
        for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32;
            sc->sc_clshift++)
                if (FSS_CLSIZE(sc) >= csize)
                        break;
        if (sc->sc_clshift >= 32) {
                error = EINVAL;
                goto bad;
        }
        sc->sc_clmask = FSS_CLSIZE(sc)-1;

        /*
         * Set number of cache slots.
         */
        if (FSS_CLSIZE(sc) <= 8192)
                sc->sc_cache_size = 32;
        else if (FSS_CLSIZE(sc) <= 65536)
                sc->sc_cache_size = 8;
        else
                sc->sc_cache_size = 4;

        /*
         * Set number of clusters and size of last cluster.
         */
        sc->sc_clcount = FSS_BTOCL(sc, bsize-1)+1;
        sc->sc_clresid = FSS_CLOFF(sc, bsize-1)+1;

        /*
         * Set size of indirect table.  The first sc_indir_size clusters
         * of the backing store are reserved for it; data clusters are
         * allocated from sc_clnext upward.
         */
        len = sc->sc_clcount*sizeof(u_int32_t);
        sc->sc_indir_size = FSS_BTOCL(sc, len)+1;
        sc->sc_clnext = sc->sc_indir_size;
        sc->sc_indir_cur = 0;

        if ((error = fss_softc_alloc(sc)) != 0)
                goto bad;

        /*
         * Activate the snapshot.  The file system is suspended so the
         * sync + COW-hook installation happens against a quiescent image.
         */

        if ((error = vfs_suspend(sc->sc_mount, 0)) != 0)
                goto bad;

        microtime(&sc->sc_time);

        vrele_flush(sc->sc_mount);
        error = VFS_SYNC(sc->sc_mount, MNT_WAIT, curlwp->l_cred);
        if (error == 0)
                error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc);
        if (error == 0) {
                mutex_enter(&sc->sc_slock);
                sc->sc_state = FSS_ACTIVE;
                mutex_exit(&sc->sc_slock);
        }

        vfs_resume(sc->sc_mount);

        if (error != 0)
                goto bad;

        aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname);
        aprint_debug_dev(sc->sc_dev,
            "%u clusters of %u, %u cache slots, %u indir clusters\n",
            sc->sc_clcount, FSS_CLSIZE(sc),
            sc->sc_cache_size, sc->sc_indir_size);

        return 0;

bad:
        /* Unified error exit: release softc resources and backing store. */
        fss_softc_free(sc);
        if (sc->sc_bs_vp != NULL) {
                if (sc->sc_flags & FSS_PERSISTENT)
                        vrele(sc->sc_bs_vp);
                else
                        vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
        }
        sc->sc_bs_vp = NULL;

        return error;
}
  949 
  950 /*
  951  * Delete a snapshot.
  952  */
  953 static int
  954 fss_delete_snapshot(struct fss_softc *sc, struct lwp *l)
  955 {
  956 
  957         mutex_enter(&sc->sc_slock);
  958         if ((sc->sc_flags & FSS_PERSISTENT) == 0 &&
  959             (sc->sc_flags & FSS_ERROR) == 0) {
  960                 mutex_exit(&sc->sc_slock);
  961                 fscow_disestablish(sc->sc_mount, fss_copy_on_write, sc);
  962         } else {
  963                 mutex_exit(&sc->sc_slock);
  964         }
  965 
  966         fss_softc_free(sc);
  967         if (sc->sc_flags & FSS_PERSISTENT)
  968                 vrele(sc->sc_bs_vp);
  969         else
  970                 vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
  971 
  972         mutex_enter(&sc->sc_slock);
  973         sc->sc_state = FSS_IDLE;
  974         sc->sc_mount = NULL;
  975         sc->sc_bdev = NODEV;
  976         sc->sc_bs_vp = NULL;
  977         sc->sc_flags &= ~FSS_PERSISTENT;
  978         mutex_exit(&sc->sc_slock);
  979 
  980         return 0;
  981 }
  982 
  983 /*
  984  * Read a cluster from the snapshotted block device to the cache.
  985  */
/*
 * Read cluster cl of the snapshotted block device into a cache slot.
 *
 * Returns 0 immediately if the cluster is already saved, already
 * cached, or the snapshot is no longer active; otherwise returns the
 * I/O status of the device read.  May sleep on a condvar waiting for
 * a concurrent reader of the same cluster or for a free cache slot.
 */
static int
fss_read_cluster(struct fss_softc *sc, u_int32_t cl)
{
        int error, todo, offset, len;
        daddr_t dblk;
        struct buf *bp, *mbp;
        struct fss_cache *scp, *scl;

        /*
         * Get a free cache slot.
         */
        scl = sc->sc_cache+sc->sc_cache_size;

        mutex_enter(&sc->sc_slock);

restart:
        /* Nothing to do if already copied or the snapshot went away. */
        if (isset(sc->sc_copied, cl) || sc->sc_state != FSS_ACTIVE) {
                mutex_exit(&sc->sc_slock);
                return 0;
        }

        /*
         * Done if this cluster is already cached; if another thread is
         * currently reading it, wait for it to finish and recheck.
         */
        for (scp = sc->sc_cache; scp < scl; scp++) {
                if (scp->fc_type == FSS_CACHE_VALID) {
                        if (scp->fc_cluster == cl) {
                                mutex_exit(&sc->sc_slock);
                                return 0;
                        }
                } else if (scp->fc_type == FSS_CACHE_BUSY) {
                        if (scp->fc_cluster == cl) {
                                cv_wait(&scp->fc_state_cv, &sc->sc_slock);
                                goto restart;
                        }
                }
        }

        /* Claim a free slot; if none, wait for the bs thread to free one. */
        for (scp = sc->sc_cache; scp < scl; scp++)
                if (scp->fc_type == FSS_CACHE_FREE) {
                        scp->fc_type = FSS_CACHE_BUSY;
                        scp->fc_cluster = cl;
                        break;
                }
        if (scp >= scl) {
                cv_wait(&sc->sc_cache_cv, &sc->sc_slock);
                goto restart;
        }

        mutex_exit(&sc->sc_slock);

        /*
         * Start the read.
         */
        dblk = btodb(FSS_CLTOB(sc, cl));
        if (cl == sc->sc_clcount-1) {
                /* The last cluster may be partial; zero-fill its tail. */
                todo = sc->sc_clresid;
                memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo);
        } else
                todo = FSS_CLSIZE(sc);
        offset = 0;
        mbp = getiobuf(NULL, true);
        mbp->b_bufsize = todo;
        mbp->b_data = scp->fc_data;
        mbp->b_resid = mbp->b_bcount = todo;
        mbp->b_flags = B_READ;
        mbp->b_cflags = BC_BUSY;
        mbp->b_dev = sc->sc_bdev;
        /* Issue the read in chunks of at most MAXPHYS, nested under mbp. */
        while (todo > 0) {
                len = todo;
                if (len > MAXPHYS)
                        len = MAXPHYS;
                /* Single-chunk case: use the master buffer directly. */
                if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo)
                        bp = mbp;
                else {
                        bp = getiobuf(NULL, true);
                        nestiobuf_setup(mbp, bp, offset, len);
                }
                bp->b_lblkno = 0;
                bp->b_blkno = dblk;
                bdev_strategy(bp);
                dblk += btodb(len);
                offset += len;
                todo -= len;
        }
        error = biowait(mbp);
        /* A short transfer without an explicit error is still an error. */
        if (error == 0 && mbp->b_resid != 0)
                error = EIO;
        putiobuf(mbp);

        /* Publish the result and wake anyone waiting on this slot. */
        mutex_enter(&sc->sc_slock);
        scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID);
        cv_broadcast(&scp->fc_state_cv);
        if (error == 0) {
                setbit(sc->sc_copied, scp->fc_cluster);
                /* Tell the bs thread there is a cache slot to flush. */
                cv_signal(&sc->sc_work_cv);
        }
        mutex_exit(&sc->sc_slock);

        return error;
}
 1084 
 1085 /*
 1086  * Read/write clusters from/to backing store.
 1087  * For persistent snapshots must be called with cl == 0. off is the
 1088  * offset into the snapshot.
 1089  */
 1090 static int
 1091 fss_bs_io(struct fss_softc *sc, fss_io_type rw,
 1092     u_int32_t cl, off_t off, int len, void *data, size_t *resid)
 1093 {
 1094         int error;
 1095 
 1096         off += FSS_CLTOB(sc, cl);
 1097 
 1098         vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);
 1099 
 1100         error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp,
 1101             data, len, off, UIO_SYSSPACE,
 1102             IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED,
 1103             sc->sc_bs_lwp->l_cred, resid, NULL);
 1104         if (error == 0) {
 1105                 rw_enter(sc->sc_bs_vp->v_uobj.vmobjlock, RW_WRITER);
 1106                 error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off),
 1107                     round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
 1108         }
 1109 
 1110         VOP_UNLOCK(sc->sc_bs_vp);
 1111 
 1112         return error;
 1113 }
 1114 
 1115 /*
 1116  * Get a pointer to the indirect slot for this cluster.
 1117  */
 1118 static u_int32_t *
 1119 fss_bs_indir(struct fss_softc *sc, u_int32_t cl)
 1120 {
 1121         u_int32_t icl;
 1122         int ioff;
 1123 
 1124         icl = cl/(FSS_CLSIZE(sc)/sizeof(u_int32_t));
 1125         ioff = cl%(FSS_CLSIZE(sc)/sizeof(u_int32_t));
 1126 
 1127         if (sc->sc_indir_cur == icl)
 1128                 return &sc->sc_indir_data[ioff];
 1129 
 1130         if (sc->sc_indir_dirty) {
 1131                 if (fss_bs_io(sc, FSS_WRITE, sc->sc_indir_cur, 0,
 1132                     FSS_CLSIZE(sc), (void *)sc->sc_indir_data, NULL) != 0)
 1133                         return NULL;
 1134                 setbit(sc->sc_indir_valid, sc->sc_indir_cur);
 1135         }
 1136 
 1137         sc->sc_indir_dirty = 0;
 1138         sc->sc_indir_cur = icl;
 1139 
 1140         if (isset(sc->sc_indir_valid, sc->sc_indir_cur)) {
 1141                 if (fss_bs_io(sc, FSS_READ, sc->sc_indir_cur, 0,
 1142                     FSS_CLSIZE(sc), (void *)sc->sc_indir_data, NULL) != 0)
 1143                         return NULL;
 1144         } else
 1145                 memset(sc->sc_indir_data, 0, FSS_CLSIZE(sc));
 1146 
 1147         return &sc->sc_indir_data[ioff];
 1148 }
 1149 
 1150 /*
 1151  * The kernel thread (one for every active snapshot).
 1152  *
 1153  * After wakeup it cleans the cache and runs the I/O requests.
 1154  */
static void
fss_bs_thread(void *arg)
{
        bool thread_idle, is_valid;
        int error, i, todo, len, crotor, is_read;
        long off;
        char *addr;
        u_int32_t c, cl, ch, *indirp;
        size_t resid;
        struct buf *bp, *nbp;
        struct fss_softc *sc;
        struct fss_cache *scp, *scl;

        sc = arg;
        scl = sc->sc_cache+sc->sc_cache_size;
        crotor = 0;             /* round-robin cursor over cache slots */
        thread_idle = false;

        /* The softc lock is held on entry to each loop iteration and is
         * dropped only around I/O; every exit below re-acquires it. */
        mutex_enter(&sc->sc_slock);

        for (;;) {
                /* Sleep until fss_read_cluster()/strategy posts work. */
                if (thread_idle)
                        cv_wait(&sc->sc_work_cv, &sc->sc_slock);
                thread_idle = true;
                /* FSS_BS_THREAD cleared means we were asked to terminate. */
                if ((sc->sc_flags & FSS_BS_THREAD) == 0) {
                        mutex_exit(&sc->sc_slock);
                        kthread_exit(0);
                }

                /*
                 * Process I/O requests (persistent)
                 */

                if (sc->sc_flags & FSS_PERSISTENT) {
                        if ((bp = bufq_get(sc->sc_bufq)) == NULL)
                                continue;
                        is_valid = (sc->sc_state == FSS_ACTIVE);
                        is_read = (bp->b_flags & B_READ);
                        thread_idle = false;
                        mutex_exit(&sc->sc_slock);

                        if (is_valid) {
                                /* cl == 0: read straight from the file. */
                                disk_busy(sc->sc_dkdev);
                                error = fss_bs_io(sc, FSS_READ, 0,
                                    dbtob(bp->b_blkno), bp->b_bcount,
                                    bp->b_data, &resid);
                                if (error)
                                        resid = bp->b_bcount;
                                disk_unbusy(sc->sc_dkdev,
                                    (error ? 0 : bp->b_bcount), is_read);
                        } else {
                                error = ENXIO;
                                resid = bp->b_bcount;
                        }

                        bp->b_error = error;
                        bp->b_resid = resid;
                        biodone(bp);

                        mutex_enter(&sc->sc_slock);
                        continue;
                }

                /*
                 * Clean the cache: flush at most one valid slot per pass
                 * to the backing store, rotating through the slots.
                 */
                for (i = 0; i < sc->sc_cache_size; i++) {
                        crotor = (crotor + 1) % sc->sc_cache_size;
                        scp = sc->sc_cache + crotor;
                        if (scp->fc_type != FSS_CACHE_VALID)
                                continue;
                        mutex_exit(&sc->sc_slock);

                        thread_idle = false;
                        /* Map the cluster, then append its data at sc_clnext. */
                        indirp = fss_bs_indir(sc, scp->fc_cluster);
                        if (indirp != NULL) {
                                error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext,
                                    0, FSS_CLSIZE(sc), scp->fc_data, NULL);
                        } else
                                error = EIO;

                        mutex_enter(&sc->sc_slock);
                        if (error == 0) {
                                /* Record where the cluster landed. */
                                *indirp = sc->sc_clnext++;
                                sc->sc_indir_dirty = 1;
                        } else
                                fss_error(sc, "write error on backing store");

                        /* Free the slot and wake waiters in fss_read_cluster. */
                        scp->fc_type = FSS_CACHE_FREE;
                        cv_broadcast(&sc->sc_cache_cv);
                        break;
                }

                /*
                 * Process I/O requests
                 */
                if ((bp = bufq_get(sc->sc_bufq)) == NULL)
                        continue;
                is_valid = (sc->sc_state == FSS_ACTIVE);
                is_read = (bp->b_flags & B_READ);
                thread_idle = false;

                if (!is_valid) {
                        /* Snapshot gone: fail the request. */
                        mutex_exit(&sc->sc_slock);

                        bp->b_error = ENXIO;
                        bp->b_resid = bp->b_bcount;
                        biodone(bp);

                        mutex_enter(&sc->sc_slock);
                        continue;
                }

                disk_busy(sc->sc_dkdev);

                /*
                 * First read from the snapshotted block device unless
                 * this request is completely covered by backing store.
                 */

                cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
                off = FSS_CLOFF(sc, dbtob(bp->b_blkno));
                ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
                error = 0;
                bp->b_resid = 0;
                bp->b_error = 0;
                for (c = cl; c <= ch; c++) {
                        if (isset(sc->sc_copied, c))
                                continue;
                        mutex_exit(&sc->sc_slock);

                        /* Not on backing store, read from device. */
                        nbp = getiobuf(NULL, true);
                        nbp->b_flags = B_READ;
                        nbp->b_resid = nbp->b_bcount = bp->b_bcount;
                        nbp->b_bufsize = bp->b_bcount;
                        nbp->b_data = bp->b_data;
                        nbp->b_blkno = bp->b_blkno;
                        nbp->b_lblkno = 0;
                        nbp->b_dev = sc->sc_bdev;
                        SET(nbp->b_cflags, BC_BUSY);    /* mark buffer busy */

                        bdev_strategy(nbp);

                        error = biowait(nbp);
                        if (error == 0 && nbp->b_resid != 0)
                                error = EIO;
                        if (error != 0) {
                                bp->b_resid = bp->b_bcount;
                                bp->b_error = nbp->b_error;
                                disk_unbusy(sc->sc_dkdev, 0, is_read);
                                biodone(bp);
                        }
                        putiobuf(nbp);

                        /* One whole-range device read suffices; stop here. */
                        mutex_enter(&sc->sc_slock);
                        break;
                }
                if (error)
                        continue;

                /*
                 * Replace those parts that have been saved to backing store.
                 */

                addr = bp->b_data;
                todo = bp->b_bcount;
                /* off is the in-cluster start offset for the first cluster
                 * only; subsequent clusters start at 0. */
                for (c = cl; c <= ch; c++, off = 0, todo -= len, addr += len) {
                        len = FSS_CLSIZE(sc)-off;
                        if (len > todo)
                                len = todo;
                        if (isclr(sc->sc_copied, c))
                                continue;
                        mutex_exit(&sc->sc_slock);

                        indirp = fss_bs_indir(sc, c);
                        if (indirp == NULL || *indirp == 0) {
                                /*
                                 * Not on backing store. Either in cache
                                 * or hole in the snapshotted block device.
                                 */

                                mutex_enter(&sc->sc_slock);
                                for (scp = sc->sc_cache; scp < scl; scp++)
                                        if (scp->fc_type == FSS_CACHE_VALID &&
                                            scp->fc_cluster == c)
                                                break;
                                if (scp < scl)
                                        memcpy(addr, (char *)scp->fc_data+off,
                                            len);
                                else
                                        memset(addr, 0, len);
                                continue;
                        }

                        /*
                         * Read from backing store.
                         */
                        error = fss_bs_io(sc, FSS_READ,
                            *indirp, off, len, addr, NULL);

                        mutex_enter(&sc->sc_slock);
                        if (error) {
                                bp->b_resid = bp->b_bcount;
                                bp->b_error = error;
                                break;
                        }
                }
                mutex_exit(&sc->sc_slock);

                disk_unbusy(sc->sc_dkdev, (error ? 0 : bp->b_bcount), is_read);
                biodone(bp);

                mutex_enter(&sc->sc_slock);
        }
}
 1371 
 1372 #ifdef _MODULE
 1373 
#include <sys/module.h>

/* Loadable-module glue: the driver requires the "bufq_fcfs" strategy module. */
MODULE(MODULE_CLASS_DRIVER, fss, "bufq_fcfs");
CFDRIVER_DECL(fss, DV_DISK, NULL);

/* Block/character device majors; -1 so devsw_attach() can assign them
 * (presumably dynamically — confirm against devsw_attach(9)). */
devmajor_t fss_bmajor = -1, fss_cmajor = -1;
 1380 
 1381 static int
 1382 fss_modcmd(modcmd_t cmd, void *arg)
 1383 {
 1384         int error = 0;
 1385 
 1386         switch (cmd) {
 1387         case MODULE_CMD_INIT:
 1388                 mutex_init(&fss_device_lock, MUTEX_DEFAULT, IPL_NONE);
 1389                 cv_init(&fss_device_cv, "snapwait");
 1390 
 1391                 error = devsw_attach(fss_cd.cd_name,
 1392                     &fss_bdevsw, &fss_bmajor, &fss_cdevsw, &fss_cmajor);
 1393                 if (error) {
 1394                         mutex_destroy(&fss_device_lock);
 1395                         break;
 1396                 }
 1397 
 1398                 error = config_cfdriver_attach(&fss_cd);
 1399                 if (error) {
 1400                         devsw_detach(&fss_bdevsw, &fss_cdevsw);
 1401                         mutex_destroy(&fss_device_lock);
 1402                         break;
 1403                 }
 1404 
 1405                 error = config_cfattach_attach(fss_cd.cd_name, &fss_ca);
 1406                 if (error) {
 1407                         config_cfdriver_detach(&fss_cd);
 1408                         devsw_detach(&fss_bdevsw, &fss_cdevsw);
 1409                         mutex_destroy(&fss_device_lock);
 1410                         break;
 1411                 }
 1412 
 1413                 break;
 1414 
 1415         case MODULE_CMD_FINI:
 1416                 error = config_cfattach_detach(fss_cd.cd_name, &fss_ca);
 1417                 if (error) {
 1418                         break;
 1419                 }
 1420                 error = config_cfdriver_detach(&fss_cd);
 1421                 if (error) {
 1422                         config_cfattach_attach(fss_cd.cd_name, &fss_ca);
 1423                         break;
 1424                 }
 1425                 devsw_detach(&fss_bdevsw, &fss_cdevsw);
 1426                 cv_destroy(&fss_device_cv);
 1427                 mutex_destroy(&fss_device_lock);
 1428                 break;
 1429 
 1430         default:
 1431                 error = ENOTTY;
 1432                 break;
 1433         }
 1434 
 1435         return error;
 1436 }
 1437 
 1438 #endif /* _MODULE */

Cache object: 60df53d48215c33eb8710ec69702c3bd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.