FreeBSD/Linux Kernel Cross Reference
sys/dev/virtio/block/virtio_blk.c

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/msan.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
        struct vtblk_softc              *vbr_sc;
        bus_dmamap_t                     vbr_mapp;

        /* Fields after this point are zeroed for each request. */
        struct virtio_blk_outhdr         vbr_hdr;
        struct bio                      *vbr_bp;
        uint8_t                          vbr_ack;
        uint8_t                          vbr_requeue_on_error;
        uint8_t                          vbr_busdma_wait;
        int                              vbr_error;
        TAILQ_ENTRY(vtblk_request)       vbr_link;
};

enum vtblk_cache_mode {
        VTBLK_CACHE_WRITETHROUGH,
        VTBLK_CACHE_WRITEBACK,
        VTBLK_CACHE_MAX
};

struct vtblk_softc {
        device_t                 vtblk_dev;
        struct mtx               vtblk_mtx;
        uint64_t                 vtblk_features;
        uint32_t                 vtblk_flags;
#define VTBLK_FLAG_INDIRECT     0x0001
#define VTBLK_FLAG_DETACH       0x0002
#define VTBLK_FLAG_SUSPEND      0x0004
#define VTBLK_FLAG_BARRIER      0x0008
#define VTBLK_FLAG_WCE_CONFIG   0x0010
#define VTBLK_FLAG_BUSDMA_WAIT  0x0020
#define VTBLK_FLAG_BUSDMA_ALIGN 0x0040

        struct virtqueue        *vtblk_vq;
        struct sglist           *vtblk_sglist;
        bus_dma_tag_t            vtblk_dmat;
        struct disk             *vtblk_disk;

        struct bio_queue_head    vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_ready;
        struct vtblk_request    *vtblk_req_ordered;

        int                      vtblk_max_nsegs;
        int                      vtblk_request_count;
        enum vtblk_cache_mode    vtblk_write_cache;

        struct bio_queue         vtblk_dump_queue;
        struct vtblk_request     vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },
        { VIRTIO_BLK_F_CONFIG_WCE,      "ConfigWCE"     },
        { VIRTIO_BLK_F_MQ,              "Multiqueue"    },
        { VIRTIO_BLK_F_DISCARD,         "Discard"       },
        { VIRTIO_BLK_F_WRITE_ZEROES,    "WriteZeros"    },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);
static int      vtblk_attach_completed(device_t);
static int      vtblk_config_change(device_t);

static int      vtblk_open(struct disk *);
static int      vtblk_close(struct disk *);
static int      vtblk_ioctl(struct disk *, u_long, void *, int,
                    struct thread *);
static int      vtblk_dump(void *, void *, off_t, size_t);
static void     vtblk_strategy(struct bio *);

static int      vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_setup_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                    struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_create_disk(struct vtblk_softc *);

static int      vtblk_request_prealloc(struct vtblk_softc *);
static void     vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
                vtblk_request_dequeue(struct vtblk_softc *);
static void     vtblk_request_enqueue(struct vtblk_softc *,
                    struct vtblk_request *);
static struct vtblk_request *
                vtblk_request_next_ready(struct vtblk_softc *);
static void     vtblk_request_requeue_ready(struct vtblk_softc *,
                    struct vtblk_request *);
static struct vtblk_request *
                vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
                vtblk_request_bio(struct vtblk_softc *);
static int      vtblk_request_execute(struct vtblk_request *, int);
static void     vtblk_request_execute_cb(void *,
                    bus_dma_segment_t *, int, int);
static int      vtblk_request_error(struct vtblk_request *);

static void     vtblk_queue_completed(struct vtblk_softc *,
                    struct bio_queue *);
static void     vtblk_done_completed(struct vtblk_softc *,
                    struct bio_queue *);
static void     vtblk_drain_vq(struct vtblk_softc *);
static void     vtblk_drain(struct vtblk_softc *);

static void     vtblk_startio(struct vtblk_softc *);
static void     vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void     vtblk_read_config(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_ident(struct vtblk_softc *);
static int      vtblk_poll_request(struct vtblk_softc *,
                    struct vtblk_request *);
static int      vtblk_quiesce(struct vtblk_softc *);
static void     vtblk_vq_intr(void *);
static void     vtblk_stop(struct vtblk_softc *);

static void     vtblk_dump_quiesce(struct vtblk_softc *);
static int      vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int      vtblk_dump_flush(struct vtblk_softc *);
static void     vtblk_dump_complete(struct vtblk_softc *);

static void     vtblk_set_write_cache(struct vtblk_softc *, int);
static int      vtblk_write_cache_enabled(struct vtblk_softc *sc,
                    struct virtio_blk_config *);
static int      vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void     vtblk_setup_sysctl(struct vtblk_softc *);
static int      vtblk_tunable_int(struct vtblk_softc *, const char *, int);
#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val) virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val)

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
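/*
 * Both knobs are kenv-backed tunables and can be set from loader.conf(5),
 * e.g. hw.vtblk.no_ident=1; vtblk_tunable_int() below also checks for a
 * per-device override of the form hw.vtblk.<unit>.<knob>.
 */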

#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH                 | \
     VIRTIO_BLK_F_TOPOLOGY              | \
     VIRTIO_BLK_F_CONFIG_WCE            | \
     VIRTIO_BLK_F_DISCARD               | \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES   (VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES   (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
                                mtx_init(VTBLK_MTX((_sc)), (_name), \
                                    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)         mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)  mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
                                mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME         "vtbd"
#define VTBLK_QUIESCE_TIMEOUT   (30 * hz)
#define VTBLK_BSIZE             512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2
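
/*
 * As an illustrative sketch, a single-segment BIO_READ is laid out in the
 * scatter/gather list as:
 *
 *   sg[0]: struct virtio_blk_outhdr vbr_hdr  (device reads)
 *   sg[1]: the data buffer                   (device writes for BIO_READ)
 *   sg[2]: uint8_t vbr_ack status byte       (device writes)
 */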

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        /* VirtIO methods. */
        DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
        DEVMETHOD(virtio_config_change, vtblk_config_change),

        DEVMETHOD_END
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};

VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
        case MOD_QUIESCE:
        case MOD_UNLOAD:
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{
        return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
}

static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;
        virtio_set_feature_desc(dev, vtblk_feature_desc);

        VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_dump_queue);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        vtblk_setup_sysctl(sc);

        error = vtblk_setup_features(sc);
        if (error) {
                device_printf(dev, "cannot setup features\n");
                goto fail;
        }

        vtblk_read_config(sc, &blkcfg);

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < maxphys) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
                goto fail;
        }

        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        /*
         * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
         * supports a single data segment; in that case we need busdma to
         * align to a page boundary so we can send a *contiguous* page size
         * request to the host.
         */
        if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
                sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
        error = bus_dma_tag_create(
            bus_get_dma_tag(dev),                       /* parent */
            (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
            0,                                          /* boundary */
            BUS_SPACE_MAXADDR,                          /* lowaddr */
            BUS_SPACE_MAXADDR,                          /* highaddr */
            NULL, NULL,                                 /* filter, filterarg */
            maxphys,                                    /* max request size */
            sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS,   /* max # segments */
            maxphys,                                    /* maxsegsize */
            0,                                          /* flags */
            busdma_lock_mutex,                          /* lockfunc */
            &sc->vtblk_mtx,                             /* lockarg */
            &sc->vtblk_dmat);
        if (error) {
                device_printf(dev, "cannot create bus dma tag\n");
                goto fail;
        }

#ifdef __powerpc__
        /*
         * Virtio uses physical addresses rather than bus addresses, so we
         * need to ask busdma to skip the iommu physical->bus mapping.  At
         * present, this is only a thing on the powerpc architectures.
         */
        bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
#endif

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_request_prealloc(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACH;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        vtblk_drain(sc);

        if (sc->vtblk_disk != NULL) {
                disk_destroy(sc->vtblk_disk);
                sc->vtblk_disk = NULL;
        }

        if (sc->vtblk_dmat != NULL) {
                bus_dma_tag_destroy(sc->vtblk_dmat);
                sc->vtblk_dmat = NULL;
        }

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;
        int error;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
        /* XXX BMV: virtio_stop(), etc needed here? */
        error = vtblk_quiesce(sc);
        if (error)
                sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        VTBLK_UNLOCK(sc);

        return (error);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        /* XXX BMV: virtio_reinit(), etc needed here? */
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        vtblk_startio(sc);
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{

        return (0);
}

static int
vtblk_attach_completed(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        /*
         * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
         * processed after the device acknowledged
         * VIRTIO_CONFIG_STATUS_DRIVER_OK.
         */
        vtblk_create_disk(sc);
        return (0);
}

static int
vtblk_config_change(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        uint64_t capacity;

        sc = device_get_softc(dev);

        vtblk_read_config(sc, &blkcfg);

        /* Capacity is always in 512-byte units. */
        capacity = blkcfg.capacity * VTBLK_BSIZE;

        if (sc->vtblk_disk->d_mediasize != capacity)
                vtblk_resize_disk(sc, capacity);

        return (0);
}

static int
vtblk_open(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (ENOTTY);
}

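/*
 * Disk dump entry point (d_dump). Judging from the completion test below,
 * the final call from the kernel dumper arrives with a NULL buffer at
 * offset zero, at which point the device cache is flushed and the bios
 * queued while dumping are completed.
 */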
static int
vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
{
        struct disk *dp;
        struct vtblk_softc *sc;
        int error;

        dp = arg;
        error = 0;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        VTBLK_LOCK(sc);

        vtblk_dump_quiesce(sc);

        if (length > 0)
                error = vtblk_dump_write(sc, virtual, offset, length);
        if (error || (virtual == NULL && offset == 0))
                vtblk_dump_complete(sc);

        VTBLK_UNLOCK(sc);

        return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
        struct vtblk_softc *sc;

        if ((sc = bp->bio_disk->d_drv1) == NULL) {
                vtblk_bio_done(NULL, bp, EINVAL);
                return;
        }

        if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
            (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
                vtblk_bio_done(sc, bp, EOPNOTSUPP);
                return;
        }

        VTBLK_LOCK(sc);

        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
                VTBLK_UNLOCK(sc);
                vtblk_bio_done(sc, bp, ENXIO);
                return;
        }

        bioq_insert_tail(&sc->vtblk_bioq, bp);
        vtblk_startio(sc);

        VTBLK_UNLOCK(sc);
}

static int
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
            VTBLK_LEGACY_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
        return (virtio_finalize_features(dev));
}

static int
vtblk_setup_features(struct vtblk_softc *sc)
{
        device_t dev;
        int error;

        dev = sc->vtblk_dev;

        error = vtblk_negotiate_features(sc);
        if (error)
                return (error);

        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
        if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
                sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;

        /* Legacy. */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
                sc->vtblk_flags |= VTBLK_FLAG_BARRIER;

        return (0);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

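        /*
         * A maxphys-sized transfer may span maxphys / PAGE_SIZE + 1 pages
         * when the buffer is not page aligned; for example, 128K maxphys
         * with 4K pages yields at most 33 data segments on top of the
         * two header/status segments.
         */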
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
                if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
                        nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
        } else
                nsegs += 1;

        return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
            vtblk_vq_intr, sc, &sc->vtblk_vq,
            "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
        device_t dev;
        struct disk *dp;
        int error;

        dev = sc->vtblk_dev;
        dp = sc->vtblk_disk;

        dp->d_mediasize = new_capacity;
        if (bootverbose) {
                device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
                    (uintmax_t) dp->d_mediasize >> 20,
                    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
                    dp->d_sectorsize);
        }

        error = disk_resize(dp, M_NOWAIT);
        if (error) {
                device_printf(dev,
                    "disk_resize(9) failed, error: %d\n", error);
        }
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        struct disk *dp;

        dev = sc->vtblk_dev;

        sc->vtblk_disk = dp = disk_alloc();
        dp->d_open = vtblk_open;
        dp->d_close = vtblk_close;
        dp->d_ioctl = vtblk_ioctl;
        dp->d_strategy = vtblk_strategy;
        dp->d_name = VTBLK_DISK_NAME;
        dp->d_unit = device_get_unit(dev);
        dp->d_drv1 = sc;
        dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
        dp->d_hba_vendor = virtio_get_vendor(dev);
        dp->d_hba_device = virtio_get_device(dev);
        dp->d_hba_subvendor = virtio_get_subvendor(dev);
        dp->d_hba_subdevice = virtio_get_subdevice(dev);

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                dp->d_flags |= DISKFLAG_WRITE_PROTECT;
        else {
                if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
                        dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
                dp->d_dump = vtblk_dump;
        }

        /* Capacity is always in 512-byte units. */
        dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                dp->d_sectorsize = blkcfg->blk_size;
        else
                dp->d_sectorsize = VTBLK_BSIZE;

        /*
         * The VirtIO maximum I/O size is given in terms of segments.
         * However, FreeBSD limits I/O size by logical buffer size, not
         * by physically contiguous pages. Therefore, we have to assume
         * no pages are contiguous. This may impose an artificially low
         * maximum I/O size. But in practice, since QEMU advertises 128
         * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
         * which is typically greater than maxphys. Eventually we should
         * just advertise maxphys and split buffers that are too big.
         *
         * If we're not asking busdma to align data to page boundaries, the
         * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
         * unaligned I/Os.
         */
        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
            PAGE_SIZE;
        if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
                dp->d_maxsize -= PAGE_SIZE;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                dp->d_fwsectors = blkcfg->geometry.sectors;
                dp->d_fwheads = blkcfg->geometry.heads;
        }

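        /*
         * For example, a physical_block_exp of 3 on a 512 byte sector
         * device advertises a 4096 byte physical block (stripe) to GEOM.
         */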
        if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
            blkcfg->topology.physical_block_exp > 0) {
                dp->d_stripesize = dp->d_sectorsize *
                    (1 << blkcfg->topology.physical_block_exp);
                dp->d_stripeoffset = (dp->d_stripesize -
                    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
                    dp->d_stripesize;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
                dp->d_flags |= DISKFLAG_CANDELETE;
                dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
        }

        if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
                sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
        else
                sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
        struct disk *dp;

        dp = sc->vtblk_disk;

        vtblk_ident(sc);

        device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
            (uintmax_t) dp->d_mediasize >> 20,
            (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
            dp->d_sectorsize);

        disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, nreqs;

        nreqs = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
                nreqs /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < nreqs; i++) {
                req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);

                req->vbr_sc = sc;
                if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
                        free(req, M_DEVBUF);
                        return (ENOMEM);
                }

                MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
                MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

                sc->vtblk_request_count++;
                vtblk_request_enqueue(sc, req);
        }

        return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

        while ((req = vtblk_request_dequeue(sc)) != NULL) {
                sc->vtblk_request_count--;
                bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
                free(req, M_DEVBUF);
        }

        KASSERT(sc->vtblk_request_count == 0,
            ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL) {
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
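                /*
                 * Zero everything from vbr_hdr onward, per the "zeroed for
                 * each request" marker in struct vtblk_request.
                 */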
                bzero(&req->vbr_hdr, sizeof(struct vtblk_request) -
                    offsetof(struct vtblk_request, vbr_hdr));
        }

        return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

        /* NOTE: Currently, there will be at most one request in the queue. */
        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = vtblk_request_next_ready(sc);
        if (req != NULL)
                return (req);

        return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_request_dequeue(sc);
        if (req == NULL)
                return (NULL);

        bp = bioq_takefirst(bioq);
        req->vbr_bp = bp;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

        switch (bp->bio_cmd) {
        case BIO_FLUSH:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
                req->vbr_hdr.sector = 0;
                break;
        case BIO_READ:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        case BIO_DELETE:
                req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
                req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                break;
        default:
                panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
        }

        if (bp->bio_flags & BIO_ORDERED)
                req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

        return (req);
}

static int
vtblk_request_execute(struct vtblk_request *req, int flags)
{
        struct vtblk_softc *sc = req->vbr_sc;
        struct bio *bp = req->vbr_bp;
        int error = 0;

        /*
         * Call via bus_dmamap_load_bio or directly depending on whether we
         * have a buffer we need to map.  If we don't have a busdma map,
         * try to perform the I/O directly and hope that it works (this will
         * happen when dumping).
         */
        if ((req->vbr_mapp != NULL) &&
            (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
                error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
                    req->vbr_bp, vtblk_request_execute_cb, req, flags);
                if (error == EINPROGRESS) {
                        req->vbr_busdma_wait = 1;
                        sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
                }
        } else {
                vtblk_request_execute_cb(req, NULL, 0, 0);
        }

        return (error ? error : req->vbr_error);
}

static void
vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
    int nseg, int error)
{
        struct vtblk_request *req;
        struct vtblk_softc *sc;
        struct virtqueue *vq;
        struct sglist *sg;
        struct bio *bp;
        int ordered, readable, writable, i;

        req = (struct vtblk_request *)callback_arg;
        sc = req->vbr_sc;
        vq = sc->vtblk_vq;
        sg = sc->vtblk_sglist;
        bp = req->vbr_bp;
        ordered = 0;
        writable = 0;

        /*
         * If we paused request queueing while we waited for busdma to call us
         * asynchronously, unpause it now; this request made it through so we
         * don't need to worry about others getting ahead of us.  (Note that we
         * hold the device mutex so nothing will happen until after we return
         * anyway.)
         */
        if (req->vbr_busdma_wait)
                sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;

        /* Fail on errors from busdma. */
        if (error)
                goto out1;

        /*
         * Some hosts (such as bhyve) do not implement the barrier feature,
         * so we emulate it in the driver by allowing the barrier request
         * to be the only one in flight.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
                if (sc->vtblk_req_ordered != NULL) {
                        error = EBUSY;
                        goto out;
                }
                if (bp->bio_flags & BIO_ORDERED) {
                        if (!virtqueue_empty(vq)) {
                                error = EBUSY;
                                goto out;
                        }
                        ordered = 1;
                        req->vbr_hdr.type &= vtblk_gtoh32(sc,
                                ~VIRTIO_BLK_T_BARRIER);
                }
        }

        sglist_reset(sg);
        sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                /*
                 * We cast bus_addr_t to vm_paddr_t here; since we skip the
                 * iommu mapping (see vtblk_attach) this should be safe.
                 */
                for (i = 0; i < nseg; i++) {
                        error = sglist_append_phys(sg,
                            (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
                        if (error || sg->sg_nseg == sg->sg_maxseg) {
                                panic("%s: bio %p data buffer too big %d",
                                    __func__, bp, error);
                        }
                }

                /* Special handling for dump, which bypasses busdma. */
                if (req->vbr_mapp == NULL) {
                        error = sglist_append_bio(sg, bp);
                        if (error || sg->sg_nseg == sg->sg_maxseg) {
                                panic("%s: bio %p data buffer too big %d",
                                    __func__, bp, error);
                        }
                }

                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable = sg->sg_nseg - 1;
        } else if (bp->bio_cmd == BIO_DELETE) {
                struct virtio_blk_discard_write_zeroes *discard;

                discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
                if (discard == NULL) {
                        error = ENOMEM;
                        goto out;
                }

                bp->bio_driver1 = discard;
                discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
                discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
                error = sglist_append(sg, discard, sizeof(*discard));
                if (error || sg->sg_nseg == sg->sg_maxseg) {
                        panic("%s: bio %p data buffer too big %d",
                            __func__, bp, error);
                }
        }

        writable++;
        sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        readable = sg->sg_nseg - writable;

        if (req->vbr_mapp != NULL) {
                switch (bp->bio_cmd) {
                case BIO_READ:
                        bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
                            BUS_DMASYNC_PREREAD);
                        break;
                case BIO_WRITE:
                        bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
                            BUS_DMASYNC_PREWRITE);
                        break;
                }
        }

        error = virtqueue_enqueue(vq, req, sg, readable, writable);
        if (error == 0 && ordered)
                sc->vtblk_req_ordered = req;

        /*
         * If we were called asynchronously, we need to notify the queue that
         * we've added a new request, since the notification from startio was
         * performed already.
         */
        if (error == 0 && req->vbr_busdma_wait)
                virtqueue_notify(vq);

out:
        if (error && (req->vbr_mapp != NULL))
                bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
out1:
        if (error && req->vbr_requeue_on_error)
                vtblk_request_requeue_ready(sc, req);
        req->vbr_error = error;
}

static int
vtblk_request_error(struct vtblk_request *req)
{
        int error;

        switch (req->vbr_ack) {
        case VIRTIO_BLK_S_OK:
                error = 0;
                break;
        case VIRTIO_BLK_S_UNSUPP:
                error = ENOTSUP;
                break;
        default:
                error = EIO;
                break;
        }

        return (error);
}

static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
        struct vtblk_request *req;
        struct bio *bp;

        while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
                if (sc->vtblk_req_ordered != NULL) {
                        MPASS(sc->vtblk_req_ordered == req);
                        sc->vtblk_req_ordered = NULL;
                }

                bp = req->vbr_bp;
                if (req->vbr_mapp != NULL) {
                        switch (bp->bio_cmd) {
                        case BIO_READ:
                                bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
                                    BUS_DMASYNC_POSTREAD);
                                bus_dmamap_unload(sc->vtblk_dmat,
                                    req->vbr_mapp);
                                break;
                        case BIO_WRITE:
                                bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(sc->vtblk_dmat,
                                    req->vbr_mapp);
                                break;
                        }
                }
                bp->bio_error = vtblk_request_error(req);
                TAILQ_INSERT_TAIL(queue, bp, bio_queue);

                vtblk_request_enqueue(sc, req);
        }
}

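/*
 * Complete the collected bios without the softc lock held; biodone() may
 * re-enter the driver through GEOM direct dispatch (see vtblk_bio_done()).
 */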
static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
        struct bio *bp, *tmp;

        TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
                if (bp->bio_error != 0)
                        disk_err(bp, "hard error", -1, 1);
                vtblk_bio_done(sc, bp, bp->bio_error);
        }
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                vtblk_bio_done(sc, req->vbr_bp, ENXIO);
                vtblk_request_enqueue(sc, req);
        }

        sc->vtblk_req_ordered = NULL;
        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL) {
                struct bio_queue queue;

                TAILQ_INIT(&queue);
                vtblk_queue_completed(sc, &queue);
                vtblk_done_completed(sc, &queue);

                vtblk_drain_vq(sc);
        }

        while ((req = vtblk_request_next_ready(sc)) != NULL) {
                vtblk_bio_done(sc, req->vbr_bp, ENXIO);
                vtblk_request_enqueue(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_bio_done(sc, bp, ENXIO);
        }

        vtblk_request_free(sc);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        VTBLK_LOCK_ASSERT(sc);
        vq = sc->vtblk_vq;
        enq = 0;

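        /*
         * Do not start new requests while suspended, or while a deferred
         * busdma load is outstanding; the busdma callback notifies the
         * virtqueue itself once its request is enqueued.
         */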
        if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
                return;

        while (!virtqueue_full(vq)) {
                req = vtblk_request_next(sc);
                if (req == NULL)
                        break;

                req->vbr_requeue_on_error = 1;
                if (vtblk_request_execute(req, BUS_DMA_WAITOK))
                        break;

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

        /* Because of GEOM direct dispatch, we cannot hold any locks. */
        if (sc != NULL)
                VTBLK_LOCK_ASSERT_NOTOWNED(sc);

        if (error) {
                bp->bio_resid = bp->bio_bcount;
                bp->bio_error = error;
                bp->bio_flags |= BIO_ERROR;
        } else {
                kmsan_mark_bio(bp, KMSAN_STATE_INITED);
        }

        if (bp->bio_driver1 != NULL) {
                free(bp->bio_driver1, M_DEVBUF);
                bp->bio_driver1 = NULL;
        }

        biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)                  \
        if (virtio_with_feature(_dev, _feature)) {                      \
                virtio_read_device_config(_dev,                         \
                    offsetof(struct virtio_blk_config, _field),         \
                    &(_cfg)->_field, sizeof((_cfg)->_field));           \
        }

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;

        dev = sc->vtblk_dev;

        bzero(blkcfg, sizeof(struct virtio_blk_config));

        /* The capacity is always available. */
        virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
            capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

        /* Read the configuration if the feature was negotiated. */
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.cylinders, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.heads, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
            geometry.sectors, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.physical_block_exp, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.alignment_offset, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.min_io_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
            topology.opt_io_size, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
            blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
        VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
            blkcfg);
}

#undef VTBLK_GET_CONFIG

static void
vtblk_ident(struct vtblk_softc *sc)
{
        struct bio buf;
        struct disk *dp;
        struct vtblk_request *req;
        int len, error;

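        /*
         * Fetch the device's identity string with a polled
         * VIRTIO_BLK_T_GET_ID request and publish it as the disk ident.
         */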
        dp = sc->vtblk_disk;
        len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

        if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
                return;

        req = vtblk_request_dequeue(sc);
        if (req == NULL)
                return;

        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_READ;
        buf.bio_data = dp->d_ident;
        buf.bio_bcount = len;

        VTBLK_LOCK(sc);
        error = vtblk_poll_request(sc, req);
        VTBLK_UNLOCK(sc);

        vtblk_request_enqueue(sc, req);

        if (error) {
                device_printf(sc->vtblk_dev,
                    "error getting device identifier: %d\n", error);
        }
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct virtqueue *vq;
        int error;

        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
        if (error)
                return (error);

        virtqueue_notify(vq);
        virtqueue_poll(vq, NULL);

        error = vtblk_request_error(req);
        if (error && bootverbose) {
                device_printf(sc->vtblk_dev,
                    "%s: IO error: %d\n", __func__, error);
        }

        return (error);
}

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
        int error;

        VTBLK_LOCK_ASSERT(sc);
        error = 0;

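        /*
         * Wait for the in-flight requests to drain; vtblk_vq_intr() wakes
         * us after processing completions while VTBLK_FLAG_SUSPEND is set.
         */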
        while (!virtqueue_empty(sc->vtblk_vq)) {
                if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
                    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
                        error = EBUSY;
                        break;
                }
        }

        return (error);
}

static void
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;
        struct virtqueue *vq;
        struct bio_queue queue;

        sc = xsc;
        vq = sc->vtblk_vq;
        TAILQ_INIT(&queue);

        VTBLK_LOCK(sc);

again:
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
                goto out;

        vtblk_queue_completed(sc, &queue);
        vtblk_startio(sc);

        if (virtqueue_enable_intr(vq) != 0) {
                virtqueue_disable_intr(vq);
                goto again;
        }

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
                wakeup(&sc->vtblk_vq);

out:
        VTBLK_UNLOCK(sc);
        vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

        /*
         * Spin here until all the requests in-flight at the time of the
         * dump are completed and queued. The queued requests will be
         * biodone'd once the dump is finished.
         */
        while (!virtqueue_empty(sc->vtblk_vq))
                vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_sc = sc;
        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_WRITE;
        buf.bio_data = virtual;
        buf.bio_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_sc = sc;
        req->vbr_ack = -1;
        req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
        req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        g_reset_bio(&buf);

        buf.bio_cmd = BIO_FLUSH;

        return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

        vtblk_dump_flush(sc);

        VTBLK_UNLOCK(sc);
        vtblk_done_completed(sc, &sc->vtblk_dump_queue);
        VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

        /* Set either writeback (1) or writethrough (0) mode. */
        virtio_write_dev_config_1(sc->vtblk_dev,
            offsetof(struct virtio_blk_config, wce), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        int wc;

        if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
                wc = vtblk_tunable_int(sc, "writecache_mode",
                    vtblk_writecache_mode);
                if (wc >= 0 && wc < VTBLK_CACHE_MAX)
                        vtblk_set_write_cache(sc, wc);
                else
                        wc = blkcfg->wce;
        } else
                wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);

        return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
        struct vtblk_softc *sc;
        int wc, error;

        sc = oidp->oid_arg1;
        wc = sc->vtblk_write_cache;

        error = sysctl_handle_int(oidp, &wc, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
                return (EPERM);
        if (wc < 0 || wc >= VTBLK_CACHE_MAX)
                return (EINVAL);

        VTBLK_LOCK(sc);
        sc->vtblk_write_cache = wc;
        vtblk_set_write_cache(sc, sc->vtblk_write_cache);
        VTBLK_UNLOCK(sc);

        return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
        device_t dev;
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *tree;
        struct sysctl_oid_list *child;

        dev = sc->vtblk_dev;
        ctx = device_get_sysctl_ctx(dev);
        tree = device_get_sysctl_tree(dev);
        child = SYSCTL_CHILDREN(tree);

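        /*
         * Exposed as dev.vtblk.<unit>.writecache_mode; for example,
         * "sysctl dev.vtblk.0.writecache_mode=0" selects writethrough at
         * runtime when the ConfigWCE feature was negotiated.
         */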
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
            CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
            vtblk_write_cache_sysctl, "I",
            "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
        char path[64];

        snprintf(path, sizeof(path),
            "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
        TUNABLE_INT_FETCH(path, &def);

        return (def);
}
