1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO block devices. */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/bio.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/msan.h>
41 #include <sys/sglist.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/queue.h>
46
47 #include <geom/geom.h>
48 #include <geom/geom_disk.h>
49
50 #include <machine/bus.h>
51 #include <machine/resource.h>
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54
55 #include <dev/virtio/virtio.h>
56 #include <dev/virtio/virtqueue.h>
57 #include <dev/virtio/block/virtio_blk.h>
58
59 #include "virtio_if.h"
60
/*
 * Per-request state. Requests are preallocated (vtblk_request_prealloc)
 * and recycled via the softc's vtblk_req_free list.
 */
struct vtblk_request {
	struct vtblk_softc	*vbr_sc;	/* owning softc; set once */
	bus_dmamap_t		 vbr_mapp;	/* busdma map; set once */

	/* Fields after this point are zeroed for each request. */
	struct virtio_blk_outhdr vbr_hdr;	/* request header for the host */
	struct bio		*vbr_bp;	/* bio bound to this request */
	uint8_t			 vbr_ack;	/* status byte written by host */
	uint8_t			 vbr_requeue_on_error;	/* put back on ready list on failure */
	uint8_t			 vbr_busdma_wait;	/* busdma deferred the load callback */
	int			 vbr_error;	/* result of vtblk_request_execute_cb */
	TAILQ_ENTRY(vtblk_request) vbr_link;	/* free/ready list linkage */
};
74
/* Write-cache modes selectable for the device. */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,	/* write cache disabled */
	VTBLK_CACHE_WRITEBACK,		/* write cache enabled */
	VTBLK_CACHE_MAX			/* sentinel; not a valid mode */
};
80
/* Per-device driver state. */
struct vtblk_softc {
	device_t		 vtblk_dev;	/* backing device_t */
	struct mtx		 vtblk_mtx;	/* serializes queue/flag access */
	uint64_t		 vtblk_features;	/* negotiated feature bits */
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001	/* indirect descriptors negotiated */
#define VTBLK_FLAG_DETACH	0x0002	/* detach in progress; fail new I/O */
#define VTBLK_FLAG_SUSPEND	0x0004	/* suspended; hold new I/O */
#define VTBLK_FLAG_BARRIER	0x0008	/* host supports legacy barrier */
#define VTBLK_FLAG_WCE_CONFIG	0x0010	/* writeback toggle in config space */
#define VTBLK_FLAG_BUSDMA_WAIT	0x0020	/* waiting on deferred busdma load */
#define VTBLK_FLAG_BUSDMA_ALIGN	0x0040	/* busdma must page-align buffers */

	struct virtqueue	*vtblk_vq;	/* single request virtqueue */
	struct sglist		*vtblk_sglist;	/* scratch sglist for enqueue */
	bus_dma_tag_t		 vtblk_dmat;	/* tag for data buffer mappings */
	struct disk		*vtblk_disk;	/* GEOM disk */

	struct bio_queue_head	 vtblk_bioq;	/* bios awaiting a request */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;	/* unused requests */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;	/* deferred, to retry first */
	struct vtblk_request	*vtblk_req_ordered;	/* in-flight emulated barrier */

	int			 vtblk_max_nsegs;	/* max segments per request */
	int			 vtblk_request_count;	/* preallocated request total */
	enum vtblk_cache_mode	 vtblk_write_cache;	/* current cache policy */

	/* Dump state; used only from the kernel dump path. */
	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};
113
/*
 * Human-readable names for feature bits, registered with
 * virtio_set_feature_desc() in vtblk_attach().
 */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};
131
132 static int vtblk_modevent(module_t, int, void *);
133
134 static int vtblk_probe(device_t);
135 static int vtblk_attach(device_t);
136 static int vtblk_detach(device_t);
137 static int vtblk_suspend(device_t);
138 static int vtblk_resume(device_t);
139 static int vtblk_shutdown(device_t);
140 static int vtblk_attach_completed(device_t);
141 static int vtblk_config_change(device_t);
142
143 static int vtblk_open(struct disk *);
144 static int vtblk_close(struct disk *);
145 static int vtblk_ioctl(struct disk *, u_long, void *, int,
146 struct thread *);
147 static int vtblk_dump(void *, void *, off_t, size_t);
148 static void vtblk_strategy(struct bio *);
149
150 static int vtblk_negotiate_features(struct vtblk_softc *);
151 static int vtblk_setup_features(struct vtblk_softc *);
152 static int vtblk_maximum_segments(struct vtblk_softc *,
153 struct virtio_blk_config *);
154 static int vtblk_alloc_virtqueue(struct vtblk_softc *);
155 static void vtblk_resize_disk(struct vtblk_softc *, uint64_t);
156 static void vtblk_alloc_disk(struct vtblk_softc *,
157 struct virtio_blk_config *);
158 static void vtblk_create_disk(struct vtblk_softc *);
159
160 static int vtblk_request_prealloc(struct vtblk_softc *);
161 static void vtblk_request_free(struct vtblk_softc *);
162 static struct vtblk_request *
163 vtblk_request_dequeue(struct vtblk_softc *);
164 static void vtblk_request_enqueue(struct vtblk_softc *,
165 struct vtblk_request *);
166 static struct vtblk_request *
167 vtblk_request_next_ready(struct vtblk_softc *);
168 static void vtblk_request_requeue_ready(struct vtblk_softc *,
169 struct vtblk_request *);
170 static struct vtblk_request *
171 vtblk_request_next(struct vtblk_softc *);
172 static struct vtblk_request *
173 vtblk_request_bio(struct vtblk_softc *);
174 static int vtblk_request_execute(struct vtblk_request *, int);
175 static void vtblk_request_execute_cb(void *,
176 bus_dma_segment_t *, int, int);
177 static int vtblk_request_error(struct vtblk_request *);
178
179 static void vtblk_queue_completed(struct vtblk_softc *,
180 struct bio_queue *);
181 static void vtblk_done_completed(struct vtblk_softc *,
182 struct bio_queue *);
183 static void vtblk_drain_vq(struct vtblk_softc *);
184 static void vtblk_drain(struct vtblk_softc *);
185
186 static void vtblk_startio(struct vtblk_softc *);
187 static void vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
188
189 static void vtblk_read_config(struct vtblk_softc *,
190 struct virtio_blk_config *);
191 static void vtblk_ident(struct vtblk_softc *);
192 static int vtblk_poll_request(struct vtblk_softc *,
193 struct vtblk_request *);
194 static int vtblk_quiesce(struct vtblk_softc *);
195 static void vtblk_vq_intr(void *);
196 static void vtblk_stop(struct vtblk_softc *);
197
198 static void vtblk_dump_quiesce(struct vtblk_softc *);
199 static int vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
200 static int vtblk_dump_flush(struct vtblk_softc *);
201 static void vtblk_dump_complete(struct vtblk_softc *);
202
203 static void vtblk_set_write_cache(struct vtblk_softc *, int);
204 static int vtblk_write_cache_enabled(struct vtblk_softc *sc,
205 struct virtio_blk_config *);
206 static int vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
207
208 static void vtblk_setup_sysctl(struct vtblk_softc *);
209 static int vtblk_tunable_int(struct vtblk_softc *, const char *, int);
210
211 #define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
212 #define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val)
213 #define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val)
214 #define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val)
215 #define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val)
216 #define vtblk_gtoh32(_sc, _val) virtio_gtoh32(vtblk_modern(_sc), _val)
217 #define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val)
218
219 /* Tunables. */
220 static int vtblk_no_ident = 0;
221 TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
222 static int vtblk_writecache_mode = -1;
223 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
224
225 #define VTBLK_COMMON_FEATURES \
226 (VIRTIO_BLK_F_SIZE_MAX | \
227 VIRTIO_BLK_F_SEG_MAX | \
228 VIRTIO_BLK_F_GEOMETRY | \
229 VIRTIO_BLK_F_RO | \
230 VIRTIO_BLK_F_BLK_SIZE | \
231 VIRTIO_BLK_F_FLUSH | \
232 VIRTIO_BLK_F_TOPOLOGY | \
233 VIRTIO_BLK_F_CONFIG_WCE | \
234 VIRTIO_BLK_F_DISCARD | \
235 VIRTIO_RING_F_INDIRECT_DESC)
236
237 #define VTBLK_MODERN_FEATURES (VTBLK_COMMON_FEATURES)
238 #define VTBLK_LEGACY_FEATURES (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)
239
240 #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx
241 #define VTBLK_LOCK_INIT(_sc, _name) \
242 mtx_init(VTBLK_MTX((_sc)), (_name), \
243 "VirtIO Block Lock", MTX_DEF)
244 #define VTBLK_LOCK(_sc) mtx_lock(VTBLK_MTX((_sc)))
245 #define VTBLK_UNLOCK(_sc) mtx_unlock(VTBLK_MTX((_sc)))
246 #define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
247 #define VTBLK_LOCK_ASSERT(_sc) mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
248 #define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
249 mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
250
251 #define VTBLK_DISK_NAME "vtbd"
252 #define VTBLK_QUIESCE_TIMEOUT (30 * hz)
253 #define VTBLK_BSIZE 512
254
255 /*
256 * Each block request uses at least two segments - one for the header
257 * and one for the status.
258 */
259 #define VTBLK_MIN_SEGMENTS 2
260
261 static device_method_t vtblk_methods[] = {
262 /* Device methods. */
263 DEVMETHOD(device_probe, vtblk_probe),
264 DEVMETHOD(device_attach, vtblk_attach),
265 DEVMETHOD(device_detach, vtblk_detach),
266 DEVMETHOD(device_suspend, vtblk_suspend),
267 DEVMETHOD(device_resume, vtblk_resume),
268 DEVMETHOD(device_shutdown, vtblk_shutdown),
269
270 /* VirtIO methods. */
271 DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
272 DEVMETHOD(virtio_config_change, vtblk_config_change),
273
274 DEVMETHOD_END
275 };
276
277 static driver_t vtblk_driver = {
278 "vtblk",
279 vtblk_methods,
280 sizeof(struct vtblk_softc)
281 };
282
283 VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
284 MODULE_VERSION(virtio_blk, 1);
285 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
286
287 VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
288
289 static int
290 vtblk_modevent(module_t mod, int type, void *unused)
291 {
292 int error;
293
294 error = 0;
295
296 switch (type) {
297 case MOD_LOAD:
298 case MOD_QUIESCE:
299 case MOD_UNLOAD:
300 case MOD_SHUTDOWN:
301 break;
302 default:
303 error = EOPNOTSUPP;
304 break;
305 }
306
307 return (error);
308 }
309
310 static int
311 vtblk_probe(device_t dev)
312 {
313 return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
314 }
315
/*
 * Attach: negotiate features, size the request path (sglist, busdma tag,
 * virtqueue, preallocated requests), and allocate the GEOM disk. The disk
 * is published later from vtblk_attach_completed(). On any failure,
 * vtblk_detach() tears down whatever was set up.
 */
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	virtio_set_feature_desc(dev, vtblk_feature_desc);

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);

	/* Features must be negotiated before the config space is read. */
	error = vtblk_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < maxphys) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	/*
	 * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
	 * supports a single data segment; in that case we need busdma to
	 * align to a page boundary so we can send a *contiguous* page size
	 * request to the host.
	 */
	if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
		sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),			/* parent */
	    (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
	    0,						/* boundary */
	    BUS_SPACE_MAXADDR,				/* lowaddr */
	    BUS_SPACE_MAXADDR,				/* highaddr */
	    NULL, NULL,					/* filter, filterarg */
	    maxphys,					/* max request size */
	    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS,	/* max # segments */
	    maxphys,					/* maxsegsize */
	    0,						/* flags */
	    busdma_lock_mutex,				/* lockfunc */
	    &sc->vtblk_mtx,				/* lockarg */
	    &sc->vtblk_dmat);
	if (error) {
		device_printf(dev, "cannot create bus dma tag\n");
		goto fail;
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
	 */
	bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
#endif

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	/* Fallthrough on success (error == 0); cleanup only on failure. */
	if (error)
		vtblk_detach(dev);

	return (error);
}
437
/*
 * Detach: stop the device, drain outstanding work, and release all
 * resources. Also used by vtblk_attach() as its failure path, so every
 * teardown step tolerates a partially-initialized softc.
 */
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	/* Block new I/O first, then stop the device itself. */
	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	/* Dispose of queued bios and preallocated requests. */
	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_dmat != NULL) {
		bus_dma_tag_destroy(sc->vtblk_dmat);
		sc->vtblk_dmat = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}
472
/*
 * Suspend: flag the device so no new I/O starts, then wait for in-flight
 * requests to complete. On quiesce failure the flag is cleared so the
 * device keeps operating.
 */
static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}
491
/*
 * Resume: clear the suspend flag and restart any I/O that was queued
 * while suspended.
 */
static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}
507
/* Shutdown: no device-specific work is required. */
static int
vtblk_shutdown(device_t dev)
{

	return (0);
}
514
/* Publish the disk once the VirtIO attach sequence has finished. */
static int
vtblk_attach_completed(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	/*
	 * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
	 * processed after the device acknowledged
	 * VIRTIO_CONFIG_STATUS_DRIVER_OK.
	 */
	vtblk_create_disk(sc);
	return (0);
}
530
/*
 * Config-change callback: re-read the device config and resize the GEOM
 * disk if the host changed the capacity.
 */
static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * VTBLK_BSIZE;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}
550
551 static int
552 vtblk_open(struct disk *dp)
553 {
554 struct vtblk_softc *sc;
555
556 if ((sc = dp->d_drv1) == NULL)
557 return (ENXIO);
558
559 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
560 }
561
562 static int
563 vtblk_close(struct disk *dp)
564 {
565 struct vtblk_softc *sc;
566
567 if ((sc = dp->d_drv1) == NULL)
568 return (ENXIO);
569
570 return (0);
571 }
572
573 static int
574 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
575 struct thread *td)
576 {
577 struct vtblk_softc *sc;
578
579 if ((sc = dp->d_drv1) == NULL)
580 return (ENXIO);
581
582 return (ENOTTY);
583 }
584
/*
 * disk(9) d_dump entry point, used for kernel crash dumps. Writes are
 * issued synchronously; the final call (virtual == NULL, offset == 0)
 * or an error finishes the dump via vtblk_dump_complete().
 */
static int
vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	/* Park any normal in-flight work before touching the queue. */
	vtblk_dump_quiesce(sc);

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	if (error || (virtual == NULL && offset == 0))
		vtblk_dump_complete(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}
611
612 static void
613 vtblk_strategy(struct bio *bp)
614 {
615 struct vtblk_softc *sc;
616
617 if ((sc = bp->bio_disk->d_drv1) == NULL) {
618 vtblk_bio_done(NULL, bp, EINVAL);
619 return;
620 }
621
622 if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
623 (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
624 vtblk_bio_done(sc, bp, EOPNOTSUPP);
625 return;
626 }
627
628 VTBLK_LOCK(sc);
629
630 if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
631 VTBLK_UNLOCK(sc);
632 vtblk_bio_done(sc, bp, ENXIO);
633 return;
634 }
635
636 bioq_insert_tail(&sc->vtblk_bioq, bp);
637 vtblk_startio(sc);
638
639 VTBLK_UNLOCK(sc);
640 }
641
642 static int
643 vtblk_negotiate_features(struct vtblk_softc *sc)
644 {
645 device_t dev;
646 uint64_t features;
647
648 dev = sc->vtblk_dev;
649 features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
650 VTBLK_LEGACY_FEATURES;
651
652 sc->vtblk_features = virtio_negotiate_features(dev, features);
653 return (virtio_finalize_features(dev));
654 }
655
656 static int
657 vtblk_setup_features(struct vtblk_softc *sc)
658 {
659 device_t dev;
660 int error;
661
662 dev = sc->vtblk_dev;
663
664 error = vtblk_negotiate_features(sc);
665 if (error)
666 return (error);
667
668 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
669 sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
670 if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
671 sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
672
673 /* Legacy. */
674 if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
675 sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
676
677 return (0);
678 }
679
680 static int
681 vtblk_maximum_segments(struct vtblk_softc *sc,
682 struct virtio_blk_config *blkcfg)
683 {
684 device_t dev;
685 int nsegs;
686
687 dev = sc->vtblk_dev;
688 nsegs = VTBLK_MIN_SEGMENTS;
689
690 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
691 nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
692 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
693 nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
694 } else
695 nsegs += 1;
696
697 return (nsegs);
698 }
699
/* Allocate the single request virtqueue used for all I/O. */
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}
714
715 static void
716 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
717 {
718 device_t dev;
719 struct disk *dp;
720 int error;
721
722 dev = sc->vtblk_dev;
723 dp = sc->vtblk_disk;
724
725 dp->d_mediasize = new_capacity;
726 if (bootverbose) {
727 device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
728 (uintmax_t) dp->d_mediasize >> 20,
729 (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
730 dp->d_sectorsize);
731 }
732
733 error = disk_resize(dp, M_NOWAIT);
734 if (error) {
735 device_printf(dev,
736 "disk_resize(9) failed, error: %d\n", error);
737 }
738 }
739
/*
 * Allocate and populate the GEOM disk from the negotiated features and
 * the device config space. The disk is not published here; see
 * vtblk_create_disk().
 */
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	/* Dumping is only offered on writable disks. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
	else {
		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
		dp->d_dump = vtblk_dump;
	}

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = VTBLK_BSIZE;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
	 * which is typically greater than maxphys. Eventually we should
	 * just advertise maxphys and split buffers that are too big.
	 *
	 * If we're not asking busdma to align data to page boundaries, the
	 * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
	 * unaligned I/Os.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
	    PAGE_SIZE;
	if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
		dp->d_maxsize -= PAGE_SIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	/* Derive stripe size/offset from the physical block topology. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
	    blkcfg->topology.physical_block_exp > 0) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
		dp->d_flags |= DISKFLAG_CANDELETE;
		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}
821
/*
 * Fetch the device identity and publish the disk to GEOM. Called from
 * vtblk_attach_completed() once the device is fully operational.
 */
static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}
838
/*
 * Preallocate the request pool, sized to the virtqueue. On ENOMEM any
 * requests already allocated remain on the free list and are reclaimed
 * later via vtblk_request_free().
 */
static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		req->vbr_sc = sc;
		if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
			free(req, M_DEVBUF);
			return (ENOMEM);
		}

		/* Header and ack must each fit in a single segment. */
		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}
875
876 static void
877 vtblk_request_free(struct vtblk_softc *sc)
878 {
879 struct vtblk_request *req;
880
881 MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
882
883 while ((req = vtblk_request_dequeue(sc)) != NULL) {
884 sc->vtblk_request_count--;
885 bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
886 free(req, M_DEVBUF);
887 }
888
889 KASSERT(sc->vtblk_request_count == 0,
890 ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
891 }
892
/*
 * Take a request off the free list, or return NULL if none remain.
 * Only the per-request fields (vbr_hdr onward) are zeroed; vbr_sc and
 * vbr_mapp persist across reuse.
 */
static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(&req->vbr_hdr, sizeof(struct vtblk_request) -
		    offsetof(struct vtblk_request, vbr_hdr));
	}

	return (req);
}
907
/* Return a request to the head of the free list. */
static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}
914
915 static struct vtblk_request *
916 vtblk_request_next_ready(struct vtblk_softc *sc)
917 {
918 struct vtblk_request *req;
919
920 req = TAILQ_FIRST(&sc->vtblk_req_ready);
921 if (req != NULL)
922 TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
923
924 return (req);
925 }
926
/*
 * Put a request back on the ready list so it is retried before any new
 * bios are taken up.
 */
static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}
934
935 static struct vtblk_request *
936 vtblk_request_next(struct vtblk_softc *sc)
937 {
938 struct vtblk_request *req;
939
940 req = vtblk_request_next_ready(sc);
941 if (req != NULL)
942 return (req);
943
944 return (vtblk_request_bio(sc));
945 }
946
/*
 * Bind the next queued bio to a free request, translating the bio
 * command into a VirtIO block header (with guest-to-host byte order
 * applied). Returns NULL if there is no bio or no free request.
 */
static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	/* Preset to an invalid status; the host overwrites it on completion. */
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
		req->vbr_hdr.sector = 0;
		break;
	case BIO_READ:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_DELETE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	default:
		/* vtblk_strategy() filters unsupported commands already. */
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

	return (req);
}
994
/*
 * Execute a request: map the data buffer through busdma (for reads and
 * writes) and enqueue it on the virtqueue via the callback. Returns 0
 * on success, EINPROGRESS if busdma deferred the callback, or an errno.
 */
static int
vtblk_request_execute(struct vtblk_request *req, int flags)
{
	struct vtblk_softc *sc = req->vbr_sc;
	struct bio *bp = req->vbr_bp;
	int error = 0;

	/*
	 * Call via bus_dmamap_load_bio or directly depending on whether we
	 * have a buffer we need to map. If we don't have a busdma map,
	 * try to perform the I/O directly and hope that it works (this will
	 * happen when dumping).
	 */
	if ((req->vbr_mapp != NULL) &&
	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
		error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
		    req->vbr_bp, vtblk_request_execute_cb, req, flags);
		if (error == EINPROGRESS) {
			/* Pause new submissions until the callback fires. */
			req->vbr_busdma_wait = 1;
			sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
		}
	} else {
		vtblk_request_execute_cb(req, NULL, 0, 0);
	}

	return (error ? error : req->vbr_error);
}
1022
/*
 * busdma load callback: build the request's scatter/gather list
 * (header, data segments, status byte) and enqueue it on the
 * virtqueue. Runs synchronously from vtblk_request_execute() or
 * deferred by busdma; the outcome is stored in req->vbr_error.
 */
static void
vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
    int nseg, int error)
{
	struct vtblk_request *req;
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, i;

	req = (struct vtblk_request *)callback_arg;
	sc = req->vbr_sc;
	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * If we paused request queueing while we waited for busdma to call us
	 * asynchronously, unpause it now; this request made it through so we
	 * don't need to worry about others getting ahead of us. (Note that we
	 * hold the device mutex so nothing will happen until after we return
	 * anyway.)
	 */
	if (req->vbr_busdma_wait)
		sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;

	/* Fail on errors from busdma. */
	if (error)
		goto out1;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL) {
			/* A barrier is in flight; defer this request. */
			error = EBUSY;
			goto out;
		}
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq)) {
				/* Wait until the queue drains. */
				error = EBUSY;
				goto out;
			}
			ordered = 1;
			/* Strip the barrier bit the host does not support. */
			req->vbr_hdr.type &= vtblk_gtoh32(sc,
				~VIRTIO_BLK_T_BARRIER);
		}
	}

	/* Segment 0 is always the request header. */
	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		/*
		 * We cast bus_addr_t to vm_paddr_t here; since we skip the
		 * iommu mapping (see vtblk_attach) this should be safe.
		 */
		for (i = 0; i < nseg; i++) {
			error = sglist_append_phys(sg,
			    (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
			if (error || sg->sg_nseg == sg->sg_maxseg) {
				panic("%s: bio %p data buffer too big %d",
				    __func__, bp, error);
			}
		}

		/* Special handling for dump, which bypasses busdma. */
		if (req->vbr_mapp == NULL) {
			error = sglist_append_bio(sg, bp);
			if (error || sg->sg_nseg == sg->sg_maxseg) {
				panic("%s: bio %p data buffer too big %d",
				    __func__, bp, error);
			}
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	} else if (bp->bio_cmd == BIO_DELETE) {
		struct virtio_blk_discard_write_zeroes *discard;

		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (discard == NULL) {
			error = ENOMEM;
			goto out;
		}

		/* Stash the allocation on the bio so completion can free it. */
		bp->bio_driver1 = discard;
		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
		error = sglist_append(sg, discard, sizeof(*discard));
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}
	}

	/* The final segment is the host-writable status byte. */
	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	if (req->vbr_mapp != NULL) {
		switch (bp->bio_cmd) {
		case BIO_READ:
			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
			    BUS_DMASYNC_PREREAD);
			break;
		case BIO_WRITE:
			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
			    BUS_DMASYNC_PREWRITE);
			break;
		}
	}

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	/*
	 * If we were called asynchronously, we need to notify the queue that
	 * we've added a new request, since the notification from startio was
	 * performed already.
	 */
	if (error == 0 && req->vbr_busdma_wait)
		virtqueue_notify(vq);

out:
	if (error && (req->vbr_mapp != NULL))
		bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
out1:
	if (error && req->vbr_requeue_on_error)
		vtblk_request_requeue_ready(sc, req);
	req->vbr_error = error;
}
1162
1163 static int
1164 vtblk_request_error(struct vtblk_request *req)
1165 {
1166 int error;
1167
1168 switch (req->vbr_ack) {
1169 case VIRTIO_BLK_S_OK:
1170 error = 0;
1171 break;
1172 case VIRTIO_BLK_S_UNSUPP:
1173 error = ENOTSUP;
1174 break;
1175 default:
1176 error = EIO;
1177 break;
1178 }
1179
1180 return (error);
1181 }
1182
/*
 * Reap every completed request from the virtqueue: finish its busdma
 * transaction, record the completion status in the bio, and collect the
 * bios on "queue" so the caller can biodone them later via
 * vtblk_done_completed() (biodone must happen without the mutex held
 * because of GEOM direct dispatch; see vtblk_bio_done()).
 */
static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		/*
		 * An emulated barrier request is the only one allowed in
		 * flight, so whatever completes while one is outstanding
		 * must be that request; clear the marker.
		 */
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		if (req->vbr_mapp != NULL) {
			/* Finish the DMA transfer for data requests. */
			switch (bp->bio_cmd) {
			case BIO_READ:
				bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(sc->vtblk_dmat,
				    req->vbr_mapp);
				break;
			case BIO_WRITE:
				bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sc->vtblk_dmat,
				    req->vbr_mapp);
				break;
			}
		}
		/* Convert the device status byte to an errno. */
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		/* Return the request structure to the free list. */
		vtblk_request_enqueue(sc, req);
	}
}
1218
1219 static void
1220 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1221 {
1222 struct bio *bp, *tmp;
1223
1224 TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1225 if (bp->bio_error != 0)
1226 disk_err(bp, "hard error", -1, 1);
1227 vtblk_bio_done(sc, bp, bp->bio_error);
1228 }
1229 }
1230
1231 static void
1232 vtblk_drain_vq(struct vtblk_softc *sc)
1233 {
1234 struct virtqueue *vq;
1235 struct vtblk_request *req;
1236 int last;
1237
1238 vq = sc->vtblk_vq;
1239 last = 0;
1240
1241 while ((req = virtqueue_drain(vq, &last)) != NULL) {
1242 vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1243 vtblk_request_enqueue(sc, req);
1244 }
1245
1246 sc->vtblk_req_ordered = NULL;
1247 KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1248 }
1249
/*
 * Fail all outstanding and queued I/O with ENXIO and free the request
 * pool.  Requests the host already completed are finished normally
 * first; everything else (in the virtqueue, on the ready list, or still
 * on the bio queue) is errored out.
 */
static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL) {
		struct bio_queue queue;

		/* Complete whatever the host has already finished. */
		TAILQ_INIT(&queue);
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		/* Everything still in the virtqueue fails with ENXIO. */
		vtblk_drain_vq(sc);
	}

	/* Fail requests parked on the ready list. */
	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	/* Fail bios never assigned a request structure. */
	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}
1281
/*
 * Move queued bios into the virtqueue until it fills or we run out of
 * work, then notify the host once for the whole batch.
 */
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	/*
	 * Hold off while suspended, or while an asynchronous busdma load
	 * is outstanding (VTBLK_FLAG_BUSDMA_WAIT is cleared by the busdma
	 * callback, which notifies the queue itself).
	 */
	if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		/* On failure the request is requeued for a later retry. */
		req->vbr_requeue_on_error = 1;
		if (vtblk_request_execute(req, BUS_DMA_WAITOK))
			break;

		enq++;
	}

	/* One notification covers all requests just enqueued. */
	if (enq > 0)
		virtqueue_notify(vq);
}
1311
1312 static void
1313 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1314 {
1315
1316 /* Because of GEOM direct dispatch, we cannot hold any locks. */
1317 if (sc != NULL)
1318 VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1319
1320 if (error) {
1321 bp->bio_resid = bp->bio_bcount;
1322 bp->bio_error = error;
1323 bp->bio_flags |= BIO_ERROR;
1324 } else {
1325 kmsan_mark_bio(bp, KMSAN_STATE_INITED);
1326 }
1327
1328 if (bp->bio_driver1 != NULL) {
1329 free(bp->bio_driver1, M_DEVBUF);
1330 bp->bio_driver1 = NULL;
1331 }
1332
1333 biodone(bp);
1334 }
1335
/*
 * Read a config-space field only if the corresponding feature bit was
 * negotiated.  Wrapped in do/while(0) so the macro expands to a single
 * statement and is safe inside unbraced if/else bodies (the bare "if"
 * form was a dangling-else hazard).
 */
#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)		\
	do {							\
		if (virtio_with_feature(_dev, _feature)) {	\
			virtio_read_device_config(_dev,		\
			    offsetof(struct virtio_blk_config,	\
				_field),			\
			    &(_cfg)->_field,			\
			    sizeof((_cfg)->_field));		\
		}						\
	} while (0)
1342
/*
 * Snapshot the virtio-blk config space into *blkcfg.  Only fields whose
 * feature bit was negotiated are read from the device; everything else
 * stays zero from the bzero() below.
 */
static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.cylinders, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.heads, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.sectors, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.physical_block_exp, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.alignment_offset, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.min_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.opt_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
	    blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
	    blkcfg);
}
1381
1382 #undef VTBLK_GET_CONFIG
1383
/*
 * Fetch the device identifier string into the disk's d_ident using a
 * polled VIRTIO_BLK_T_GET_ID request, unless disabled by the "no_ident"
 * tunable.  The bio lives on the stack, which is safe because we poll
 * the request to completion before returning.
 */
static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	/* Copy no more than the smaller of the two buffer limits. */
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	/* GET_ID transfers device-to-driver, so shape it like a read. */
	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	/* Return the request structure to the free list. */
	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}
1425
/*
 * Execute a single request synchronously, busy-waiting for the host to
 * complete it.  The virtqueue must be otherwise empty so the polled
 * request is the only one in flight.  Used by the ident and kernel-dump
 * paths, where sleeping is not an option.
 */
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	/* BUS_DMA_NOWAIT: the busdma load must not sleep here. */
	error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	/* Convert the device status byte to an errno. */
	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}
1452
1453 static int
1454 vtblk_quiesce(struct vtblk_softc *sc)
1455 {
1456 int error;
1457
1458 VTBLK_LOCK_ASSERT(sc);
1459 error = 0;
1460
1461 while (!virtqueue_empty(sc->vtblk_vq)) {
1462 if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1463 VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1464 error = EBUSY;
1465 break;
1466 }
1467 }
1468
1469 return (error);
1470 }
1471
/*
 * Virtqueue completion interrupt.  Reaps finished requests, starts more
 * I/O, and re-arms the interrupt, looping when new completions raced
 * with re-enable.  Collected bios are completed only after the mutex is
 * dropped (GEOM direct dispatch forbids holding locks in biodone).
 */
static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	/* The device is being detached; leave everything alone. */
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	/*
	 * A non-zero return means completions arrived while interrupts
	 * were disabled; process them before returning.
	 */
	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	/* Wake up a thread blocked in vtblk_quiesce(). */
	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}
1504
/*
 * Quiet the device: mask virtqueue interrupts and stop the virtio
 * device.  Outstanding requests are cleaned up separately via
 * vtblk_drain().
 */
static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}
1512
/*
 * Poll until every request that was in flight when the kernel dump
 * started has completed.  Their bios are parked on vtblk_dump_queue and
 * biodone'd by vtblk_dump_complete() once the dump is over.
 */
static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

	/*
	 * Spin here until all the requests in-flight at the time of the
	 * dump are completed and queued. The queued requests will be
	 * biodone'd once the dump is finished.
	 */
	while (!virtqueue_empty(sc->vtblk_vq))
		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}
1525
1526 static int
1527 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1528 size_t length)
1529 {
1530 struct bio buf;
1531 struct vtblk_request *req;
1532
1533 req = &sc->vtblk_dump_request;
1534 req->vbr_sc = sc;
1535 req->vbr_ack = -1;
1536 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1537 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1538 req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1539
1540 req->vbr_bp = &buf;
1541 g_reset_bio(&buf);
1542
1543 buf.bio_cmd = BIO_WRITE;
1544 buf.bio_data = virtual;
1545 buf.bio_bcount = length;
1546
1547 return (vtblk_poll_request(sc, req));
1548 }
1549
/*
 * Issue a synchronous VIRTIO_BLK_T_FLUSH so dump data reaches stable
 * storage.  Reuses the preallocated dump request and a stack bio, both
 * safe because the request is polled to completion.
 */
static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_sc = sc;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}
1570
/*
 * Finish a kernel dump: flush the device, then biodone the bios that
 * were parked on vtblk_dump_queue while dumping.  The mutex is dropped
 * around vtblk_done_completed() since biodone must run unlocked.
 */
static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

	vtblk_dump_flush(sc);

	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
	VTBLK_LOCK(sc);
}
1581
/*
 * Program the device's cache mode in config space: writeback (1) or
 * writethrough (0).  Callers only do this when VIRTIO_BLK_F_CONFIG_WCE
 * was negotiated (see vtblk_write_cache_enabled/_sysctl).
 */
static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, wce), wc);
}
1590
1591 static int
1592 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1593 struct virtio_blk_config *blkcfg)
1594 {
1595 int wc;
1596
1597 if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1598 wc = vtblk_tunable_int(sc, "writecache_mode",
1599 vtblk_writecache_mode);
1600 if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1601 vtblk_set_write_cache(sc, wc);
1602 else
1603 wc = blkcfg->wce;
1604 } else
1605 wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1606
1607 return (wc);
1608 }
1609
/*
 * Sysctl handler for "writecache_mode": report the current mode, and
 * allow switching between writethrough (0) and writeback (1) when the
 * device exposes a writable cache config.
 */
static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	/* Changing the mode requires VIRTIO_BLK_F_CONFIG_WCE. */
	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}
1634
/*
 * Attach the per-device sysctl nodes under the device's sysctl tree;
 * currently only the "writecache_mode" knob.
 */
static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    vtblk_write_cache_sysctl, "I",
	    "Write cache mode (writethrough (0) or writeback (1))");
}
1653
1654 static int
1655 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1656 {
1657 char path[64];
1658
1659 snprintf(path, sizeof(path),
1660 "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1661 TUNABLE_INT_FETCH(path, &def);
1662
1663 return (def);
1664 }
/* Cache object: e8713247fc2dd8451afde4634b3ecf08 */