FreeBSD/Linux Kernel Cross Reference
sys/cam/ctl/ctl_backend_block.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2003 Silicon Graphics International Corp.
    5  * Copyright (c) 2009-2011 Spectra Logic Corporation
    6  * Copyright (c) 2012,2021 The FreeBSD Foundation
    7  * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org>
    8  * All rights reserved.
    9  *
   10  * Portions of this software were developed by Edward Tomasz Napierala
   11  * under sponsorship from the FreeBSD Foundation.
   12  *
   13  * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
   14  * under sponsorship from the FreeBSD Foundation.
   15  *
   16  * Redistribution and use in source and binary forms, with or without
   17  * modification, are permitted provided that the following conditions
   18  * are met:
   19  * 1. Redistributions of source code must retain the above copyright
   20  *    notice, this list of conditions, and the following disclaimer,
   21  *    without modification.
   22  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
   23  *    substantially similar to the "NO WARRANTY" disclaimer below
   24  *    ("Disclaimer") and any redistribution must be conditioned upon
   25  *    including a substantially similar Disclaimer requirement for further
   26  *    binary redistribution.
   27  *
   28  * NO WARRANTY
   29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
   32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   33  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   37  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   38  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   39  * POSSIBILITY OF SUCH DAMAGES.
   40  *
   41  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
   42  */
   43 /*
   44  * CAM Target Layer driver backend for block devices.
   45  *
   46  * Author: Ken Merry <ken@FreeBSD.org>
   47  */
   48 #include <sys/cdefs.h>
   49 __FBSDID("$FreeBSD$");
   50 
   51 #include <sys/param.h>
   52 #include <sys/systm.h>
   53 #include <sys/kernel.h>
   54 #include <sys/types.h>
   55 #include <sys/kthread.h>
   56 #include <sys/bio.h>
   57 #include <sys/fcntl.h>
   58 #include <sys/limits.h>
   59 #include <sys/lock.h>
   60 #include <sys/mutex.h>
   61 #include <sys/condvar.h>
   62 #include <sys/malloc.h>
   63 #include <sys/conf.h>
   64 #include <sys/ioccom.h>
   65 #include <sys/queue.h>
   66 #include <sys/sbuf.h>
   67 #include <sys/endian.h>
   68 #include <sys/uio.h>
   69 #include <sys/buf.h>
   70 #include <sys/taskqueue.h>
   71 #include <sys/vnode.h>
   72 #include <sys/namei.h>
   73 #include <sys/mount.h>
   74 #include <sys/disk.h>
   75 #include <sys/fcntl.h>
   76 #include <sys/filedesc.h>
   77 #include <sys/filio.h>
   78 #include <sys/proc.h>
   79 #include <sys/pcpu.h>
   80 #include <sys/module.h>
   81 #include <sys/sdt.h>
   82 #include <sys/devicestat.h>
   83 #include <sys/sysctl.h>
   84 #include <sys/nv.h>
   85 #include <sys/dnv.h>
   86 #include <sys/sx.h>
   87 #include <sys/unistd.h>
   88 
   89 #include <geom/geom.h>
   90 
   91 #include <cam/cam.h>
   92 #include <cam/scsi/scsi_all.h>
   93 #include <cam/scsi/scsi_da.h>
   94 #include <cam/ctl/ctl_io.h>
   95 #include <cam/ctl/ctl.h>
   96 #include <cam/ctl/ctl_backend.h>
   97 #include <cam/ctl/ctl_ioctl.h>
   98 #include <cam/ctl/ctl_ha.h>
   99 #include <cam/ctl/ctl_scsi_all.h>
  100 #include <cam/ctl/ctl_private.h>
  101 #include <cam/ctl/ctl_error.h>
  102 
  103 /*
  104  * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
  105  * On systems with small maxphys it can be 8 128KB segments.  On large systems
  106  * it can be up to 8 1MB segments.  I/Os larger than that we'll split.
  107  */
  108 #define CTLBLK_MAX_SEGS         8
  109 #define CTLBLK_HALF_SEGS        (CTLBLK_MAX_SEGS / 2)
  110 #define CTLBLK_MIN_SEG          (128 * 1024)
  111 #define CTLBLK_MAX_SEG          MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
  112 #define CTLBLK_MAX_IO_SIZE      (CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
  113 
  114 #ifdef CTLBLK_DEBUG
  115 #define DPRINTF(fmt, args...) \
  116     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
  117 #else
  118 #define DPRINTF(fmt, args...) do {} while(0)
  119 #endif
  120 
  121 #define PRIV(io)        \
  122     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
  123 #define ARGS(io)        \
  124     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
  125 
  126 SDT_PROVIDER_DEFINE(cbb);
  127 
  128 typedef enum {
  129         CTL_BE_BLOCK_LUN_UNCONFIGURED   = 0x01,
  130         CTL_BE_BLOCK_LUN_WAITING        = 0x04,
  131 } ctl_be_block_lun_flags;
  132 
  133 typedef enum {
  134         CTL_BE_BLOCK_NONE,
  135         CTL_BE_BLOCK_DEV,
  136         CTL_BE_BLOCK_FILE
  137 } ctl_be_block_type;
  138 
  139 struct ctl_be_block_filedata {
  140         struct ucred *cred;
  141 };
  142 
  143 union ctl_be_block_bedata {
  144         struct ctl_be_block_filedata file;
  145 };
  146 
  147 struct ctl_be_block_io;
  148 struct ctl_be_block_lun;
  149 
  150 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
  151                                struct ctl_be_block_io *beio);
  152 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
  153                                   const char *attrname);
  154 
  155 /*
  156  * Backend LUN structure.  There is a 1:1 mapping between a block device
  157  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
  158  */
  159 struct ctl_be_block_lun {
  160         struct ctl_be_lun cbe_lun;              /* Must be first element. */
  161         struct ctl_lun_create_params params;
  162         char *dev_path;
  163         ctl_be_block_type dev_type;
  164         struct vnode *vn;
  165         union ctl_be_block_bedata backend;
  166         cbb_dispatch_t dispatch;
  167         cbb_dispatch_t lun_flush;
  168         cbb_dispatch_t unmap;
  169         cbb_dispatch_t get_lba_status;
  170         cbb_getattr_t getattr;
  171         uint64_t size_blocks;
  172         uint64_t size_bytes;
  173         struct ctl_be_block_softc *softc;
  174         struct devstat *disk_stats;
  175         ctl_be_block_lun_flags flags;
  176         SLIST_ENTRY(ctl_be_block_lun) links;
  177         struct taskqueue *io_taskqueue;
  178         struct task io_task;
  179         int num_threads;
  180         STAILQ_HEAD(, ctl_io_hdr) input_queue;
  181         STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
  182         STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
  183         STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
  184         struct mtx_padalign io_lock;
  185         struct mtx_padalign queue_lock;
  186 };
  187 
  188 /*
  189  * Overall softc structure for the block backend module.
  190  */
  191 struct ctl_be_block_softc {
  192         struct sx                        modify_lock;
  193         struct mtx                       lock;
  194         int                              num_luns;
  195         SLIST_HEAD(, ctl_be_block_lun)   lun_list;
  196         uma_zone_t                       beio_zone;
  197         uma_zone_t                       bufmin_zone;
  198         uma_zone_t                       bufmax_zone;
  199 };
  200 
  201 static struct ctl_be_block_softc backend_block_softc;
  202 
  203 /*
  204  * Per-I/O information.
  205  */
  206 struct ctl_be_block_io {
  207         union ctl_io                    *io;
  208         struct ctl_sg_entry             sg_segs[CTLBLK_MAX_SEGS];
  209         struct iovec                    xiovecs[CTLBLK_MAX_SEGS];
  210         int                             refcnt;
  211         int                             bio_cmd;
  212         int                             two_sglists;
  213         int                             num_segs;
  214         int                             num_bios_sent;
  215         int                             num_bios_done;
  216         int                             send_complete;
  217         int                             first_error;
  218         uint64_t                        first_error_offset;
  219         struct bintime                  ds_t0;
  220         devstat_tag_type                ds_tag_type;
  221         devstat_trans_flags             ds_trans_type;
  222         uint64_t                        io_len;
  223         uint64_t                        io_offset;
  224         int                             io_arg;
  225         struct ctl_be_block_softc       *softc;
  226         struct ctl_be_block_lun         *lun;
  227         void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
  228 };
  229 
  230 extern struct ctl_softc *control_softc;
  231 
  232 static int cbb_num_threads = 32;
  233 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  234             "CAM Target Layer Block Backend");
  235 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
  236            &cbb_num_threads, 0, "Number of threads per backing file");
  237 
  238 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
  239 static void ctl_free_beio(struct ctl_be_block_io *beio);
  240 static void ctl_complete_beio(struct ctl_be_block_io *beio);
  241 static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
  242 static void ctl_be_block_biodone(struct bio *bio);
  243 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
  244                                     struct ctl_be_block_io *beio);
  245 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
  246                                        struct ctl_be_block_io *beio);
  247 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
  248                                   struct ctl_be_block_io *beio);
  249 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
  250                                          const char *attrname);
  251 static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
  252                                     struct ctl_be_block_io *beio);
  253 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
  254                                    struct ctl_be_block_io *beio);
  255 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
  256                                    struct ctl_be_block_io *beio);
  257 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
  258                                       struct ctl_be_block_io *beio);
  259 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
  260                                          const char *attrname);
  261 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
  262                                     union ctl_io *io);
  263 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
  264                                     union ctl_io *io);
  265 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
  266                                   union ctl_io *io);
  267 static void ctl_be_block_worker(void *context, int pending);
  268 static int ctl_be_block_submit(union ctl_io *io);
  269 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
  270                                    int flag, struct thread *td);
  271 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
  272                                   struct ctl_lun_req *req);
  273 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
  274                                  struct ctl_lun_req *req);
  275 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
  276 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
  277                              struct ctl_lun_req *req);
  278 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
  279                                struct ctl_lun_req *req);
  280 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
  281                            struct ctl_lun_req *req);
  282 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
  283                            struct ctl_lun_req *req);
  284 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
  285 static int ctl_be_block_config_write(union ctl_io *io);
  286 static int ctl_be_block_config_read(union ctl_io *io);
  287 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
  288 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
  289 static int ctl_be_block_init(void);
  290 static int ctl_be_block_shutdown(void);
  291 
  292 static struct ctl_backend_driver ctl_be_block_driver = 
  293 {
  294         .name = "block",
  295         .flags = CTL_BE_FLAG_HAS_CONFIG,
  296         .init = ctl_be_block_init,
  297         .shutdown = ctl_be_block_shutdown,
  298         .data_submit = ctl_be_block_submit,
  299         .config_read = ctl_be_block_config_read,
  300         .config_write = ctl_be_block_config_write,
  301         .ioctl = ctl_be_block_ioctl,
  302         .lun_info = ctl_be_block_lun_info,
  303         .lun_attr = ctl_be_block_lun_attr
  304 };
  305 
  306 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
  307 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
  308 
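       /*
        * Allocate the data buffer for one S/G entry from the small or large
        * UMA zone, depending on the requested length, and record the length
        * in the entry.  Sleeps until memory is available (M_WAITOK).
        */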
  309 static void
  310 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
  311     size_t len)
  312 {
  313 
  314         if (len <= CTLBLK_MIN_SEG) {
  315                 sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
  316         } else {
  317                 KASSERT(len <= CTLBLK_MAX_SEG,
  318                     ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
  319                 sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
  320         }
  321         sg->len = len;
  322 }
  323 
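       /*
        * Return an S/G entry's buffer to the zone it was allocated from,
        * chosen by the recorded segment length.
        */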
  324 static void
  325 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
  326 {
  327 
  328         if (sg->len <= CTLBLK_MIN_SEG) {
  329                 uma_zfree(softc->bufmin_zone, sg->addr);
  330         } else {
  331                 KASSERT(sg->len <= CTLBLK_MAX_SEG,
  332                     ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
  333                 uma_zfree(softc->bufmax_zone, sg->addr);
  334         }
  335 }
  336 
  337 static struct ctl_be_block_io *
  338 ctl_alloc_beio(struct ctl_be_block_softc *softc)
  339 {
  340         struct ctl_be_block_io *beio;
  341 
  342         beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
  343         beio->softc = softc;
  344         beio->refcnt = 1;
  345         return (beio);
  346 }
  347 
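       /*
        * Release a beio once its reference count reaches zero: free every
        * allocated S/G segment (both halves for compare operations) and
        * then the beio itself.
        */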
  348 static void
  349 ctl_real_free_beio(struct ctl_be_block_io *beio)
  350 {
  351         struct ctl_be_block_softc *softc = beio->softc;
  352         int i;
  353 
  354         for (i = 0; i < beio->num_segs; i++) {
  355                 ctl_free_seg(softc, &beio->sg_segs[i]);
  356 
  357                 /* For compare we had two equal S/G lists. */
  358                 if (beio->two_sglists) {
  359                         ctl_free_seg(softc,
  360                             &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
  361                 }
  362         }
  363 
  364         uma_zfree(softc->beio_zone, beio);
  365 }
  366 
  367 static void
  368 ctl_refcnt_beio(void *arg, int diff)
  369 {
  370         struct ctl_be_block_io *beio = arg;
  371 
  372         if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
  373                 ctl_real_free_beio(beio);
  374 }
  375 
  376 static void
  377 ctl_free_beio(struct ctl_be_block_io *beio)
  378 {
  379 
  380         ctl_refcnt_beio(beio, -1);
  381 }
  382 
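       /*
        * Finish backend processing of an I/O: call the continuation routine
        * if one is set, otherwise drop the beio reference and tell CTL that
        * data submission is done.
        */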
  383 static void
  384 ctl_complete_beio(struct ctl_be_block_io *beio)
  385 {
  386         union ctl_io *io = beio->io;
  387 
  388         if (beio->beio_cont != NULL) {
  389                 beio->beio_cont(beio);
  390         } else {
  391                 ctl_free_beio(beio);
  392                 ctl_data_submit_done(io);
  393         }
  394 }
  395 
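       /*
        * Compare two buffers byte by byte; return the offset of the first
        * difference, or "size" if the buffers are identical.
        */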
  396 static size_t
  397 cmp(uint8_t *a, uint8_t *b, size_t size)
  398 {
  399         size_t i;
  400 
  401         for (i = 0; i < size; i++) {
  402                 if (a[i] != b[i])
  403                         break;
  404         }
  405         return (i);
  406 }
  407 
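       /*
        * Compare the two halves of the S/G list of a compare request.  On a
        * mismatch, set MISCOMPARE sense carrying the byte offset of the
        * first difference in the INFORMATION field; otherwise set success.
        */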
  408 static void
  409 ctl_be_block_compare(union ctl_io *io)
  410 {
  411         struct ctl_be_block_io *beio;
  412         uint64_t off, res;
  413         int i;
  414         uint8_t info[8];
  415 
  416         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
  417         off = 0;
  418         for (i = 0; i < beio->num_segs; i++) {
  419                 res = cmp(beio->sg_segs[i].addr,
  420                     beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
  421                     beio->sg_segs[i].len);
  422                 off += res;
  423                 if (res < beio->sg_segs[i].len)
  424                         break;
  425         }
  426         if (i < beio->num_segs) {
  427                 scsi_u64to8b(off, info);
  428                 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
  429                     /*sense_key*/ SSD_KEY_MISCOMPARE,
  430                     /*asc*/ 0x1D, /*ascq*/ 0x00,
  431                     /*type*/ SSD_ELEM_INFO,
  432                     /*size*/ sizeof(info), /*data*/ &info,
  433                     /*type*/ SSD_ELEM_NONE);
  434         } else
  435                 ctl_set_success(&io->scsiio);
  436 }
  437 
  438 static int
  439 ctl_be_block_move_done(union ctl_io *io, bool samethr)
  440 {
  441         struct ctl_be_block_io *beio;
  442         struct ctl_be_block_lun *be_lun;
  443         struct ctl_lba_len_flags *lbalen;
  444 
  445         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
  446 
  447         DPRINTF("entered\n");
  448         io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
  449 
  450         /*
  451          * We set status at this point for read and compare commands.
  452          */
  453         if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
  454             (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
  455                 lbalen = ARGS(io);
  456                 if (lbalen->flags & CTL_LLF_READ) {
  457                         ctl_set_success(&io->scsiio);
  458                 } else if (lbalen->flags & CTL_LLF_COMPARE) {
  459                         /* We have two data blocks ready for comparison. */
  460                         ctl_be_block_compare(io);
  461                 }
  462         }
  463 
  464         /*
  465          * If this is a read, or a write with errors, it is done.
  466          */
  467         if ((beio->bio_cmd == BIO_READ)
  468          || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
  469          || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
  470                 ctl_complete_beio(beio);
  471                 return (0);
  472         }
  473 
  474         /*
  475          * At this point, we have a write and the DMA completed successfully.
  476          * If we were called synchronously in the original thread then just
  477          * dispatch, otherwise we now have to queue it to the task queue to
  478          * execute the backend I/O.  That is because we do blocking
  479          * memory allocations, and in the file backing case, blocking I/O.
  480          * This move done routine is generally called in the SIM's
  481          * interrupt context, and therefore we cannot block.
  482          */
  483         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
  484         if (samethr) {
  485                 be_lun->dispatch(be_lun, beio);
  486         } else {
  487                 mtx_lock(&be_lun->queue_lock);
  488                 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
  489                 mtx_unlock(&be_lun->queue_lock);
  490                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
  491         }
  492         return (0);
  493 }
  494 
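       /*
        * Completion callback for bios issued to the backing device.  Record
        * the lowest-offset error, and once the last outstanding bio has
        * completed, either translate the error to sense data or, on success,
        * finish the request (for reads, by starting the datamove back to the
        * frontend).
        */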
  495 static void
  496 ctl_be_block_biodone(struct bio *bio)
  497 {
  498         struct ctl_be_block_io *beio = bio->bio_caller1;
  499         struct ctl_be_block_lun *be_lun = beio->lun;
  500         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
  501         union ctl_io *io;
  502         int error;
  503 
  504         io = beio->io;
  505 
  506         DPRINTF("entered\n");
  507 
  508         error = bio->bio_error;
  509         mtx_lock(&be_lun->io_lock);
  510         if (error != 0 &&
  511             (beio->first_error == 0 ||
  512              bio->bio_offset < beio->first_error_offset)) {
  513                 beio->first_error = error;
  514                 beio->first_error_offset = bio->bio_offset;
  515         }
  516 
  517         beio->num_bios_done++;
  518 
  519         /*
  520          * XXX KDM will this cause WITNESS to complain?  Holding a lock
  521          * during the free might cause it to complain.
  522          */
  523         g_destroy_bio(bio);
  524 
  525         /*
   526          * If the send complete bit isn't set, or we aren't the last I/O to
   527          * complete, there is nothing more to do here yet.
  528          */
  529         if ((beio->send_complete == 0)
  530          || (beio->num_bios_done < beio->num_bios_sent)) {
  531                 mtx_unlock(&be_lun->io_lock);
  532                 return;
  533         }
  534 
  535         /*
  536          * At this point, we've verified that we are the last I/O to
  537          * complete, so it's safe to drop the lock.
  538          */
  539         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
  540             beio->ds_tag_type, beio->ds_trans_type,
  541             /*now*/ NULL, /*then*/&beio->ds_t0);
  542         mtx_unlock(&be_lun->io_lock);
  543 
  544         /*
  545          * If there are any errors from the backing device, we fail the
  546          * entire I/O with a medium error.
  547          */
  548         error = beio->first_error;
  549         if (error != 0) {
  550                 if (error == EOPNOTSUPP) {
  551                         ctl_set_invalid_opcode(&io->scsiio);
  552                 } else if (error == ENOSPC || error == EDQUOT) {
  553                         ctl_set_space_alloc_fail(&io->scsiio);
  554                 } else if (error == EROFS || error == EACCES) {
  555                         ctl_set_hw_write_protected(&io->scsiio);
  556                 } else if (beio->bio_cmd == BIO_FLUSH) {
   557                         /* XXX KDM is there a better error here? */
  558                         ctl_set_internal_failure(&io->scsiio,
  559                                                  /*sks_valid*/ 1,
  560                                                  /*retry_count*/ 0xbad2);
  561                 } else {
  562                         ctl_set_medium_error(&io->scsiio,
  563                             beio->bio_cmd == BIO_READ);
  564                 }
  565                 ctl_complete_beio(beio);
  566                 return;
  567         }
  568 
  569         /*
  570          * If this is a write, a flush, a delete or verify, we're all done.
  571          * If this is a read, we can now send the data to the user.
  572          */
  573         if ((beio->bio_cmd == BIO_WRITE)
  574          || (beio->bio_cmd == BIO_FLUSH)
  575          || (beio->bio_cmd == BIO_DELETE)
  576          || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
  577                 ctl_set_success(&io->scsiio);
  578                 ctl_complete_beio(beio);
  579         } else {
  580                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
  581                     beio->beio_cont == NULL) {
  582                         ctl_set_success(&io->scsiio);
  583                         if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
  584                                 ctl_serseq_done(io);
  585                 }
  586                 ctl_datamove(io);
  587         }
  588 }
  589 
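       /*
        * SYNCHRONIZE CACHE for file-backed LUNs: VOP_FSYNC() the backing
        * vnode, waiting for completion unless the IMMED bit was set, and
        * report the result.
        */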
  590 static void
  591 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
  592                         struct ctl_be_block_io *beio)
  593 {
  594         union ctl_io *io = beio->io;
  595         struct mount *mountpoint;
  596         int error;
  597 
  598         DPRINTF("entered\n");
  599 
  600         binuptime(&beio->ds_t0);
  601         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
  602 
  603         (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
  604 
  605         vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
  606             LK_RETRY);
  607         error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
  608             curthread);
  609         VOP_UNLOCK(be_lun->vn);
  610 
  611         vn_finished_write(mountpoint);
  612 
  613         mtx_lock(&be_lun->io_lock);
  614         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
  615             beio->ds_tag_type, beio->ds_trans_type,
  616             /*now*/ NULL, /*then*/&beio->ds_t0);
  617         mtx_unlock(&be_lun->io_lock);
  618 
  619         if (error == 0)
  620                 ctl_set_success(&io->scsiio);
  621         else {
   622                 /* XXX KDM is there a better error here? */
  623                 ctl_set_internal_failure(&io->scsiio,
  624                                          /*sks_valid*/ 1,
  625                                          /*retry_count*/ 0xbad1);
  626         }
  627 
  628         ctl_complete_beio(beio);
  629 }
  630 
  631 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
  632 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
   633 SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
  634 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
  635 
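       /*
        * READ/WRITE path for file-backed LUNs.  Build a kernel uio from the
        * beio S/G list and issue it through VOP_READ()/VOP_WRITE() on the
        * backing vnode, honoring the DPO (IO_DIRECT) and FUA (IO_SYNC) bits.
        * Short reads past EOF are zero-filled.
        */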
  636 static void
  637 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
  638                            struct ctl_be_block_io *beio)
  639 {
  640         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
  641         struct ctl_be_block_filedata *file_data;
  642         union ctl_io *io;
  643         struct uio xuio;
  644         struct iovec *xiovec;
  645         size_t s;
  646         int error, flags, i;
  647 
  648         DPRINTF("entered\n");
  649 
  650         file_data = &be_lun->backend.file;
  651         io = beio->io;
  652         flags = 0;
  653         if (ARGS(io)->flags & CTL_LLF_DPO)
  654                 flags |= IO_DIRECT;
  655         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
  656                 flags |= IO_SYNC;
  657 
  658         bzero(&xuio, sizeof(xuio));
  659         if (beio->bio_cmd == BIO_READ) {
  660                 SDT_PROBE0(cbb, , read, file_start);
  661                 xuio.uio_rw = UIO_READ;
  662         } else {
  663                 SDT_PROBE0(cbb, , write, file_start);
  664                 xuio.uio_rw = UIO_WRITE;
  665         }
  666         xuio.uio_offset = beio->io_offset;
  667         xuio.uio_resid = beio->io_len;
  668         xuio.uio_segflg = UIO_SYSSPACE;
  669         xuio.uio_iov = beio->xiovecs;
  670         xuio.uio_iovcnt = beio->num_segs;
  671         xuio.uio_td = curthread;
  672 
  673         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
  674                 xiovec->iov_base = beio->sg_segs[i].addr;
  675                 xiovec->iov_len = beio->sg_segs[i].len;
  676         }
  677 
  678         binuptime(&beio->ds_t0);
  679         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
  680 
  681         if (beio->bio_cmd == BIO_READ) {
  682                 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
  683 
  684                 if (beio->beio_cont == NULL &&
  685                     cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
  686                         ctl_serseq_done(io);
  687                 /*
  688                  * UFS pays attention to IO_DIRECT for reads.  If the
  689                  * DIRECTIO option is configured into the kernel, it calls
  690                  * ffs_rawread().  But that only works for single-segment
  691                  * uios with user space addresses.  In our case, with a
  692                  * kernel uio, it still reads into the buffer cache, but it
  693                  * will just try to release the buffer from the cache later
  694                  * on in ffs_read().
  695                  *
  696                  * ZFS does not pay attention to IO_DIRECT for reads.
  697                  *
  698                  * UFS does not pay attention to IO_SYNC for reads.
  699                  *
  700                  * ZFS pays attention to IO_SYNC (which translates into the
  701                  * Solaris define FRSYNC for zfs_read()) for reads.  It
  702                  * attempts to sync the file before reading.
  703                  */
  704                 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
  705 
  706                 VOP_UNLOCK(be_lun->vn);
  707                 SDT_PROBE0(cbb, , read, file_done);
  708                 if (error == 0 && xuio.uio_resid > 0) {
  709                         /*
   710                          * If we read less than requested (EOF), then
   711                          * we should zero the rest of the buffer.
  712                          */
  713                         s = beio->io_len - xuio.uio_resid;
  714                         for (i = 0; i < beio->num_segs; i++) {
  715                                 if (s >= beio->sg_segs[i].len) {
  716                                         s -= beio->sg_segs[i].len;
  717                                         continue;
  718                                 }
  719                                 bzero((uint8_t *)beio->sg_segs[i].addr + s,
  720                                     beio->sg_segs[i].len - s);
  721                                 s = 0;
  722                         }
  723                 }
  724         } else {
  725                 struct mount *mountpoint;
  726 
  727                 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
  728                 vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
  729                     be_lun->vn) | LK_RETRY);
  730 
  731                 /*
  732                  * UFS pays attention to IO_DIRECT for writes.  The write
  733                  * is done asynchronously.  (Normally the write would just
   734                  * get put into cache.)
  735                  *
  736                  * UFS pays attention to IO_SYNC for writes.  It will
  737                  * attempt to write the buffer out synchronously if that
  738                  * flag is set.
  739                  *
  740                  * ZFS does not pay attention to IO_DIRECT for writes.
  741                  *
  742                  * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
  743                  * for writes.  It will flush the transaction from the
  744                  * cache before returning.
  745                  */
  746                 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
  747                 VOP_UNLOCK(be_lun->vn);
  748 
  749                 vn_finished_write(mountpoint);
  750                 SDT_PROBE0(cbb, , write, file_done);
  751         }
  752 
  753         mtx_lock(&be_lun->io_lock);
  754         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
  755             beio->ds_tag_type, beio->ds_trans_type,
  756             /*now*/ NULL, /*then*/&beio->ds_t0);
  757         mtx_unlock(&be_lun->io_lock);
  758 
  759         /*
  760          * If we got an error, set the sense data to "MEDIUM ERROR" and
  761          * return the I/O to the user.
  762          */
  763         if (error != 0) {
  764                 if (error == ENOSPC || error == EDQUOT) {
  765                         ctl_set_space_alloc_fail(&io->scsiio);
  766                 } else if (error == EROFS || error == EACCES) {
  767                         ctl_set_hw_write_protected(&io->scsiio);
  768                 } else {
  769                         ctl_set_medium_error(&io->scsiio,
  770                             beio->bio_cmd == BIO_READ);
  771                 }
  772                 ctl_complete_beio(beio);
  773                 return;
  774         }
  775 
  776         /*
  777          * If this is a write or a verify, we're all done.
  778          * If this is a read, we can now send the data to the user.
  779          */
  780         if ((beio->bio_cmd == BIO_WRITE) ||
  781             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
  782                 ctl_set_success(&io->scsiio);
  783                 ctl_complete_beio(beio);
  784         } else {
  785                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
  786                     beio->beio_cont == NULL) {
  787                         ctl_set_success(&io->scsiio);
  788                         if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
  789                                 ctl_serseq_done(io);
  790                 }
  791                 ctl_datamove(io);
  792         }
  793 }
  794 
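       /*
        * GET LBA STATUS for file-backed LUNs: use the FIOSEEKHOLE/FIOSEEKDATA
        * ioctls on the backing vnode to report whether the range starting at
        * the requested LBA is mapped or deallocated.
        */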
  795 static void
  796 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
  797                         struct ctl_be_block_io *beio)
  798 {
  799         union ctl_io *io = beio->io;
  800         struct ctl_lba_len_flags *lbalen = ARGS(io);
  801         struct scsi_get_lba_status_data *data;
  802         off_t roff, off;
  803         int error, status;
  804 
  805         DPRINTF("entered\n");
  806 
  807         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
  808         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
  809         error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
  810             0, curthread->td_ucred, curthread);
  811         if (error == 0 && off > roff)
  812                 status = 0;     /* mapped up to off */
  813         else {
  814                 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
  815                     0, curthread->td_ucred, curthread);
  816                 if (error == 0 && off > roff)
  817                         status = 1;     /* deallocated up to off */
  818                 else {
  819                         status = 0;     /* unknown up to the end */
  820                         off = be_lun->size_bytes;
  821                 }
  822         }
  823         VOP_UNLOCK(be_lun->vn);
  824 
  825         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
  826         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
  827         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
  828             lbalen->lba), data->descr[0].length);
  829         data->descr[0].status = status;
  830 
  831         ctl_complete_beio(beio);
  832 }
  833 
  834 static uint64_t
  835 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
  836 {
  837         struct vattr            vattr;
  838         struct statfs           statfs;
  839         uint64_t                val;
  840         int                     error;
  841 
  842         val = UINT64_MAX;
  843         if (be_lun->vn == NULL)
  844                 return (val);
  845         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
  846         if (strcmp(attrname, "blocksused") == 0) {
  847                 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
  848                 if (error == 0)
  849                         val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
  850         }
  851         if (strcmp(attrname, "blocksavail") == 0 &&
  852             !VN_IS_DOOMED(be_lun->vn)) {
  853                 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
  854                 if (error == 0)
  855                         val = statfs.f_bavail * statfs.f_bsize /
  856                             be_lun->cbe_lun.blocksize;
  857         }
  858         VOP_UNLOCK(be_lun->vn);
  859         return (val);
  860 }
  861 
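       /*
        * UNMAP/WRITE SAME with UNMAP for file-backed LUNs: punch holes in the
        * backing file with vn_deallocate(), either per UNMAP descriptor or
        * over the single WRITE SAME range.
        */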
  862 static void
  863 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
  864                         struct ctl_be_block_io *beio)
  865 {
  866         struct ctl_be_block_filedata *file_data;
  867         union ctl_io *io;
  868         struct ctl_ptr_len_flags *ptrlen;
  869         struct scsi_unmap_desc *buf, *end;
  870         struct mount *mp;
  871         off_t off, len;
  872         int error;
  873 
  874         io = beio->io;
  875         file_data = &be_lun->backend.file;
  876         mp = NULL;
  877         error = 0;
  878 
  879         binuptime(&beio->ds_t0);
  880         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
  881 
  882         (void)vn_start_write(be_lun->vn, &mp, V_WAIT);
  883         vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
  884         if (beio->io_offset == -1) {
  885                 beio->io_len = 0;
  886                 ptrlen = (struct ctl_ptr_len_flags *)
  887                     &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
  888                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
  889                 end = buf + ptrlen->len / sizeof(*buf);
  890                 for (; buf < end; buf++) {
  891                         off = (off_t)scsi_8btou64(buf->lba) *
  892                             be_lun->cbe_lun.blocksize;
  893                         len = (off_t)scsi_4btoul(buf->length) *
  894                             be_lun->cbe_lun.blocksize;
  895                         beio->io_len += len;
  896                         error = vn_deallocate(be_lun->vn, &off, &len,
  897                             0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
  898                             NOCRED);
  899                         if (error != 0)
  900                                 break;
  901                 }
  902         } else {
  903                 /* WRITE_SAME */
  904                 off = beio->io_offset;
  905                 len = beio->io_len;
  906                 error = vn_deallocate(be_lun->vn, &off, &len, 0,
  907                     IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
  908         }
  909         VOP_UNLOCK(be_lun->vn);
  910         vn_finished_write(mp);
  911 
  912         mtx_lock(&be_lun->io_lock);
  913         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
  914             beio->ds_tag_type, beio->ds_trans_type,
  915             /*now*/ NULL, /*then*/&beio->ds_t0);
  916         mtx_unlock(&be_lun->io_lock);
  917 
  918         /*
  919          * If we got an error, set the sense data to "MEDIUM ERROR" and
  920          * return the I/O to the user.
  921          */
  922         switch (error) {
  923         case 0:
  924                 ctl_set_success(&io->scsiio);
  925                 break;
  926         case ENOSPC:
  927         case EDQUOT:
  928                 ctl_set_space_alloc_fail(&io->scsiio);
  929                 break;
  930         case EROFS:
  931         case EACCES:
  932                 ctl_set_hw_write_protected(&io->scsiio);
  933                 break;
  934         default:
  935                 ctl_set_medium_error(&io->scsiio, false);
  936         }
  937         ctl_complete_beio(beio);
  938 }
  939 
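       /*
        * READ/WRITE path that goes through the backing character device's
        * d_read()/d_write() entry points (used for ZVOLs).  Builds a kernel
        * uio from the beio S/G list, like the file-backed path.
        */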
  940 static void
  941 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
  942                            struct ctl_be_block_io *beio)
  943 {
  944         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
  945         union ctl_io *io;
  946         struct cdevsw *csw;
  947         struct cdev *dev;
  948         struct uio xuio;
  949         struct iovec *xiovec;
  950         int error, flags, i, ref;
  951 
  952         DPRINTF("entered\n");
  953 
  954         io = beio->io;
  955         flags = 0;
  956         if (ARGS(io)->flags & CTL_LLF_DPO)
  957                 flags |= IO_DIRECT;
  958         if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
  959                 flags |= IO_SYNC;
  960 
  961         bzero(&xuio, sizeof(xuio));
  962         if (beio->bio_cmd == BIO_READ) {
  963                 SDT_PROBE0(cbb, , read, file_start);
  964                 xuio.uio_rw = UIO_READ;
  965         } else {
  966                 SDT_PROBE0(cbb, , write, file_start);
  967                 xuio.uio_rw = UIO_WRITE;
  968         }
  969         xuio.uio_offset = beio->io_offset;
  970         xuio.uio_resid = beio->io_len;
  971         xuio.uio_segflg = UIO_SYSSPACE;
  972         xuio.uio_iov = beio->xiovecs;
  973         xuio.uio_iovcnt = beio->num_segs;
  974         xuio.uio_td = curthread;
  975 
  976         for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
  977                 xiovec->iov_base = beio->sg_segs[i].addr;
  978                 xiovec->iov_len = beio->sg_segs[i].len;
  979         }
  980 
  981         binuptime(&beio->ds_t0);
  982         devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
  983 
  984         csw = devvn_refthread(be_lun->vn, &dev, &ref);
  985         if (csw) {
  986                 if (beio->bio_cmd == BIO_READ) {
  987                         if (beio->beio_cont == NULL &&
  988                             cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
  989                                 ctl_serseq_done(io);
  990                         error = csw->d_read(dev, &xuio, flags);
  991                 } else
  992                         error = csw->d_write(dev, &xuio, flags);
  993                 dev_relthread(dev, ref);
  994         } else
  995                 error = ENXIO;
  996 
  997         if (beio->bio_cmd == BIO_READ)
  998                 SDT_PROBE0(cbb, , read, file_done);
  999         else
 1000                 SDT_PROBE0(cbb, , write, file_done);
 1001 
 1002         mtx_lock(&be_lun->io_lock);
 1003         devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
 1004             beio->ds_tag_type, beio->ds_trans_type,
 1005             /*now*/ NULL, /*then*/&beio->ds_t0);
 1006         mtx_unlock(&be_lun->io_lock);
 1007 
 1008         /*
 1009          * If we got an error, set the sense data to "MEDIUM ERROR" and
 1010          * return the I/O to the user.
 1011          */
 1012         if (error != 0) {
 1013                 if (error == ENOSPC || error == EDQUOT) {
 1014                         ctl_set_space_alloc_fail(&io->scsiio);
 1015                 } else if (error == EROFS || error == EACCES) {
 1016                         ctl_set_hw_write_protected(&io->scsiio);
 1017                 } else {
 1018                         ctl_set_medium_error(&io->scsiio,
 1019                             beio->bio_cmd == BIO_READ);
 1020                 }
 1021                 ctl_complete_beio(beio);
 1022                 return;
 1023         }
 1024 
 1025         /*
 1026          * If this is a write or a verify, we're all done.
 1027          * If this is a read, we can now send the data to the user.
 1028          */
 1029         if ((beio->bio_cmd == BIO_WRITE) ||
 1030             (ARGS(io)->flags & CTL_LLF_VERIFY)) {
 1031                 ctl_set_success(&io->scsiio);
 1032                 ctl_complete_beio(beio);
 1033         } else {
 1034                 if ((ARGS(io)->flags & CTL_LLF_READ) &&
 1035                     beio->beio_cont == NULL) {
 1036                         ctl_set_success(&io->scsiio);
 1037                         if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
 1038                                 ctl_serseq_done(io);
 1039                 }
 1040                 ctl_datamove(io);
 1041         }
 1042 }
 1043 
 1044 static void
 1045 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
 1046                         struct ctl_be_block_io *beio)
 1047 {
 1048         union ctl_io *io = beio->io;
 1049         struct cdevsw *csw;
 1050         struct cdev *dev;
 1051         struct ctl_lba_len_flags *lbalen = ARGS(io);
 1052         struct scsi_get_lba_status_data *data;
 1053         off_t roff, off;
 1054         int error, ref, status;
 1055 
 1056         DPRINTF("entered\n");
 1057 
 1058         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 1059         if (csw == NULL) {
 1060                 status = 0;     /* unknown up to the end */
 1061                 off = be_lun->size_bytes;
 1062                 goto done;
 1063         }
 1064         off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
 1065         error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
 1066             curthread);
 1067         if (error == 0 && off > roff)
 1068                 status = 0;     /* mapped up to off */
 1069         else {
 1070                 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
 1071                     curthread);
 1072                 if (error == 0 && off > roff)
 1073                         status = 1;     /* deallocated up to off */
 1074                 else {
 1075                         status = 0;     /* unknown up to the end */
 1076                         off = be_lun->size_bytes;
 1077                 }
 1078         }
 1079         dev_relthread(dev, ref);
 1080 
 1081 done:
 1082         data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
 1083         scsi_u64to8b(lbalen->lba, data->descr[0].addr);
 1084         scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
 1085             lbalen->lba), data->descr[0].length);
 1086         data->descr[0].status = status;
 1087 
 1088         ctl_complete_beio(beio);
 1089 }
 1090 
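       /*
        * SYNCHRONIZE CACHE for device-backed LUNs: send a single BIO_FLUSH
        * to the backing device; completion is handled by
        * ctl_be_block_biodone().
        */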
 1091 static void
 1092 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
 1093                        struct ctl_be_block_io *beio)
 1094 {
 1095         struct bio *bio;
 1096         struct cdevsw *csw;
 1097         struct cdev *dev;
 1098         int ref;
 1099 
 1100         DPRINTF("entered\n");
 1101 
 1102         /* This can't fail, it's a blocking allocation. */
 1103         bio = g_alloc_bio();
 1104 
 1105         bio->bio_cmd        = BIO_FLUSH;
 1106         bio->bio_offset     = 0;
 1107         bio->bio_data       = 0;
 1108         bio->bio_done       = ctl_be_block_biodone;
 1109         bio->bio_caller1    = beio;
 1110         bio->bio_pblkno     = 0;
 1111 
 1112         /*
 1113          * We don't need to acquire the LUN lock here, because we are only
 1114          * sending one bio, and so there is no other context to synchronize
 1115          * with.
 1116          */
 1117         beio->num_bios_sent = 1;
 1118         beio->send_complete = 1;
 1119 
 1120         binuptime(&beio->ds_t0);
 1121         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 1122 
 1123         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 1124         if (csw) {
 1125                 bio->bio_dev = dev;
 1126                 csw->d_strategy(bio);
 1127                 dev_relthread(dev, ref);
 1128         } else {
 1129                 bio->bio_error = ENXIO;
 1130                 ctl_be_block_biodone(bio);
 1131         }
 1132 }
 1133 
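       /*
        * Issue BIO_DELETE requests covering one byte range, splitting it
        * into bios of at most "maxlen" bytes (the largest multiple of the
        * block size that fits in a long).  The "last" flag marks the final
        * range of the request, so its last bio sets send_complete.
        */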
 1134 static void
 1135 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
 1136                        struct ctl_be_block_io *beio,
 1137                        uint64_t off, uint64_t len, int last)
 1138 {
 1139         struct bio *bio;
 1140         uint64_t maxlen;
 1141         struct cdevsw *csw;
 1142         struct cdev *dev;
 1143         int ref;
 1144 
 1145         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 1146         maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
 1147         while (len > 0) {
 1148                 bio = g_alloc_bio();
 1149                 bio->bio_cmd        = BIO_DELETE;
 1150                 bio->bio_dev        = dev;
 1151                 bio->bio_offset     = off;
 1152                 bio->bio_length     = MIN(len, maxlen);
 1153                 bio->bio_data       = 0;
 1154                 bio->bio_done       = ctl_be_block_biodone;
 1155                 bio->bio_caller1    = beio;
 1156                 bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
 1157 
 1158                 off += bio->bio_length;
 1159                 len -= bio->bio_length;
 1160 
 1161                 mtx_lock(&be_lun->io_lock);
 1162                 beio->num_bios_sent++;
 1163                 if (last && len == 0)
 1164                         beio->send_complete = 1;
 1165                 mtx_unlock(&be_lun->io_lock);
 1166 
 1167                 if (csw) {
 1168                         csw->d_strategy(bio);
 1169                 } else {
 1170                         bio->bio_error = ENXIO;
 1171                         ctl_be_block_biodone(bio);
 1172                 }
 1173         }
 1174         if (csw)
 1175                 dev_relthread(dev, ref);
 1176 }
 1177 
 1178 static void
 1179 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
 1180                        struct ctl_be_block_io *beio)
 1181 {
 1182         union ctl_io *io;
 1183         struct ctl_ptr_len_flags *ptrlen;
 1184         struct scsi_unmap_desc *buf, *end;
 1185         uint64_t len;
 1186 
 1187         io = beio->io;
 1188 
 1189         DPRINTF("entered\n");
 1190 
 1191         binuptime(&beio->ds_t0);
 1192         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 1193 
 1194         if (beio->io_offset == -1) {
 1195                 beio->io_len = 0;
 1196                 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 1197                 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
 1198                 end = buf + ptrlen->len / sizeof(*buf);
 1199                 for (; buf < end; buf++) {
 1200                         len = (uint64_t)scsi_4btoul(buf->length) *
 1201                             be_lun->cbe_lun.blocksize;
 1202                         beio->io_len += len;
 1203                         ctl_be_block_unmap_dev_range(be_lun, beio,
 1204                             scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
 1205                             len, (end - buf < 2) ? TRUE : FALSE);
 1206                 }
 1207         } else
 1208                 ctl_be_block_unmap_dev_range(be_lun, beio,
 1209                     beio->io_offset, beio->io_len, TRUE);
 1210 }
 1211 
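       /*
        * READ/WRITE path for device-backed LUNs: translate the beio S/G list
        * into a chain of bios, each no larger than the device's
        * si_iosize_max, and hand them all to the device's strategy routine.
        */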
 1212 static void
 1213 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
 1214                           struct ctl_be_block_io *beio)
 1215 {
 1216         TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
 1217         struct bio *bio;
 1218         struct cdevsw *csw;
 1219         struct cdev *dev;
 1220         off_t cur_offset;
 1221         int i, max_iosize, ref;
 1222 
 1223         DPRINTF("entered\n");
 1224         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 1225 
 1226         /*
 1227          * We have to limit our I/O size to the maximum supported by the
 1228          * backend device.
 1229          */
 1230         if (csw) {
 1231                 max_iosize = dev->si_iosize_max;
 1232                 if (max_iosize <= 0)
 1233                         max_iosize = DFLTPHYS;
 1234         } else
 1235                 max_iosize = maxphys;
 1236 
 1237         cur_offset = beio->io_offset;
 1238         for (i = 0; i < beio->num_segs; i++) {
 1239                 size_t cur_size;
 1240                 uint8_t *cur_ptr;
 1241 
 1242                 cur_size = beio->sg_segs[i].len;
 1243                 cur_ptr = beio->sg_segs[i].addr;
 1244 
 1245                 while (cur_size > 0) {
 1246                         /* This can't fail, it's a blocking allocation. */
 1247                         bio = g_alloc_bio();
 1248 
 1249                         KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
 1250 
 1251                         bio->bio_cmd = beio->bio_cmd;
 1252                         bio->bio_dev = dev;
 1253                         bio->bio_caller1 = beio;
 1254                         bio->bio_length = min(cur_size, max_iosize);
 1255                         bio->bio_offset = cur_offset;
 1256                         bio->bio_data = cur_ptr;
 1257                         bio->bio_done = ctl_be_block_biodone;
 1258                         bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
 1259 
 1260                         cur_offset += bio->bio_length;
 1261                         cur_ptr += bio->bio_length;
 1262                         cur_size -= bio->bio_length;
 1263 
 1264                         TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
 1265                         beio->num_bios_sent++;
 1266                 }
 1267         }
 1268         beio->send_complete = 1;
 1269         binuptime(&beio->ds_t0);
 1270         devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 1271 
 1272         /*
 1273          * Fire off all allocated requests!
 1274          */
 1275         while ((bio = TAILQ_FIRST(&queue)) != NULL) {
 1276                 TAILQ_REMOVE(&queue, bio, bio_queue);
 1277                 if (csw)
 1278                         csw->d_strategy(bio);
 1279                 else {
 1280                         bio->bio_error = ENXIO;
 1281                         ctl_be_block_biodone(bio);
 1282                 }
 1283         }
 1284         if (csw)
 1285                 dev_relthread(dev, ref);
 1286 }
 1287 
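       /*
        * Fetch a named attribute from the backing device via the DIOCGATTR
        * ioctl; return UINT64_MAX if the attribute cannot be obtained.
        */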
 1288 static uint64_t
 1289 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
 1290 {
 1291         struct diocgattr_arg    arg;
 1292         struct cdevsw *csw;
 1293         struct cdev *dev;
 1294         int error, ref;
 1295 
 1296         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 1297         if (csw == NULL)
 1298                 return (UINT64_MAX);
 1299         strlcpy(arg.name, attrname, sizeof(arg.name));
 1300         arg.len = sizeof(arg.value.off);
 1301         if (csw->d_ioctl) {
 1302                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
 1303                     curthread);
 1304         } else
 1305                 error = ENODEV;
 1306         dev_relthread(dev, ref);
 1307         if (error != 0)
 1308                 return (UINT64_MAX);
 1309         return (arg.value.off);
 1310 }
 1311 
 1312 static void
 1313 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
 1314                             union ctl_io *io)
 1315 {
 1316         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 1317         struct ctl_be_block_io *beio;
 1318         struct ctl_lba_len_flags *lbalen;
 1319 
 1320         DPRINTF("entered\n");
 1321         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 1322         lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 1323 
 1324         beio->io_len = lbalen->len * cbe_lun->blocksize;
 1325         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 1326         beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
 1327         beio->bio_cmd = BIO_FLUSH;
 1328         beio->ds_trans_type = DEVSTAT_NO_DATA;
 1329         DPRINTF("SYNC\n");
 1330         be_lun->lun_flush(be_lun, beio);
 1331 }
 1332 
 1333 static void
 1334 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
 1335 {
 1336         union ctl_io *io;
 1337 
 1338         io = beio->io;
 1339         ctl_free_beio(beio);
 1340         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
 1341             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
 1342              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
 1343                 ctl_config_write_done(io);
 1344                 return;
 1345         }
 1346 
 1347         ctl_be_block_config_write(io);
 1348 }
 1349 
 1350 static void
 1351 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
 1352                             union ctl_io *io)
 1353 {
 1354         struct ctl_be_block_softc *softc = be_lun->softc;
 1355         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 1356         struct ctl_be_block_io *beio;
 1357         struct ctl_lba_len_flags *lbalen;
 1358         uint64_t len_left, lba;
 1359         uint32_t pb, pbo, adj;
 1360         int i, seglen;
 1361         uint8_t *buf, *end;
 1362 
 1363         DPRINTF("entered\n");
 1364 
 1365         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 1366         lbalen = ARGS(io);
 1367 
 1368         if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
 1369             (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
 1370                 ctl_free_beio(beio);
 1371                 ctl_set_invalid_field(&io->scsiio,
 1372                                       /*sks_valid*/ 1,
 1373                                       /*command*/ 1,
 1374                                       /*field*/ 1,
 1375                                       /*bit_valid*/ 0,
 1376                                       /*bit*/ 0);
 1377                 ctl_config_write_done(io);
 1378                 return;
 1379         }
 1380 
 1381         if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
 1382                 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 1383                 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
 1384                 beio->bio_cmd = BIO_DELETE;
 1385                 beio->ds_trans_type = DEVSTAT_FREE;
 1386 
 1387                 be_lun->unmap(be_lun, beio);
 1388                 return;
 1389         }
 1390 
 1391         beio->bio_cmd = BIO_WRITE;
 1392         beio->ds_trans_type = DEVSTAT_WRITE;
 1393 
 1394         DPRINTF("WRITE SAME at LBA %jx len %u\n",
 1395                (uintmax_t)lbalen->lba, lbalen->len);
 1396 
 1397         pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
 1398         if (be_lun->cbe_lun.pblockoff > 0)
 1399                 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
 1400         else
 1401                 pbo = 0;
 1402         len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
 1403         for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
 1404                 /*
 1405                  * Set up the S/G entry for this chunk.
 1406                  */
 1407                 seglen = MIN(CTLBLK_MAX_SEG, len_left);
 1408                 if (pb > cbe_lun->blocksize) {
 1409                         adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
 1410                             seglen - pbo) % pb;
 1411                         if (seglen > adj)
 1412                                 seglen -= adj;
 1413                         else
 1414                                 seglen -= seglen % cbe_lun->blocksize;
 1415                 } else
 1416                         seglen -= seglen % cbe_lun->blocksize;
 1417                 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);
 1418 
 1419                 DPRINTF("segment %d addr %p len %zd\n", i,
 1420                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
 1421 
 1422                 beio->num_segs++;
 1423                 len_left -= seglen;
 1424 
 1425                 buf = beio->sg_segs[i].addr;
 1426                 end = buf + seglen;
 1427                 for (; buf < end; buf += cbe_lun->blocksize) {
 1428                         if (lbalen->flags & SWS_NDOB) {
 1429                                 memset(buf, 0, cbe_lun->blocksize);
 1430                         } else {
 1431                                 memcpy(buf, io->scsiio.kern_data_ptr,
 1432                                     cbe_lun->blocksize);
 1433                         }
 1434                         if (lbalen->flags & SWS_LBDATA)
 1435                                 scsi_ulto4b(lbalen->lba + lba, buf);
 1436                         lba++;
 1437                 }
 1438         }
 1439 
 1440         beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 1441         beio->io_len = lba * cbe_lun->blocksize;
 1442 
 1443         /* We cannot do it all in one run.  Adjust the range and schedule a rerun. */
 1444         if (len_left > 0) {
 1445                 lbalen->lba += lba;
 1446                 lbalen->len -= lba;
 1447                 beio->beio_cont = ctl_be_block_cw_done_ws;
 1448         }
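              /*
               * When beio_cont is set above, ctl_be_block_cw_done_ws() will
               * feed the I/O back through ctl_be_block_config_write() once
               * this chunk completes, so the remaining range is written in a
               * subsequent pass.
               */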
 1449 
 1450         be_lun->dispatch(be_lun, beio);
 1451 }
 1452 
 1453 static void
 1454 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
 1455                             union ctl_io *io)
 1456 {
 1457         struct ctl_be_block_io *beio;
 1458         struct ctl_ptr_len_flags *ptrlen;
 1459 
 1460         DPRINTF("entered\n");
 1461 
 1462         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 1463         ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 1464 
 1465         if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
 1466                 ctl_free_beio(beio);
 1467                 ctl_set_invalid_field(&io->scsiio,
 1468                                       /*sks_valid*/ 0,
 1469                                       /*command*/ 1,
 1470                                       /*field*/ 0,
 1471                                       /*bit_valid*/ 0,
 1472                                       /*bit*/ 0);
 1473                 ctl_config_write_done(io);
 1474                 return;
 1475         }
 1476 
 1477         beio->io_len = 0;
 1478         beio->io_offset = -1;
 1479         beio->bio_cmd = BIO_DELETE;
 1480         beio->ds_trans_type = DEVSTAT_FREE;
 1481         DPRINTF("UNMAP\n");
 1482         be_lun->unmap(be_lun, beio);
 1483 }
 1484 
 1485 static void
 1486 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
 1487 {
 1488         union ctl_io *io;
 1489 
 1490         io = beio->io;
 1491         ctl_free_beio(beio);
 1492         ctl_config_read_done(io);
 1493 }
 1494 
 1495 static void
 1496 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
 1497                          union ctl_io *io)
 1498 {
 1499         struct ctl_be_block_io *beio;
 1500         struct ctl_be_block_softc *softc;
 1501 
 1502         DPRINTF("entered\n");
 1503 
 1504         softc = be_lun->softc;
 1505         beio = ctl_alloc_beio(softc);
 1506         beio->io = io;
 1507         beio->lun = be_lun;
 1508         beio->beio_cont = ctl_be_block_cr_done;
 1509         PRIV(io)->ptr = (void *)beio;
 1510 
 1511         switch (io->scsiio.cdb[0]) {
 1512         case SERVICE_ACTION_IN:         /* GET LBA STATUS */
 1513                 beio->bio_cmd = -1;
 1514                 beio->ds_trans_type = DEVSTAT_NO_DATA;
 1515                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
 1516                 beio->io_len = 0;
 1517                 if (be_lun->get_lba_status)
 1518                         be_lun->get_lba_status(be_lun, beio);
 1519                 else
 1520                         ctl_be_block_cr_done(beio);
 1521                 break;
 1522         default:
 1523                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
 1524                 break;
 1525         }
 1526 }
 1527 
 1528 static void
 1529 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
 1530 {
 1531         union ctl_io *io;
 1532 
 1533         io = beio->io;
 1534         ctl_free_beio(beio);
 1535         ctl_config_write_done(io);
 1536 }
 1537 
 1538 static void
 1539 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
 1540                          union ctl_io *io)
 1541 {
 1542         struct ctl_be_block_io *beio;
 1543         struct ctl_be_block_softc *softc;
 1544 
 1545         DPRINTF("entered\n");
 1546 
 1547         softc = be_lun->softc;
 1548         beio = ctl_alloc_beio(softc);
 1549         beio->io = io;
 1550         beio->lun = be_lun;
 1551         beio->beio_cont = ctl_be_block_cw_done;
 1552         switch (io->scsiio.tag_type) {
 1553         case CTL_TAG_ORDERED:
 1554                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
 1555                 break;
 1556         case CTL_TAG_HEAD_OF_QUEUE:
 1557                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
 1558                 break;
 1559         case CTL_TAG_UNTAGGED:
 1560         case CTL_TAG_SIMPLE:
 1561         case CTL_TAG_ACA:
 1562         default:
 1563                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
 1564                 break;
 1565         }
 1566         PRIV(io)->ptr = (void *)beio;
 1567 
 1568         switch (io->scsiio.cdb[0]) {
 1569         case SYNCHRONIZE_CACHE:
 1570         case SYNCHRONIZE_CACHE_16:
 1571                 ctl_be_block_cw_dispatch_sync(be_lun, io);
 1572                 break;
 1573         case WRITE_SAME_10:
 1574         case WRITE_SAME_16:
 1575                 ctl_be_block_cw_dispatch_ws(be_lun, io);
 1576                 break;
 1577         case UNMAP:
 1578                 ctl_be_block_cw_dispatch_unmap(be_lun, io);
 1579                 break;
 1580         default:
 1581                 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
 1582                 break;
 1583         }
 1584 }
 1585 
 1586 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
 1587 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
 1588 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
 1589 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
 1590 
 1591 static void
 1592 ctl_be_block_next(struct ctl_be_block_io *beio)
 1593 {
 1594         struct ctl_be_block_lun *be_lun;
 1595         union ctl_io *io;
 1596 
 1597         io = beio->io;
 1598         be_lun = beio->lun;
 1599         ctl_free_beio(beio);
 1600         if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
 1601             ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
 1602              (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
 1603                 ctl_data_submit_done(io);
 1604                 return;
 1605         }
 1606 
 1607         io->io_hdr.status &= ~CTL_STATUS_MASK;
 1608         io->io_hdr.status |= CTL_STATUS_NONE;
 1609 
 1610         mtx_lock(&be_lun->queue_lock);
 1611         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
 1612         mtx_unlock(&be_lun->queue_lock);
 1613         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
 1614 }
 1615 
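      /*
       * Main READ/WRITE (and COMPARE) data path.  Allocate a beio and an
       * S/G list covering up to CTLBLK_MAX_IO_SIZE bytes of the request;
       * reads are issued to the backing store before the data is moved to
       * the initiator, while writes fetch the data from the initiator via
       * ctl_datamove() first.  Requests larger than one pass are continued
       * through ctl_be_block_next().
       */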
 1616 static void
 1617 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
 1618                            union ctl_io *io)
 1619 {
 1620         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 1621         struct ctl_be_block_io *beio;
 1622         struct ctl_be_block_softc *softc;
 1623         struct ctl_lba_len_flags *lbalen;
 1624         struct ctl_ptr_len_flags *bptrlen;
 1625         uint64_t len_left, lbas;
 1626         int i;
 1627 
 1628         softc = be_lun->softc;
 1629 
 1630         DPRINTF("entered\n");
 1631 
 1632         lbalen = ARGS(io);
 1633         if (lbalen->flags & CTL_LLF_WRITE) {
 1634                 SDT_PROBE0(cbb, , write, start);
 1635         } else {
 1636                 SDT_PROBE0(cbb, , read, start);
 1637         }
 1638 
 1639         beio = ctl_alloc_beio(softc);
 1640         beio->io = io;
 1641         beio->lun = be_lun;
 1642         bptrlen = PRIV(io);
 1643         bptrlen->ptr = (void *)beio;
 1644 
 1645         switch (io->scsiio.tag_type) {
 1646         case CTL_TAG_ORDERED:
 1647                 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
 1648                 break;
 1649         case CTL_TAG_HEAD_OF_QUEUE:
 1650                 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
 1651                 break;
 1652         case CTL_TAG_UNTAGGED:
 1653         case CTL_TAG_SIMPLE:
 1654         case CTL_TAG_ACA:
 1655         default:
 1656                 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
 1657                 break;
 1658         }
 1659 
 1660         if (lbalen->flags & CTL_LLF_WRITE) {
 1661                 beio->bio_cmd = BIO_WRITE;
 1662                 beio->ds_trans_type = DEVSTAT_WRITE;
 1663         } else {
 1664                 beio->bio_cmd = BIO_READ;
 1665                 beio->ds_trans_type = DEVSTAT_READ;
 1666         }
 1667 
 1668         DPRINTF("%s at LBA %jx len %u @%ju\n",
 1669                (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
 1670                (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
 1671         lbas = CTLBLK_MAX_IO_SIZE;
 1672         if (lbalen->flags & CTL_LLF_COMPARE) {
 1673                 beio->two_sglists = 1;
 1674                 lbas /= 2;
 1675         }
 1676         lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
 1677         beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
 1678         beio->io_len = lbas * cbe_lun->blocksize;
 1679         bptrlen->len += lbas;
 1680 
 1681         for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
 1682                 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
 1683                     i, CTLBLK_MAX_SEGS));
 1684 
 1685                 /*
 1686                  * Set up the S/G entry for this chunk.
 1687                  */
 1688                 ctl_alloc_seg(softc, &beio->sg_segs[i],
 1689                     MIN(CTLBLK_MAX_SEG, len_left));
 1690 
 1691                 DPRINTF("segment %d addr %p len %zd\n", i,
 1692                         beio->sg_segs[i].addr, beio->sg_segs[i].len);
 1693 
 1694                 /* Set up second segment for compare operation. */
 1695                 if (beio->two_sglists) {
 1696                         ctl_alloc_seg(softc,
 1697                             &beio->sg_segs[i + CTLBLK_HALF_SEGS],
 1698                             beio->sg_segs[i].len);
 1699                 }
 1700 
 1701                 beio->num_segs++;
 1702                 len_left -= beio->sg_segs[i].len;
 1703         }
 1704         if (bptrlen->len < lbalen->len)
 1705                 beio->beio_cont = ctl_be_block_next;
 1706         io->scsiio.be_move_done = ctl_be_block_move_done;
 1707         /* For compare we have separate S/G lists for read and datamove. */
 1708         if (beio->two_sglists)
 1709                 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
 1710         else
 1711                 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
 1712         io->scsiio.kern_data_len = beio->io_len;
 1713         io->scsiio.kern_sg_entries = beio->num_segs;
 1714         io->scsiio.kern_data_ref = ctl_refcnt_beio;
 1715         io->scsiio.kern_data_arg = beio;
 1716         io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
 1717 
 1718         /*
 1719          * For the read case, we need to read the data into our buffers and
 1720          * then we can send it back to the user.  For the write case, we
 1721          * need to get the data from the user first.
 1722          */
 1723         if (beio->bio_cmd == BIO_READ) {
 1724                 SDT_PROBE0(cbb, , read, alloc_done);
 1725                 be_lun->dispatch(be_lun, beio);
 1726         } else {
 1727                 SDT_PROBE0(cbb, , write, alloc_done);
 1728                 ctl_datamove(io);
 1729         }
 1730 }
 1731 
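      /*
       * Taskqueue worker.  Drains the per-LUN queues in priority order --
       * datamove, config write, config read, then new input -- dispatching
       * each I/O with the queue lock dropped.
       */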
 1732 static void
 1733 ctl_be_block_worker(void *context, int pending)
 1734 {
 1735         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
 1736         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 1737         union ctl_io *io;
 1738         struct ctl_be_block_io *beio;
 1739 
 1740         DPRINTF("entered\n");
 1741         /*
 1742          * Fetch and process I/Os from all queues.  If we detect the
 1743          * CTL_LUN_FLAG_NO_MEDIA flag here, it is the result of a race, so
 1744          * keep the response maximally opaque to avoid confusing the initiator.
 1745          */
 1746         for (;;) {
 1747                 mtx_lock(&be_lun->queue_lock);
 1748                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
 1749                 if (io != NULL) {
 1750                         DPRINTF("datamove queue\n");
 1751                         STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
 1752                         mtx_unlock(&be_lun->queue_lock);
 1753                         beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 1754                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 1755                                 ctl_set_busy(&io->scsiio);
 1756                                 ctl_complete_beio(beio);
 1757                                 continue;
 1758                         }
 1759                         be_lun->dispatch(be_lun, beio);
 1760                         continue;
 1761                 }
 1762                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
 1763                 if (io != NULL) {
 1764                         DPRINTF("config write queue\n");
 1765                         STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
 1766                         mtx_unlock(&be_lun->queue_lock);
 1767                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 1768                                 ctl_set_busy(&io->scsiio);
 1769                                 ctl_config_write_done(io);
 1770                                 continue;
 1771                         }
 1772                         ctl_be_block_cw_dispatch(be_lun, io);
 1773                         continue;
 1774                 }
 1775                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
 1776                 if (io != NULL) {
 1777                         DPRINTF("config read queue\n");
 1778                         STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
 1779                         mtx_unlock(&be_lun->queue_lock);
 1780                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 1781                                 ctl_set_busy(&io->scsiio);
 1782                                 ctl_config_read_done(io);
 1783                                 continue;
 1784                         }
 1785                         ctl_be_block_cr_dispatch(be_lun, io);
 1786                         continue;
 1787                 }
 1788                 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
 1789                 if (io != NULL) {
 1790                         DPRINTF("input queue\n");
 1791                         STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
 1792                         mtx_unlock(&be_lun->queue_lock);
 1793                         if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 1794                                 ctl_set_busy(&io->scsiio);
 1795                                 ctl_data_submit_done(io);
 1796                                 continue;
 1797                         }
 1798                         ctl_be_block_dispatch(be_lun, io);
 1799                         continue;
 1800                 }
 1801 
 1802                 /*
 1803                  * If we get here, there is no work left in the queues, so
 1804                  * just break out and let the task queue go to sleep.
 1805                  */
 1806                 mtx_unlock(&be_lun->queue_lock);
 1807                 break;
 1808         }
 1809 }
 1810 
 1811 /*
 1812  * Entry point from CTL to the backend for I/O.  We queue everything to a
 1813  * work thread, so this just puts the I/O on a queue and wakes up the
 1814  * thread.
 1815  */
 1816 static int
 1817 ctl_be_block_submit(union ctl_io *io)
 1818 {
 1819         struct ctl_be_block_lun *be_lun;
 1820 
 1821         DPRINTF("entered\n");
 1822 
 1823         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
 1824 
 1825         KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
 1826             ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
 1827 
 1828         PRIV(io)->len = 0;
 1829 
 1830         mtx_lock(&be_lun->queue_lock);
 1831         STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
 1832         mtx_unlock(&be_lun->queue_lock);
 1833         taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
 1834 
 1835         return (CTL_RETVAL_COMPLETE);
 1836 }
 1837 
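      /*
       * Character-device ioctl handler for the block backend.  LUN
       * management requests (CTL_LUN_REQ) arrive here from userland --
       * typically issued by ctladm(8) against /dev/cam/ctl -- and are
       * dispatched to the create/remove/modify handlers below based on
       * lun_req->reqtype.
       */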
 1838 static int
 1839 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
 1840                         int flag, struct thread *td)
 1841 {
 1842         struct ctl_be_block_softc *softc = &backend_block_softc;
 1843         int error;
 1844 
 1845         error = 0;
 1846         switch (cmd) {
 1847         case CTL_LUN_REQ: {
 1848                 struct ctl_lun_req *lun_req;
 1849 
 1850                 lun_req = (struct ctl_lun_req *)addr;
 1851 
 1852                 switch (lun_req->reqtype) {
 1853                 case CTL_LUNREQ_CREATE:
 1854                         error = ctl_be_block_create(softc, lun_req);
 1855                         break;
 1856                 case CTL_LUNREQ_RM:
 1857                         error = ctl_be_block_rm(softc, lun_req);
 1858                         break;
 1859                 case CTL_LUNREQ_MODIFY:
 1860                         error = ctl_be_block_modify(softc, lun_req);
 1861                         break;
 1862                 default:
 1863                         lun_req->status = CTL_LUN_ERROR;
 1864                         snprintf(lun_req->error_str, sizeof(lun_req->error_str),
 1865                                  "invalid LUN request type %d",
 1866                                  lun_req->reqtype);
 1867                         break;
 1868                 }
 1869                 break;
 1870         }
 1871         default:
 1872                 error = ENOTTY;
 1873                 break;
 1874         }
 1875 
 1876         return (error);
 1877 }
 1878 
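      /*
       * Open-time setup for a LUN backed by a regular file.  The LUN size
       * comes from VOP_GETATTR() unless lun_size_bytes was given, UNMAP
       * support is probed through _PC_DEALLOC_PRESENT, and the physical and
       * UNMAP block geometry may be overridden by the pblocksize,
       * pblockoffset, ublocksize and ublockoffset options.
       */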
 1879 static int
 1880 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 1881 {
 1882         struct ctl_be_lun *cbe_lun;
 1883         struct ctl_be_block_filedata *file_data;
 1884         struct ctl_lun_create_params *params;
 1885         const char                   *value;
 1886         struct vattr                  vattr;
 1887         off_t                         ps, pss, po, pos, us, uss, uo, uos;
 1888         int                           error;
 1889         long                          pconf;
 1890 
 1891         cbe_lun = &be_lun->cbe_lun;
 1892         file_data = &be_lun->backend.file;
 1893         params = &be_lun->params;
 1894 
 1895         be_lun->dev_type = CTL_BE_BLOCK_FILE;
 1896         be_lun->dispatch = ctl_be_block_dispatch_file;
 1897         be_lun->lun_flush = ctl_be_block_flush_file;
 1898         be_lun->get_lba_status = ctl_be_block_gls_file;
 1899         be_lun->getattr = ctl_be_block_getattr_file;
 1900         be_lun->unmap = ctl_be_block_unmap_file;
 1901         cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
 1902 
 1903         error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
 1904         if (error != 0) {
 1905                 snprintf(req->error_str, sizeof(req->error_str),
 1906                          "error calling VOP_GETATTR() for file %s",
 1907                          be_lun->dev_path);
 1908                 return (error);
 1909         }
 1910 
 1911         error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
 1912         if (error != 0) {
 1913                 snprintf(req->error_str, sizeof(req->error_str),
 1914                     "error calling VOP_PATHCONF() for file %s",
 1915                     be_lun->dev_path);
 1916                 return (error);
 1917         }
 1918         if (pconf == 1)
 1919                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
 1920 
 1921         file_data->cred = crhold(curthread->td_ucred);
 1922         if (params->lun_size_bytes != 0)
 1923                 be_lun->size_bytes = params->lun_size_bytes;
 1924         else
 1925                 be_lun->size_bytes = vattr.va_size;
 1926 
 1927         /*
 1928          * For files we can use any logical block size.  Prefer 512 bytes
 1929          * for compatibility reasons.  If the file's vattr.va_blocksize
 1930          * (preferred I/O block size) is larger than, and a multiple of, the
 1931          * chosen logical block size, report it as the physical block size.
 1932          */
 1933         if (params->blocksize_bytes != 0)
 1934                 cbe_lun->blocksize = params->blocksize_bytes;
 1935         else if (cbe_lun->lun_type == T_CDROM)
 1936                 cbe_lun->blocksize = 2048;
 1937         else
 1938                 cbe_lun->blocksize = 512;
 1939         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 1940         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 1941             0 : (be_lun->size_blocks - 1);
 1942 
 1943         us = ps = vattr.va_blocksize;
 1944         uo = po = 0;
 1945 
 1946         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
 1947         if (value != NULL)
 1948                 ctl_expand_number(value, &ps);
 1949         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
 1950         if (value != NULL)
 1951                 ctl_expand_number(value, &po);
 1952         pss = ps / cbe_lun->blocksize;
 1953         pos = po / cbe_lun->blocksize;
 1954         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
 1955             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
 1956                 cbe_lun->pblockexp = fls(pss) - 1;
 1957                 cbe_lun->pblockoff = (pss - pos) % pss;
 1958         }
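              /*
               * For example (hypothetical values): with a 512-byte logical
               * block and vattr.va_blocksize of 131072, pss is 256, so
               * pblockexp becomes fls(256) - 1 = 8 and pblockoff is 0.
               */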
 1959 
 1960         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
 1961         if (value != NULL)
 1962                 ctl_expand_number(value, &us);
 1963         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
 1964         if (value != NULL)
 1965                 ctl_expand_number(value, &uo);
 1966         uss = us / cbe_lun->blocksize;
 1967         uos = uo / cbe_lun->blocksize;
 1968         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
 1969             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
 1970                 cbe_lun->ublockexp = fls(uss) - 1;
 1971                 cbe_lun->ublockoff = (uss - uos) % uss;
 1972         }
 1973 
 1974         /*
 1975          * Sanity check.  The media size has to be at least one
 1976          * sector long.
 1977          */
 1978         if (be_lun->size_bytes < cbe_lun->blocksize) {
 1979                 error = EINVAL;
 1980                 snprintf(req->error_str, sizeof(req->error_str),
 1981                          "file %s size %ju < block size %u", be_lun->dev_path,
 1982                          (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
 1983         }
 1984 
 1985         cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
 1986         return (error);
 1987 }
 1988 
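      /*
       * Open-time setup for a LUN backed by a character (GEOM) device.
       * ZFS zvols get their own dispatch and GET LBA STATUS handlers and a
       * larger atomic/maximum I/O size; other devices go through the
       * generic bio-based dispatch, capped at the device's si_iosize_max
       * (and CTLBLK_MAX_SEG).
       */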
 1989 static int
 1990 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 1991 {
 1992         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 1993         struct ctl_lun_create_params *params;
 1994         struct cdevsw                *csw;
 1995         struct cdev                  *dev;
 1996         const char                   *value;
 1997         int                           error, atomic, maxio, ref, unmap, tmp;
 1998         off_t                         ps, pss, po, pos, us, uss, uo, uos, otmp;
 1999 
 2000         params = &be_lun->params;
 2001 
 2002         be_lun->dev_type = CTL_BE_BLOCK_DEV;
 2003         csw = devvn_refthread(be_lun->vn, &dev, &ref);
 2004         if (csw == NULL)
 2005                 return (ENXIO);
 2006         if (strcmp(csw->d_name, "zvol") == 0) {
 2007                 be_lun->dispatch = ctl_be_block_dispatch_zvol;
 2008                 be_lun->get_lba_status = ctl_be_block_gls_zvol;
 2009                 atomic = maxio = CTLBLK_MAX_IO_SIZE;
 2010         } else {
 2011                 be_lun->dispatch = ctl_be_block_dispatch_dev;
 2012                 be_lun->get_lba_status = NULL;
 2013                 atomic = 0;
 2014                 maxio = dev->si_iosize_max;
 2015                 if (maxio <= 0)
 2016                         maxio = DFLTPHYS;
 2017                 if (maxio > CTLBLK_MAX_SEG)
 2018                         maxio = CTLBLK_MAX_SEG;
 2019         }
 2020         be_lun->lun_flush = ctl_be_block_flush_dev;
 2021         be_lun->getattr = ctl_be_block_getattr_dev;
 2022         be_lun->unmap = ctl_be_block_unmap_dev;
 2023 
 2024         if (!csw->d_ioctl) {
 2025                 dev_relthread(dev, ref);
 2026                 snprintf(req->error_str, sizeof(req->error_str),
 2027                          "no d_ioctl for device %s!", be_lun->dev_path);
 2028                 return (ENODEV);
 2029         }
 2030 
 2031         error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
 2032                                curthread);
 2033         if (error) {
 2034                 dev_relthread(dev, ref);
 2035                 snprintf(req->error_str, sizeof(req->error_str),
 2036                          "error %d returned for DIOCGSECTORSIZE ioctl "
 2037                          "on %s!", error, be_lun->dev_path);
 2038                 return (error);
 2039         }
 2040 
 2041         /*
 2042          * If the user has asked for a blocksize that is greater than the
 2043          * backing device's blocksize, we can do it only if the blocksize
 2044          * the user is asking for is an even multiple of the underlying 
 2045          * device's blocksize.
 2046          */
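              /*
               * For example (hypothetical values): exporting 4096-byte
               * logical blocks on top of a 512-byte-sector device is
               * accepted (4096 % 512 == 0), while 4100 bytes would be
               * rejected as not an even multiple.
               */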
 2047         if ((params->blocksize_bytes != 0) &&
 2048             (params->blocksize_bytes >= tmp)) {
 2049                 if (params->blocksize_bytes % tmp == 0) {
 2050                         cbe_lun->blocksize = params->blocksize_bytes;
 2051                 } else {
 2052                         dev_relthread(dev, ref);
 2053                         snprintf(req->error_str, sizeof(req->error_str),
 2054                                  "requested blocksize %u is not an even "
 2055                                  "multiple of backing device blocksize %u",
 2056                                  params->blocksize_bytes, tmp);
 2057                         return (EINVAL);
 2058                 }
 2059         } else if (params->blocksize_bytes != 0) {
 2060                 dev_relthread(dev, ref);
 2061                 snprintf(req->error_str, sizeof(req->error_str),
 2062                          "requested blocksize %u < backing device "
 2063                          "blocksize %u", params->blocksize_bytes, tmp);
 2064                 return (EINVAL);
 2065         } else if (cbe_lun->lun_type == T_CDROM)
 2066                 cbe_lun->blocksize = MAX(tmp, 2048);
 2067         else
 2068                 cbe_lun->blocksize = tmp;
 2069 
 2070         error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
 2071                              curthread);
 2072         if (error) {
 2073                 dev_relthread(dev, ref);
 2074                 snprintf(req->error_str, sizeof(req->error_str),
 2075                          "error %d returned for DIOCGMEDIASIZE "
 2076                          "ioctl on %s!", error,
 2077                          be_lun->dev_path);
 2078                 return (error);
 2079         }
 2080 
 2081         if (params->lun_size_bytes != 0) {
 2082                 if (params->lun_size_bytes > otmp) {
 2083                         dev_relthread(dev, ref);
 2084                         snprintf(req->error_str, sizeof(req->error_str),
 2085                                  "requested LUN size %ju > backing device "
 2086                                  "size %ju",
 2087                                  (uintmax_t)params->lun_size_bytes,
 2088                                  (uintmax_t)otmp);
 2089                         return (EINVAL);
 2090                 }
 2091 
 2092                 be_lun->size_bytes = params->lun_size_bytes;
 2093         } else
 2094                 be_lun->size_bytes = otmp;
 2095         be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 2096         cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 2097             0 : (be_lun->size_blocks - 1);
 2098 
 2099         error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
 2100             curthread);
 2101         if (error)
 2102                 ps = po = 0;
 2103         else {
 2104                 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
 2105                     FREAD, curthread);
 2106                 if (error)
 2107                         po = 0;
 2108         }
 2109         us = ps;
 2110         uo = po;
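              /*
               * The GEOM stripe size/offset reported by the device seed the
               * physical and UNMAP block geometry; the pblocksize/pblockoffset
               * and ublocksize/ublockoffset options below can override them.
               */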
 2111 
 2112         value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
 2113         if (value != NULL)
 2114                 ctl_expand_number(value, &ps);
 2115         value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
 2116         if (value != NULL)
 2117                 ctl_expand_number(value, &po);
 2118         pss = ps / cbe_lun->blocksize;
 2119         pos = po / cbe_lun->blocksize;
 2120         if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
 2121             ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
 2122                 cbe_lun->pblockexp = fls(pss) - 1;
 2123                 cbe_lun->pblockoff = (pss - pos) % pss;
 2124         }
 2125 
 2126         value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
 2127         if (value != NULL)
 2128                 ctl_expand_number(value, &us);
 2129         value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
 2130         if (value != NULL)
 2131                 ctl_expand_number(value, &uo);
 2132         uss = us / cbe_lun->blocksize;
 2133         uos = uo / cbe_lun->blocksize;
 2134         if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
 2135             ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
 2136                 cbe_lun->ublockexp = fls(uss) - 1;
 2137                 cbe_lun->ublockoff = (uss - uos) % uss;
 2138         }
 2139 
 2140         cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
 2141         cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
 2142 
 2143         if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
 2144                 unmap = 1;
 2145         } else {
 2146                 struct diocgattr_arg    arg;
 2147 
 2148                 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
 2149                 arg.len = sizeof(arg.value.i);
 2150                 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
 2151                     curthread);
 2152                 unmap = (error == 0) ? arg.value.i : 0;
 2153         }
 2154         value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
 2155         if (value != NULL)
 2156                 unmap = (strcmp(value, "on") == 0);
 2157         if (unmap)
 2158                 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
 2159         else
 2160                 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
 2161 
 2162         dev_relthread(dev, ref);
 2163         return (0);
 2164 }
 2165 
 2166 static int
 2167 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
 2168 {
 2169         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 2170         int flags;
 2171 
 2172         if (be_lun->vn) {
 2173                 flags = FREAD;
 2174                 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
 2175                         flags |= FWRITE;
 2176                 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
 2177                 be_lun->vn = NULL;
 2178 
 2179                 switch (be_lun->dev_type) {
 2180                 case CTL_BE_BLOCK_DEV:
 2181                         break;
 2182                 case CTL_BE_BLOCK_FILE:
 2183                         if (be_lun->backend.file.cred != NULL) {
 2184                                 crfree(be_lun->backend.file.cred);
 2185                                 be_lun->backend.file.cred = NULL;
 2186                         }
 2187                         break;
 2188                 case CTL_BE_BLOCK_NONE:
 2189                         break;
 2190                 default:
 2191                         panic("Unexpected backend type %d", be_lun->dev_type);
 2192                         break;
 2193                 }
 2194                 be_lun->dev_type = CTL_BE_BLOCK_NONE;
 2195         }
 2196         return (0);
 2197 }
 2198 
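      /*
       * Open the backing store named by the "file" option.  Paths that are
       * not absolute are retried under /dev, and a read-only open is
       * attempted automatically if a writable open fails with EROFS or
       * EACCES.  Disks and regular files are then handed to the
       * appropriate open routine above.
       */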
 2199 static int
 2200 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 2201 {
 2202         struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 2203         struct nameidata nd;
 2204         const char      *value;
 2205         int              error, flags;
 2206 
 2207         error = 0;
 2208         if (rootvnode == NULL) {
 2209                 snprintf(req->error_str, sizeof(req->error_str),
 2210                          "Root filesystem is not mounted");
 2211                 return (1);
 2212         }
 2213         pwd_ensure_dirs();
 2214 
 2215         value = dnvlist_get_string(cbe_lun->options, "file", NULL);
 2216         if (value == NULL) {
 2217                 snprintf(req->error_str, sizeof(req->error_str),
 2218                          "no file argument specified");
 2219                 return (1);
 2220         }
 2221         free(be_lun->dev_path, M_CTLBLK);
 2222         be_lun->dev_path = strdup(value, M_CTLBLK);
 2223 
 2224         flags = FREAD;
 2225         value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
 2226         if (value != NULL) {
 2227                 if (strcmp(value, "on") != 0)
 2228                         flags |= FWRITE;
 2229         } else if (cbe_lun->lun_type == T_DIRECT)
 2230                 flags |= FWRITE;
 2231 
 2232 again:
 2233         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
 2234         error = vn_open(&nd, &flags, 0, NULL);
 2235         if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
 2236                 flags &= ~FWRITE;
 2237                 goto again;
 2238         }
 2239         if (error) {
 2240                 /*
 2241                  * If the user didn't give us a fully qualified path, retrying
 2242                  * under /dev is the only reasonable guess we can make.  If
 2243                  * they want to specify a plain file, they need to give the
 2244                  * full path.
 2245                  */
 2246                 if (be_lun->dev_path[0] != '/') {
 2247                         char *dev_name;
 2248 
 2249                         asprintf(&dev_name, M_CTLBLK, "/dev/%s",
 2250                                 be_lun->dev_path);
 2251                         free(be_lun->dev_path, M_CTLBLK);
 2252                         be_lun->dev_path = dev_name;
 2253                         goto again;
 2254                 }
 2255                 snprintf(req->error_str, sizeof(req->error_str),
 2256                     "error opening %s: %d", be_lun->dev_path, error);
 2257                 return (error);
 2258         }
 2259         if (flags & FWRITE)
 2260                 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
 2261         else
 2262                 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
 2263 
 2264         NDFREE_PNBUF(&nd);
 2265         be_lun->vn = nd.ni_vp;
 2266 
 2267         /* We only support disks and files. */
 2268         if (vn_isdisk_error(be_lun->vn, &error)) {
 2269                 error = ctl_be_block_open_dev(be_lun, req);
 2270         } else if (be_lun->vn->v_type == VREG) {
 2271                 error = ctl_be_block_open_file(be_lun, req);
 2272         } else {
 2273                 error = EINVAL;
 2274                 snprintf(req->error_str, sizeof(req->error_str),
 2275                          "%s is not a disk or plain file", be_lun->dev_path);
 2276         }
 2277         VOP_UNLOCK(be_lun->vn);
 2278 
 2279         if (error != 0)
 2280                 ctl_be_block_close(be_lun);
 2281         cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
 2282         if (be_lun->dispatch != ctl_be_block_dispatch_dev)
 2283                 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
 2284         value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
 2285         if (value != NULL && strcmp(value, "on") == 0)
 2286                 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
 2287         else if (value != NULL && strcmp(value, "read") == 0)
 2288                 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
 2289         else if (value != NULL && strcmp(value, "soft") == 0)
 2290                 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
 2291         else if (value != NULL && strcmp(value, "off") == 0)
 2292                 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
 2293         return (0);
 2294 }
 2295 
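      /*
       * Handle a CTL_LUNREQ_CREATE request: allocate and initialize the
       * per-LUN state, open the backing store (for primary or SER_ONLY HA
       * roles), start the worker taskqueue threads and register the LUN
       * with CTL via ctl_add_lun().
       */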
 2296 static int
 2297 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 2298 {
 2299         struct ctl_be_lun *cbe_lun;
 2300         struct ctl_be_block_lun *be_lun;
 2301         struct ctl_lun_create_params *params;
 2303         char tmpstr[32];
 2304         const char *value;
 2305         int retval, num_threads;
 2306         int tmp_num_threads;
 2307 
 2308         params = &req->reqdata.create;
 2309         retval = 0;
 2310         req->status = CTL_LUN_OK;
 2311 
 2312         be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
 2313         cbe_lun = &be_lun->cbe_lun;
 2314         be_lun->params = req->reqdata.create;
 2315         be_lun->softc = softc;
 2316         STAILQ_INIT(&be_lun->input_queue);
 2317         STAILQ_INIT(&be_lun->config_read_queue);
 2318         STAILQ_INIT(&be_lun->config_write_queue);
 2319         STAILQ_INIT(&be_lun->datamove_queue);
 2320         mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
 2321         mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
 2322         cbe_lun->options = nvlist_clone(req->args_nvl);
 2323 
 2324         if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
 2325                 cbe_lun->lun_type = params->device_type;
 2326         else
 2327                 cbe_lun->lun_type = T_DIRECT;
 2328         be_lun->flags = 0;
 2329         cbe_lun->flags = 0;
 2330         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
 2331         if (value != NULL) {
 2332                 if (strcmp(value, "primary") == 0)
 2333                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 2334         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
 2335                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 2336 
 2337         if (cbe_lun->lun_type == T_DIRECT ||
 2338             cbe_lun->lun_type == T_CDROM) {
 2339                 be_lun->size_bytes = params->lun_size_bytes;
 2340                 if (params->blocksize_bytes != 0)
 2341                         cbe_lun->blocksize = params->blocksize_bytes;
 2342                 else if (cbe_lun->lun_type == T_CDROM)
 2343                         cbe_lun->blocksize = 2048;
 2344                 else
 2345                         cbe_lun->blocksize = 512;
 2346                 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 2347                 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 2348                     0 : (be_lun->size_blocks - 1);
 2349 
 2350                 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
 2351                     control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
 2352                         retval = ctl_be_block_open(be_lun, req);
 2353                         if (retval != 0) {
 2354                                 retval = 0;
 2355                                 req->status = CTL_LUN_WARNING;
 2356                         }
 2357                 }
 2358                 num_threads = cbb_num_threads;
 2359         } else {
 2360                 num_threads = 1;
 2361         }
 2362 
 2363         value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
 2364         if (value != NULL) {
 2365                 tmp_num_threads = strtol(value, NULL, 0);
 2366 
 2367                 /*
 2368                  * We don't let the user specify fewer than one
 2369                  * thread, but we hope they're clueful enough not
 2370                  * to specify 1000 threads.
 2371                  */
 2372                 if (tmp_num_threads < 1) {
 2373                         snprintf(req->error_str, sizeof(req->error_str),
 2374                                  "invalid number of threads %s",
 2375                                  value);
 2376                         goto bailout_error;
 2377                 }
 2378                 num_threads = tmp_num_threads;
 2379         }
 2380 
 2381         if (be_lun->vn == NULL)
 2382                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2383         /* Tell the user the blocksize we ended up using */
 2384         params->lun_size_bytes = be_lun->size_bytes;
 2385         params->blocksize_bytes = cbe_lun->blocksize;
 2386         if (params->flags & CTL_LUN_FLAG_ID_REQ) {
 2387                 cbe_lun->req_lun_id = params->req_lun_id;
 2388                 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
 2389         } else
 2390                 cbe_lun->req_lun_id = 0;
 2391 
 2392         cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
 2393         cbe_lun->be = &ctl_be_block_driver;
 2394 
 2395         if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
 2396                 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
 2397                          softc->num_luns);
 2398                 strncpy((char *)cbe_lun->serial_num, tmpstr,
 2399                         MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
 2400 
 2401                 /* Tell the user what we used for a serial number */
 2402                 strncpy((char *)params->serial_num, tmpstr,
 2403                         MIN(sizeof(params->serial_num), sizeof(tmpstr)));
 2404         } else { 
 2405                 strncpy((char *)cbe_lun->serial_num, params->serial_num,
 2406                         MIN(sizeof(cbe_lun->serial_num),
 2407                         sizeof(params->serial_num)));
 2408         }
 2409         if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
 2410                 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
 2411                 strncpy((char *)cbe_lun->device_id, tmpstr,
 2412                         MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
 2413 
 2414                 /* Tell the user what we used for a device ID */
 2415                 strncpy((char *)params->device_id, tmpstr,
 2416                         MIN(sizeof(params->device_id), sizeof(tmpstr)));
 2417         } else {
 2418                 strncpy((char *)cbe_lun->device_id, params->device_id,
 2419                         MIN(sizeof(cbe_lun->device_id),
 2420                             sizeof(params->device_id)));
 2421         }
 2422 
 2423         TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
 2424 
 2425         be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
 2426             taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
 2427 
 2428         if (be_lun->io_taskqueue == NULL) {
 2429                 snprintf(req->error_str, sizeof(req->error_str),
 2430                          "unable to create taskqueue");
 2431                 goto bailout_error;
 2432         }
 2433 
 2434         /*
 2435          * Note that we start the same number of threads by default for
 2436          * both the file case and the block device case.  For the file
 2437          * case, we need multiple threads to allow concurrency, because the
 2438          * vnode interface is designed to be a blocking interface.  For the
 2439          * block device case, ZFS zvols at least will block the caller's
 2440          * context in many instances, and so we need multiple threads to
 2441          * overcome that problem.  Other block devices don't need as many
 2442          * threads, but they shouldn't cause too many problems.
 2443          *
 2444          * If the user wants to just have a single thread for a block
 2445          * device, he can specify that when the LUN is created, or change
 2446          * the tunable/sysctl to alter the default number of threads.
 2447          */
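              /*
               * The worker threads are created inside CTL's own process
               * (control_softc->ctl_proc) rather than as standalone kernel
               * threads, all named "block"; the default count comes from
               * cbb_num_threads used above.
               */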
 2448         retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
 2449                                          /*num threads*/num_threads,
 2450                                          /*priority*/PUSER,
 2451                                          /*proc*/control_softc->ctl_proc,
 2452                                          /*thread name*/"block");
 2453 
 2454         if (retval != 0)
 2455                 goto bailout_error;
 2456 
 2457         be_lun->num_threads = num_threads;
 2458 
 2459         retval = ctl_add_lun(&be_lun->cbe_lun);
 2460         if (retval != 0) {
 2461                 snprintf(req->error_str, sizeof(req->error_str),
 2462                          "ctl_add_lun() returned error %d, see dmesg for "
 2463                          "details", retval);
 2464                 retval = 0;
 2465                 goto bailout_error;
 2466         }
 2467 
 2468         be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
 2469                                                cbe_lun->blocksize,
 2470                                                DEVSTAT_ALL_SUPPORTED,
 2471                                                cbe_lun->lun_type
 2472                                                | DEVSTAT_TYPE_IF_OTHER,
 2473                                                DEVSTAT_PRIORITY_OTHER);
 2474 
 2475         mtx_lock(&softc->lock);
 2476         softc->num_luns++;
 2477         SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
 2478         mtx_unlock(&softc->lock);
 2479 
 2480         params->req_lun_id = cbe_lun->lun_id;
 2481 
 2482         return (retval);
 2483 
 2484 bailout_error:
 2485         req->status = CTL_LUN_ERROR;
 2486 
 2487         if (be_lun->io_taskqueue != NULL)
 2488                 taskqueue_free(be_lun->io_taskqueue);
 2489         ctl_be_block_close(be_lun);
 2490         if (be_lun->dev_path != NULL)
 2491                 free(be_lun->dev_path, M_CTLBLK);
 2492         nvlist_destroy(cbe_lun->options);
 2493         mtx_destroy(&be_lun->queue_lock);
 2494         mtx_destroy(&be_lun->io_lock);
 2495         free(be_lun, M_CTLBLK);
 2496 
 2497         return (retval);
 2498 }
 2499 
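      /*
       * Handle a CTL_LUNREQ_RM request: unlink the LUN from the backend
       * list, close the backing store, ask CTL to remove the LUN and then
       * sleep until ctl_be_block_lun_shutdown() signals that the LUN is
       * fully unconfigured.
       */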
 2500 static int
 2501 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 2502 {
 2503         struct ctl_lun_rm_params *params;
 2504         struct ctl_be_block_lun *be_lun;
 2505         struct ctl_be_lun *cbe_lun;
 2506         int retval;
 2507 
 2508         params = &req->reqdata.rm;
 2509 
 2510         sx_xlock(&softc->modify_lock);
 2511         mtx_lock(&softc->lock);
 2512         SLIST_FOREACH(be_lun, &softc->lun_list, links) {
 2513                 if (be_lun->cbe_lun.lun_id == params->lun_id) {
 2514                         SLIST_REMOVE(&softc->lun_list, be_lun,
 2515                             ctl_be_block_lun, links);
 2516                         softc->num_luns--;
 2517                         break;
 2518                 }
 2519         }
 2520         mtx_unlock(&softc->lock);
 2521         sx_xunlock(&softc->modify_lock);
 2522         if (be_lun == NULL) {
 2523                 snprintf(req->error_str, sizeof(req->error_str),
 2524                          "LUN %u is not managed by the block backend",
 2525                          params->lun_id);
 2526                 goto bailout_error;
 2527         }
 2528         cbe_lun = &be_lun->cbe_lun;
 2529 
 2530         if (be_lun->vn != NULL) {
 2531                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2532                 ctl_lun_no_media(cbe_lun);
 2533                 taskqueue_drain_all(be_lun->io_taskqueue);
 2534                 ctl_be_block_close(be_lun);
 2535         }
 2536 
 2537         mtx_lock(&softc->lock);
 2538         be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
 2539         mtx_unlock(&softc->lock);
 2540 
 2541         retval = ctl_remove_lun(cbe_lun);
 2542         if (retval != 0) {
 2543                 snprintf(req->error_str, sizeof(req->error_str),
 2544                          "error %d returned from ctl_remove_lun() for "
 2545                          "LUN %d", retval, params->lun_id);
 2546                 mtx_lock(&softc->lock);
 2547                 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
 2548                 mtx_unlock(&softc->lock);
 2549                 goto bailout_error;
 2550         }
 2551 
 2552         mtx_lock(&softc->lock);
 2553         while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
 2554                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
 2555                 if (retval == EINTR)
 2556                         break;
 2557         }
 2558         be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
 2559         if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
 2560                 mtx_unlock(&softc->lock);
 2561                 free(be_lun, M_CTLBLK);
 2562         } else {
 2563                 mtx_unlock(&softc->lock);
 2564                 return (EINTR);
 2565         }
 2566 
 2567         req->status = CTL_LUN_OK;
 2568         return (0);
 2569 
 2570 bailout_error:
 2571         req->status = CTL_LUN_ERROR;
 2572         return (0);
 2573 }
 2574 
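      /*
       * Handle a CTL_LUNREQ_MODIFY request: refresh the LUN options,
       * re-evaluate the HA role, reopen or close the backing store as
       * needed and notify CTL if the capacity changed.
       */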
 2575 static int
 2576 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 2577 {
 2578         struct ctl_lun_modify_params *params;
 2579         struct ctl_be_block_lun *be_lun;
 2580         struct ctl_be_lun *cbe_lun;
 2581         const char *value;
 2582         uint64_t oldsize;
 2583         int error, wasprim;
 2584 
 2585         params = &req->reqdata.modify;
 2586 
 2587         sx_xlock(&softc->modify_lock);
 2588         mtx_lock(&softc->lock);
 2589         SLIST_FOREACH(be_lun, &softc->lun_list, links) {
 2590                 if (be_lun->cbe_lun.lun_id == params->lun_id)
 2591                         break;
 2592         }
 2593         mtx_unlock(&softc->lock);
 2594         if (be_lun == NULL) {
 2595                 snprintf(req->error_str, sizeof(req->error_str),
 2596                          "LUN %u is not managed by the block backend",
 2597                          params->lun_id);
 2598                 goto bailout_error;
 2599         }
 2600         cbe_lun = &be_lun->cbe_lun;
 2601 
 2602         if (params->lun_size_bytes != 0)
 2603                 be_lun->params.lun_size_bytes = params->lun_size_bytes;
 2604 
 2605         if (req->args_nvl != NULL) {
 2606                 nvlist_destroy(cbe_lun->options);
 2607                 cbe_lun->options = nvlist_clone(req->args_nvl);
 2608         }
 2609 
 2610         wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
 2611         value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
 2612         if (value != NULL) {
 2613                 if (strcmp(value, "primary") == 0)
 2614                         cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 2615                 else
 2616                         cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
 2617         } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
 2618                 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 2619         else
 2620                 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
 2621         if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
 2622                 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
 2623                         ctl_lun_primary(cbe_lun);
 2624                 else
 2625                         ctl_lun_secondary(cbe_lun);
 2626         }
 2627 
 2628         oldsize = be_lun->size_blocks;
 2629         if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
 2630             control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
 2631                 if (be_lun->vn == NULL)
 2632                         error = ctl_be_block_open(be_lun, req);
 2633                 else if (vn_isdisk_error(be_lun->vn, &error))
 2634                         error = ctl_be_block_open_dev(be_lun, req);
 2635                 else if (be_lun->vn->v_type == VREG) {
 2636                         vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
 2637                         error = ctl_be_block_open_file(be_lun, req);
 2638                         VOP_UNLOCK(be_lun->vn);
 2639                 } else
 2640                         error = EINVAL;
 2641                 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
 2642                     be_lun->vn != NULL) {
 2643                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
 2644                         ctl_lun_has_media(cbe_lun);
 2645                 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
 2646                     be_lun->vn == NULL) {
 2647                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2648                         ctl_lun_no_media(cbe_lun);
 2649                 }
 2650                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
 2651         } else {
 2652                 if (be_lun->vn != NULL) {
 2653                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2654                         ctl_lun_no_media(cbe_lun);
 2655                         taskqueue_drain_all(be_lun->io_taskqueue);
 2656                         error = ctl_be_block_close(be_lun);
 2657                 } else
 2658                         error = 0;
 2659         }
 2660         if (be_lun->size_blocks != oldsize)
 2661                 ctl_lun_capacity_changed(cbe_lun);
 2662 
 2663         /* Tell the user the exact size we ended up using */
 2664         params->lun_size_bytes = be_lun->size_bytes;
 2665 
 2666         sx_xunlock(&softc->modify_lock);
 2667         req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
 2668         return (0);
 2669 
 2670 bailout_error:
 2671         sx_xunlock(&softc->modify_lock);
 2672         req->status = CTL_LUN_ERROR;
 2673         return (0);
 2674 }
 2675 
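/*
 * Editorial sketch (not part of ctl_backend_block.c): ctl_be_block_modify()
 * above replaces the LUN's option list with a clone of the caller-supplied
 * nvlist and then looks up "ha_role" with dnvlist_get_string().  The fragment
 * below shows that nv(9)/dnv(9) pattern in isolation; all ex_* names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/nv.h>
#include <sys/dnv.h>

static void
ex_update_options(nvlist_t **optionsp, const nvlist_t *args)
{
        const char *role;

        if (args != NULL) {
                nvlist_destroy(*optionsp);      /* drop the previous list */
                *optionsp = nvlist_clone(args); /* deep copy of the new one */
        }

        /* dnvlist_get_string() returns the default when the key is absent. */
        role = dnvlist_get_string(*optionsp, "ha_role", "primary");
        printf("ha_role is %s\n", role);
}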
 2676 static void
 2677 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
 2678 {
 2679         struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
 2680         struct ctl_be_block_softc *softc = be_lun->softc;
 2681 
 2682         taskqueue_drain_all(be_lun->io_taskqueue);
 2683         taskqueue_free(be_lun->io_taskqueue);
 2684         if (be_lun->disk_stats != NULL)
 2685                 devstat_remove_entry(be_lun->disk_stats);
 2686         nvlist_destroy(be_lun->cbe_lun.options);
 2687         free(be_lun->dev_path, M_CTLBLK);
 2688         mtx_destroy(&be_lun->queue_lock);
 2689         mtx_destroy(&be_lun->io_lock);
 2690 
 2691         mtx_lock(&softc->lock);
 2692         be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
 2693         if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
 2694                 wakeup(be_lun);
 2695         else
 2696                 free(be_lun, M_CTLBLK);
 2697         mtx_unlock(&softc->lock);
 2698 }
 2699 
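/*
 * Editorial sketch (not part of ctl_backend_block.c): the wait loop in
 * ctl_be_block_rm() above and ctl_be_block_lun_shutdown() hand off ownership
 * of the be_lun through the WAITING/UNCONFIGURED flags.  The self-contained
 * fragment below shows the same msleep()/wakeup() handshake under a mutex;
 * all ex_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>

#define EX_UNCONFIGURED 0x01            /* teardown has completed */
#define EX_WAITING      0x02            /* a remover sleeps on the object */

struct ex_obj {
        int     flags;
};

static struct mtx ex_lock;              /* stands in for softc->lock */
MTX_SYSINIT(ex_lock, &ex_lock, "ex lock", MTX_DEF);

/* Remover: wait (interruptibly) until teardown marks the object done. */
static int
ex_wait_for_teardown(struct ex_obj *obj)
{
        int error;

        mtx_lock(&ex_lock);
        obj->flags |= EX_WAITING;
        while ((obj->flags & EX_UNCONFIGURED) == 0) {
                error = msleep(obj, &ex_lock, PCATCH, "exwait", 0);
                if (error == EINTR)
                        break;
        }
        obj->flags &= ~EX_WAITING;
        if (obj->flags & EX_UNCONFIGURED) {
                mtx_unlock(&ex_lock);
                free(obj, M_TEMP);      /* the waiter does the final free */
                return (0);
        }
        /* Interrupted: leave the object for the teardown side to free. */
        mtx_unlock(&ex_lock);
        return (EINTR);
}

/* Teardown side: mark the object done, wake a waiter or free it directly. */
static void
ex_teardown_done(struct ex_obj *obj)
{
        mtx_lock(&ex_lock);
        obj->flags |= EX_UNCONFIGURED;
        if (obj->flags & EX_WAITING)
                wakeup(obj);
        else
                free(obj, M_TEMP);
        mtx_unlock(&ex_lock);
}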
 2700 static int
 2701 ctl_be_block_config_write(union ctl_io *io)
 2702 {
 2703         struct ctl_be_block_lun *be_lun;
 2704         struct ctl_be_lun *cbe_lun;
 2705         int retval;
 2706 
 2707         DPRINTF("entered\n");
 2708 
 2709         cbe_lun = CTL_BACKEND_LUN(io);
 2710         be_lun = (struct ctl_be_block_lun *)cbe_lun;
 2711 
 2712         retval = 0;
 2713         switch (io->scsiio.cdb[0]) {
 2714         case SYNCHRONIZE_CACHE:
 2715         case SYNCHRONIZE_CACHE_16:
 2716         case WRITE_SAME_10:
 2717         case WRITE_SAME_16:
 2718         case UNMAP:
 2719                 /*
 2720                  * The upper level CTL code will filter out any CDBs with
 2721                  * the immediate bit set and return the proper error.
 2722                  *
 2723                  * We don't really need to worry about what LBA range the
 2724                  * user asked to be synced out.  When they issue a sync
 2725                  * cache command, we'll sync out the whole thing.
 2726                  */
 2727                 mtx_lock(&be_lun->queue_lock);
 2728                 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
 2729                                    links);
 2730                 mtx_unlock(&be_lun->queue_lock);
 2731                 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
 2732                 break;
 2733         case START_STOP_UNIT: {
 2734                 struct scsi_start_stop_unit *cdb;
 2735                 struct ctl_lun_req req;
 2736 
 2737                 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
 2738                 if ((cdb->how & SSS_PC_MASK) != 0) {
 2739                         ctl_set_success(&io->scsiio);
 2740                         ctl_config_write_done(io);
 2741                         break;
 2742                 }
 2743                 if (cdb->how & SSS_START) {
 2744                         if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
 2745                                 retval = ctl_be_block_open(be_lun, &req);
 2746                                 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
 2747                                 if (retval == 0) {
 2748                                         cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
 2749                                         ctl_lun_has_media(cbe_lun);
 2750                                 } else {
 2751                                         cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2752                                         ctl_lun_no_media(cbe_lun);
 2753                                 }
 2754                         }
 2755                         ctl_start_lun(cbe_lun);
 2756                 } else {
 2757                         ctl_stop_lun(cbe_lun);
 2758                         if (cdb->how & SSS_LOEJ) {
 2759                                 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 2760                                 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
 2761                                 ctl_lun_ejected(cbe_lun);
 2762                                 if (be_lun->vn != NULL)
 2763                                         ctl_be_block_close(be_lun);
 2764                         }
 2765                 }
 2766 
 2767                 ctl_set_success(&io->scsiio);
 2768                 ctl_config_write_done(io);
 2769                 break;
 2770         }
 2771         case PREVENT_ALLOW:
 2772                 ctl_set_success(&io->scsiio);
 2773                 ctl_config_write_done(io);
 2774                 break;
 2775         default:
 2776                 ctl_set_invalid_opcode(&io->scsiio);
 2777                 ctl_config_write_done(io);
 2778                 retval = CTL_RETVAL_COMPLETE;
 2779                 break;
 2780         }
 2781 
 2782         return (retval);
 2783 }
 2784 
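/*
 * Editorial sketch (not part of ctl_backend_block.c): the SYNCHRONIZE
 * CACHE/WRITE SAME/UNMAP cases above queue the I/O on config_write_queue
 * under queue_lock and then poke the LUN's taskqueue, whose worker drains
 * the queue later.  The fragment below shows that producer/consumer shape
 * with a STAILQ and taskqueue(9); all ex_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

struct ex_req {
        STAILQ_ENTRY(ex_req)    links;
        int                     opcode;
};

static STAILQ_HEAD(, ex_req) ex_req_queue = STAILQ_HEAD_INITIALIZER(ex_req_queue);
static struct mtx ex_req_lock;
static struct taskqueue *ex_tq;
static struct task ex_req_task;

/* Worker: runs in the taskqueue thread and drains everything queued. */
static void
ex_req_worker(void *context, int pending)
{
        struct ex_req *req;

        mtx_lock(&ex_req_lock);
        while ((req = STAILQ_FIRST(&ex_req_queue)) != NULL) {
                STAILQ_REMOVE_HEAD(&ex_req_queue, links);
                mtx_unlock(&ex_req_lock);
                printf("handling opcode %#x\n", req->opcode);
                free(req, M_TEMP);
                mtx_lock(&ex_req_lock);
        }
        mtx_unlock(&ex_req_lock);
}

/* Producer: append a request under the queue lock and kick the worker. */
static void
ex_req_submit(int opcode)
{
        struct ex_req *req;

        req = malloc(sizeof(*req), M_TEMP, M_WAITOK | M_ZERO);
        req->opcode = opcode;
        mtx_lock(&ex_req_lock);
        STAILQ_INSERT_TAIL(&ex_req_queue, req, links);
        mtx_unlock(&ex_req_lock);
        taskqueue_enqueue(ex_tq, &ex_req_task);
}

/* One-time setup, e.g. from the backend's attach path. */
static void
ex_req_setup(void)
{
        mtx_init(&ex_req_lock, "ex req queue", NULL, MTX_DEF);
        TASK_INIT(&ex_req_task, /*priority*/ 0, ex_req_worker, NULL);
        ex_tq = taskqueue_create("ex_tq", M_WAITOK,
            taskqueue_thread_enqueue, &ex_tq);
        taskqueue_start_threads(&ex_tq, /*count*/ 1, PWAIT, "ex worker");
}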
 2785 static int
 2786 ctl_be_block_config_read(union ctl_io *io)
 2787 {
 2788         struct ctl_be_block_lun *be_lun;
 2789         int retval = 0;
 2790 
 2791         DPRINTF("entered\n");
 2792 
 2793         be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
 2794 
 2795         switch (io->scsiio.cdb[0]) {
 2796         case SERVICE_ACTION_IN:
 2797                 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
 2798                         mtx_lock(&be_lun->queue_lock);
 2799                         STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
 2800                             &io->io_hdr, links);
 2801                         mtx_unlock(&be_lun->queue_lock);
 2802                         taskqueue_enqueue(be_lun->io_taskqueue,
 2803                             &be_lun->io_task);
 2804                         retval = CTL_RETVAL_QUEUED;
 2805                         break;
 2806                 }
 2807                 ctl_set_invalid_field(&io->scsiio,
 2808                                       /*sks_valid*/ 1,
 2809                                       /*command*/ 1,
 2810                                       /*field*/ 1,
 2811                                       /*bit_valid*/ 1,
 2812                                       /*bit*/ 4);
 2813                 ctl_config_read_done(io);
 2814                 retval = CTL_RETVAL_COMPLETE;
 2815                 break;
 2816         default:
 2817                 ctl_set_invalid_opcode(&io->scsiio);
 2818                 ctl_config_read_done(io);
 2819                 retval = CTL_RETVAL_COMPLETE;
 2820                 break;
 2821         }
 2822 
 2823         return (retval);
 2824 }
 2825 
 2826 static int
 2827 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
 2828 {
 2829         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
 2830         int retval;
 2831 
 2832         retval = sbuf_printf(sb, "\t<num_threads>");
 2833         if (retval != 0)
 2834                 goto bailout;
 2835         retval = sbuf_printf(sb, "%d", lun->num_threads);
 2836         if (retval != 0)
 2837                 goto bailout;
 2838         retval = sbuf_printf(sb, "</num_threads>\n");
 2839 
 2840 bailout:
 2841         return (retval);
 2842 }
 2843 
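/*
 * Editorial sketch (not part of ctl_backend_block.c):
 * ctl_be_block_lun_info() above appends an XML fragment to a caller-provided
 * sbuf and propagates the first sbuf_printf() failure.  The fragment below
 * shows the same sbuf(9) calls against a self-managed, auto-extending
 * buffer; ex_print_num_threads() is hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sbuf.h>

static void
ex_print_num_threads(int num_threads)
{
        struct sbuf *sb;

        sb = sbuf_new_auto();           /* auto-extending, self-owned buffer */
        sbuf_printf(sb, "\t<num_threads>%d</num_threads>\n", num_threads);
        if (sbuf_finish(sb) == 0)       /* 0: formatting succeeded */
                printf("%s", sbuf_data(sb));
        sbuf_delete(sb);
}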
 2844 static uint64_t
 2845 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
 2846 {
 2847         struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
 2848 
 2849         if (lun->getattr == NULL)
 2850                 return (UINT64_MAX);
 2851         return (lun->getattr(lun, attrname));
 2852 }
 2853 
 2854 static int
 2855 ctl_be_block_init(void)
 2856 {
 2857         struct ctl_be_block_softc *softc = &backend_block_softc;
 2858 
 2859         sx_init(&softc->modify_lock, "ctlblock modify");
 2860         mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
 2861         softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
 2862             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 2863         softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
 2864             NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
 2865         if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
 2866                 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
 2867                     NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
 2868         SLIST_INIT(&softc->lun_list);
 2869         return (0);
 2870 }
 2871 
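/*
 * Editorial sketch (not part of ctl_backend_block.c): ctl_be_block_init()
 * above creates UMA zones for the backend's I/O descriptors and data
 * segments.  The fragment below shows the matching uma(9) life cycle of
 * create, allocate, free and destroy; all ex_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/uma.h>

struct ex_io {
        int     op;
        char    payload[64];
};

static uma_zone_t ex_io_zone;

static void
ex_zone_demo(void)
{
        struct ex_io *io;

        /* No ctor/dtor/init/fini, pointer alignment, no special flags. */
        ex_io_zone = uma_zcreate("ex_io", sizeof(struct ex_io),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

        io = uma_zalloc(ex_io_zone, M_WAITOK | M_ZERO);
        io->op = 1;
        uma_zfree(ex_io_zone, io);

        uma_zdestroy(ex_io_zone);
}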
 2872 static int
 2873 ctl_be_block_shutdown(void)
 2874 {
 2875         struct ctl_be_block_softc *softc = &backend_block_softc;
 2876         struct ctl_be_block_lun *lun;
 2877 
 2878         mtx_lock(&softc->lock);
 2879         while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
 2880                 SLIST_REMOVE_HEAD(&softc->lun_list, links);
 2881                 softc->num_luns--;
 2882                 /*
 2883                  * Drop our lock here.  Since ctl_remove_lun() can call
 2884                  * back into us, this could potentially lead to a recursive
 2885                  * lock of the same mutex, which would cause a hang.
 2886                  */
 2887                 mtx_unlock(&softc->lock);
 2888                 ctl_remove_lun(&lun->cbe_lun);
 2889                 mtx_lock(&softc->lock);
 2890         }
 2891         mtx_unlock(&softc->lock);
 2892         uma_zdestroy(softc->bufmin_zone);
 2893         if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
 2894                 uma_zdestroy(softc->bufmax_zone);
 2895         uma_zdestroy(softc->beio_zone);
 2896         mtx_destroy(&softc->lock);
 2897         sx_destroy(&softc->modify_lock);
 2898         return (0);
 2899 }
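/*
 * Editorial sketch (not part of ctl_backend_block.c): the shutdown loop above
 * drops softc->lock around ctl_remove_lun() because removal can call back
 * into this backend (ctl_be_block_lun_shutdown() takes the same mutex).  The
 * fragment below shows that drain-with-unlock pattern; all ex_* names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/queue.h>

struct ex_lun {
        SLIST_ENTRY(ex_lun)     links;
        int                     id;
};

static SLIST_HEAD(, ex_lun) ex_lun_list = SLIST_HEAD_INITIALIZER(ex_lun_list);
static struct mtx ex_list_lock;
MTX_SYSINIT(ex_list_lock, &ex_list_lock, "ex lun list", MTX_DEF);

/* May re-enter code that takes ex_list_lock, so it must be called unlocked. */
static void
ex_remove_lun(struct ex_lun *lun)
{
        printf("removing LUN %d\n", lun->id);
        free(lun, M_TEMP);
}

static void
ex_drain_luns(void)
{
        struct ex_lun *lun;

        mtx_lock(&ex_list_lock);
        while ((lun = SLIST_FIRST(&ex_lun_list)) != NULL) {
                SLIST_REMOVE_HEAD(&ex_lun_list, links);
                /* Drop the lock: ex_remove_lun() may acquire it again. */
                mtx_unlock(&ex_list_lock);
                ex_remove_lun(lun);
                mtx_lock(&ex_list_lock);
        }
        mtx_unlock(&ex_list_lock);
}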
