FreeBSD/Linux Kernel Cross Reference
sys/dev/xen/blkfront/blkfront.c

    1 /*
    2  * XenBSD block device driver
    3  *
    4  * Copyright (c) 2010-2013 Spectra Logic Corporation
    5  * Copyright (c) 2009 Scott Long, Yahoo!
    6  * Copyright (c) 2009 Frank Suchomel, Citrix
    7  * Copyright (c) 2009 Doug F. Rabson, Citrix
    8  * Copyright (c) 2005 Kip Macy
    9  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   10  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   11  *
   12  *
   13  * Permission is hereby granted, free of charge, to any person obtaining a copy
   14  * of this software and associated documentation files (the "Software"), to
   15  * deal in the Software without restriction, including without limitation the
   16  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
   17  * sell copies of the Software, and to permit persons to whom the Software is
   18  * furnished to do so, subject to the following conditions:
   19  *
   20  * The above copyright notice and this permission notice shall be included in
   21  * all copies or substantial portions of the Software.
   22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   28  * DEALINGS IN THE SOFTWARE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/malloc.h>
   37 #include <sys/kernel.h>
   38 #include <vm/vm.h>
   39 #include <vm/pmap.h>
   40 
   41 #include <sys/bio.h>
   42 #include <sys/bus.h>
   43 #include <sys/conf.h>
   44 #include <sys/module.h>
   45 #include <sys/sysctl.h>
   46 
   47 #include <machine/bus.h>
   48 #include <sys/rman.h>
   49 #include <machine/resource.h>
   50 #include <machine/vmparam.h>
   51 
   52 #include <xen/xen-os.h>
   53 #include <xen/hypervisor.h>
   54 #include <xen/xen_intr.h>
   55 #include <xen/gnttab.h>
   56 #include <contrib/xen/grant_table.h>
   57 #include <contrib/xen/io/protocols.h>
   58 #include <xen/xenbus/xenbusvar.h>
   59 
   60 #include <machine/_inttypes.h>
   61 
   62 #include <geom/geom_disk.h>
   63 
   64 #include <dev/xen/blkfront/block.h>
   65 
   66 #include "xenbus_if.h"
   67 
   68 /*--------------------------- Forward Declarations ---------------------------*/
   69 static void xbd_closing(device_t);
   70 static void xbd_startio(struct xbd_softc *sc);
   71 
   72 /*---------------------------------- Macros ----------------------------------*/
   73 #if 0
   74 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
   75 #else
   76 #define DPRINTK(fmt, args...) 
   77 #endif
   78 
   79 #define XBD_SECTOR_SHFT         9
   80 
   81 /*---------------------------- Global Static Data ----------------------------*/
   82 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
   83 
   84 static int xbd_enable_indirect = 1;
   85 SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
   86     "xbd driver parameters");
   87 SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
   88     &xbd_enable_indirect, 0, "Enable xbd indirect segments");
   89 
   90 /*---------------------------- Command Processing ----------------------------*/
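       /*
        * Queue freeze/thaw bookkeeping: every xbd_freeze() call bumps
        * xbd_qfrozen_cnt and, for reasons other than XBDF_NONE, records the
        * reason in xbd_flags so the same condition is only counted once.
        * xbd_thaw() reverses the operation, and xbd_startio() dispatches no
        * new work while the count is non-zero.
        */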
   91 static void
   92 xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
   93 {
   94         if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0)
   95                 return;
   96 
   97         sc->xbd_flags |= xbd_flag;
   98         sc->xbd_qfrozen_cnt++;
   99 }
  100 
  101 static void
  102 xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
  103 {
  104         if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0)
  105                 return;
  106 
  107         if (sc->xbd_qfrozen_cnt == 0)
  108                 panic("%s: Thaw with flag 0x%x while not frozen.",
  109                     __func__, xbd_flag);
  110 
  111         sc->xbd_flags &= ~xbd_flag;
  112         sc->xbd_qfrozen_cnt--;
  113 }
  114 
  115 static void
  116 xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
  117 {
  118         if ((cm->cm_flags & XBDCF_FROZEN) != 0)
  119                 return;
  120 
  121         cm->cm_flags |= XBDCF_FROZEN|cm_flag;
  122         xbd_freeze(sc, XBDF_NONE);
  123 }
  124 
  125 static void
  126 xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
  127 {
  128         if ((cm->cm_flags & XBDCF_FROZEN) == 0)
  129                 return;
  130 
  131         cm->cm_flags &= ~XBDCF_FROZEN;
  132         xbd_thaw(sc, XBDF_NONE);
  133 }
  134 
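       /*
        * Publish any requests queued at the private ring index to the shared
        * ring and, if the backend has asked to be notified, signal its event
        * channel.
        */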
  135 static inline void 
  136 xbd_flush_requests(struct xbd_softc *sc)
  137 {
  138         int notify;
  139 
  140         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify);
  141 
  142         if (notify)
  143                 xen_intr_signal(sc->xen_intr_handle);
  144 }
  145 
  146 static void
  147 xbd_free_command(struct xbd_command *cm)
  148 {
  149 
  150         KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE,
  151             ("Freeing command that is still on queue %d.",
  152             cm->cm_flags & XBDCF_Q_MASK));
  153 
  154         cm->cm_flags = XBDCF_INITIALIZER;
  155         cm->cm_bp = NULL;
  156         cm->cm_complete = NULL;
  157         xbd_enqueue_cm(cm, XBD_Q_FREE);
  158         xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE);
  159 }
  160 
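       /*
        * Convert a bus_dma segment list into blkif segment descriptors,
        * claiming a grant reference for each page and recording the first
        * and last 512-byte sectors used within that page.
        */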
  161 static void
  162 xbd_mksegarray(bus_dma_segment_t *segs, int nsegs,
  163     grant_ref_t * gref_head, int otherend_id, int readonly,
  164     grant_ref_t * sg_ref, struct blkif_request_segment *sg)
  165 {
  166         struct blkif_request_segment *last_block_sg = sg + nsegs;
  167         vm_paddr_t buffer_ma;
  168         uint64_t fsect, lsect;
  169         int ref;
  170 
  171         while (sg < last_block_sg) {
  172                 KASSERT(segs->ds_addr % (1 << XBD_SECTOR_SHFT) == 0,
  173                     ("XEN disk driver I/O must be sector aligned"));
  174                 KASSERT(segs->ds_len % (1 << XBD_SECTOR_SHFT) == 0,
  175                     ("XEN disk driver I/Os must be a multiple of "
  176                     "the sector length"));
  177                 buffer_ma = segs->ds_addr;
  178                 fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
  179                 lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;
  180 
  181                 KASSERT(lsect <= 7, ("XEN disk driver data cannot "
  182                     "cross a page boundary"));
  183 
  184                 /* install a grant reference. */
  185                 ref = gnttab_claim_grant_reference(gref_head);
  186 
  187                 /*
  188                  * GNTTAB_LIST_END == 0xffffffff, but it is private
  189                  * to gnttab.c.
  190                  */
  191                 KASSERT(ref != ~0, ("grant_reference failed"));
  192 
  193                 gnttab_grant_foreign_access_ref(
  194                     ref,
  195                     otherend_id,
  196                     buffer_ma >> PAGE_SHIFT,
  197                     readonly);
  198 
  199                 *sg_ref = ref;
  200                 *sg = (struct blkif_request_segment) {
  201                         .gref       = ref,
  202                         .first_sect = fsect, 
  203                         .last_sect  = lsect
  204                 };
  205                 sg++;
  206                 sg_ref++;
  207                 segs++;
  208         }
  209 }
  210 
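       /*
        * bus_dma callback: build the ring request for a mapped command.
        * When the segment count fits in BLKIF_MAX_SEGMENTS_PER_REQUEST a
        * direct request is used; otherwise the segments are written into
        * the command's pre-granted indirection pages and an indirect
        * request is issued.
        */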
  211 static void
  212 xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  213 {
  214         struct xbd_softc *sc;
  215         struct xbd_command *cm;
  216         int op;
  217 
  218         cm = arg;
  219         sc = cm->cm_sc;
  220 
  221         if (error) {
  222                 cm->cm_bp->bio_error = EIO;
  223                 biodone(cm->cm_bp);
  224                 xbd_free_command(cm);
  225                 return;
  226         }
  227 
  228         KASSERT(nsegs <= sc->xbd_max_request_segments,
  229             ("Too many segments in a blkfront I/O"));
  230 
  231         if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
  232                 blkif_request_t *ring_req;
  233 
  234                 /* Fill out a blkif_request_t structure. */
  235                 ring_req = (blkif_request_t *)
  236                     RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
  237                 sc->xbd_ring.req_prod_pvt++;
  238                 ring_req->id = cm->cm_id;
  239                 ring_req->operation = cm->cm_operation;
  240                 ring_req->sector_number = cm->cm_sector_number;
  241                 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
  242                 ring_req->nr_segments = nsegs;
  243                 cm->cm_nseg = nsegs;
  244                 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
  245                     xenbus_get_otherend_id(sc->xbd_dev),
  246                     cm->cm_operation == BLKIF_OP_WRITE,
  247                     cm->cm_sg_refs, ring_req->seg);
  248         } else {
  249                 blkif_request_indirect_t *ring_req;
  250 
  251                 /* Fill out a blkif_request_indirect_t structure. */
  252                 ring_req = (blkif_request_indirect_t *)
  253                     RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
  254                 sc->xbd_ring.req_prod_pvt++;
  255                 ring_req->id = cm->cm_id;
  256                 ring_req->operation = BLKIF_OP_INDIRECT;
  257                 ring_req->indirect_op = cm->cm_operation;
  258                 ring_req->sector_number = cm->cm_sector_number;
  259                 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
  260                 ring_req->nr_segments = nsegs;
  261                 cm->cm_nseg = nsegs;
  262                 xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
  263                     xenbus_get_otherend_id(sc->xbd_dev),
  264                     cm->cm_operation == BLKIF_OP_WRITE,
  265                     cm->cm_sg_refs, cm->cm_indirectionpages);
  266                 memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs,
  267                     sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages);
  268         }
  269 
  270         if (cm->cm_operation == BLKIF_OP_READ)
  271                 op = BUS_DMASYNC_PREREAD;
  272         else if (cm->cm_operation == BLKIF_OP_WRITE)
  273                 op = BUS_DMASYNC_PREWRITE;
  274         else
  275                 op = 0;
  276         bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
  277 
  278         gnttab_free_grant_references(cm->cm_gref_head);
  279 
  280         xbd_enqueue_cm(cm, XBD_Q_BUSY);
  281 
  282         /*
  283          * If bus dma had to asynchronously call us back to dispatch
  284          * this command, we are no longer executing in the context of 
  285          * xbd_startio().  Thus we cannot rely on xbd_startio()'s call to
  286          * xbd_flush_requests() to publish this command to the backend
  287          * along with any other commands that it could batch.
  288          */
  289         if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0)
  290                 xbd_flush_requests(sc);
  291 
  292         return;
  293 }
  294 
  295 static int
  296 xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
  297 {
  298         int error;
  299 
  300         if (cm->cm_bp != NULL)
  301                 error = bus_dmamap_load_bio(sc->xbd_io_dmat, cm->cm_map,
  302                     cm->cm_bp, xbd_queue_cb, cm, 0);
  303         else
  304                 error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map,
  305                     cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0);
  306         if (error == EINPROGRESS) {
  307                 /*
  308                  * Maintain queuing order by freezing the queue.  The next
  309                  * command may not require as many resources as the command
  310                  * we just attempted to map, so we can't rely on bus dma
  311                  * blocking for it too.
  312                  */
  313                 xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
  314                 return (0);
  315         }
  316 
  317         return (error);
  318 }
  319 
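       /*
        * Invoked by the grant table code once references become available
        * again; clears the XBDF_GNT_SHORTAGE freeze and restarts dispatch.
        */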
  320 static void
  321 xbd_restart_queue_callback(void *arg)
  322 {
  323         struct xbd_softc *sc = arg;
  324 
  325         mtx_lock(&sc->xbd_io_lock);
  326 
  327         xbd_thaw(sc, XBDF_GNT_SHORTAGE);
  328 
  329         xbd_startio(sc);
  330 
  331         mtx_unlock(&sc->xbd_io_lock);
  332 }
  333 
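       /*
        * Turn the next queued bio into an xbd_command.  On command or grant
        * reference shortage the queue is frozen, the bio is requeued, and
        * NULL is returned; unsupported bios are completed with EOPNOTSUPP.
        */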
  334 static struct xbd_command *
  335 xbd_bio_command(struct xbd_softc *sc)
  336 {
  337         struct xbd_command *cm;
  338         struct bio *bp;
  339 
  340         if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED))
  341                 return (NULL);
  342 
  343         bp = xbd_dequeue_bio(sc);
  344         if (bp == NULL)
  345                 return (NULL);
  346 
  347         if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) {
  348                 xbd_freeze(sc, XBDF_CM_SHORTAGE);
  349                 xbd_requeue_bio(sc, bp);
  350                 return (NULL);
  351         }
  352 
  353         if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
  354             &cm->cm_gref_head) != 0) {
  355                 gnttab_request_free_callback(&sc->xbd_callback,
  356                     xbd_restart_queue_callback, sc,
  357                     sc->xbd_max_request_segments);
  358                 xbd_freeze(sc, XBDF_GNT_SHORTAGE);
  359                 xbd_requeue_bio(sc, bp);
  360                 xbd_enqueue_cm(cm, XBD_Q_FREE);
  361                 return (NULL);
  362         }
  363 
  364         cm->cm_bp = bp;
  365         cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
  366 
  367         switch (bp->bio_cmd) {
  368         case BIO_READ:
  369                 cm->cm_operation = BLKIF_OP_READ;
  370                 break;
  371         case BIO_WRITE:
  372                 cm->cm_operation = BLKIF_OP_WRITE;
  373                 if ((bp->bio_flags & BIO_ORDERED) != 0) {
  374                         if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
  375                                 cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
  376                         } else {
  377                                 /*
  378                                  * Single step this command.
  379                                  */
  380                                 cm->cm_flags |= XBDCF_Q_FREEZE;
  381                                 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
  382                                         /*
  383                                          * Wait for in-flight requests to
  384                                          * finish.
  385                                          */
  386                                         xbd_freeze(sc, XBDF_WAIT_IDLE);
  387                                         xbd_requeue_cm(cm, XBD_Q_READY);
  388                                         return (NULL);
  389                                 }
  390                         }
  391                 }
  392                 break;
  393         case BIO_FLUSH:
  394                 if ((sc->xbd_flags & XBDF_FLUSH) != 0)
  395                         cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
  396                 else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
  397                         cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
  398                 else
  399                         panic("flush request, but no flush support available");
  400                 break;
  401         default:
  402                 biofinish(bp, NULL, EOPNOTSUPP);
  403                 xbd_enqueue_cm(cm, XBD_Q_FREE);
  404                 return (NULL);
  405         }
  406 
  407         return (cm);
  408 }
  409 
  410 /*
  411  * Dequeue buffers and place them in the shared communication ring.
  412  * Return when no more requests can be accepted or all buffers have 
  413  * been queued.
  414  *
  415  * Signal XEN once the ring has been filled out.
  416  */
  417 static void
  418 xbd_startio(struct xbd_softc *sc)
  419 {
  420         struct xbd_command *cm;
  421         int error, queued = 0;
  422 
  423         mtx_assert(&sc->xbd_io_lock, MA_OWNED);
  424 
  425         if (sc->xbd_state != XBD_STATE_CONNECTED)
  426                 return;
  427 
  428         while (!RING_FULL(&sc->xbd_ring)) {
  429                 if (sc->xbd_qfrozen_cnt != 0)
  430                         break;
  431 
  432                 cm = xbd_dequeue_cm(sc, XBD_Q_READY);
  433 
  434                 if (cm == NULL)
  435                     cm = xbd_bio_command(sc);
  436 
  437                 if (cm == NULL)
  438                         break;
  439 
  440                 if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
  441                         /*
  442                          * Single step command.  Future work is
  443                          * held off until this command completes.
  444                          */
  445                         xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
  446                 }
  447 
  448                 if ((error = xbd_queue_request(sc, cm)) != 0) {
  449                         printf("xbd_queue_request returned %d\n", error);
  450                         break;
  451                 }
  452                 queued++;
  453         }
  454 
  455         if (queued != 0) 
  456                 xbd_flush_requests(sc);
  457 }
  458 
  459 static void
  460 xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
  461 {
  462         struct bio *bp;
  463 
  464         bp = cm->cm_bp;
  465 
  466         if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) {
   467                 disk_err(bp, "disk error", -1, 0);
  468                 printf(" status: %x\n", cm->cm_status);
  469                 bp->bio_flags |= BIO_ERROR;
  470         }
  471 
  472         if (bp->bio_flags & BIO_ERROR)
  473                 bp->bio_error = EIO;
  474         else
  475                 bp->bio_resid = 0;
  476 
  477         xbd_free_command(cm);
  478         biodone(bp);
  479 }
  480 
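       /*
        * Interrupt handler for the ring event channel: consume all pending
        * responses, tear down each command's grants and DMA mapping,
        * complete the originating bio (or invoke cm_complete), and restart
        * dispatch of any queued work.
        */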
  481 static void
  482 xbd_int(void *xsc)
  483 {
  484         struct xbd_softc *sc = xsc;
  485         struct xbd_command *cm;
  486         blkif_response_t *bret;
  487         RING_IDX i, rp;
  488         int op;
  489 
  490         mtx_lock(&sc->xbd_io_lock);
  491 
  492         if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) {
  493                 mtx_unlock(&sc->xbd_io_lock);
  494                 return;
  495         }
  496 
  497  again:
  498         rp = sc->xbd_ring.sring->rsp_prod;
  499         rmb(); /* Ensure we see queued responses up to 'rp'. */
  500 
  501         for (i = sc->xbd_ring.rsp_cons; i != rp;) {
  502                 bret = RING_GET_RESPONSE(&sc->xbd_ring, i);
  503                 cm   = &sc->xbd_shadow[bret->id];
  504 
  505                 xbd_remove_cm(cm, XBD_Q_BUSY);
  506                 gnttab_end_foreign_access_references(cm->cm_nseg,
  507                     cm->cm_sg_refs);
  508                 i++;
  509 
  510                 if (cm->cm_operation == BLKIF_OP_READ)
  511                         op = BUS_DMASYNC_POSTREAD;
  512                 else if (cm->cm_operation == BLKIF_OP_WRITE ||
  513                     cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
  514                         op = BUS_DMASYNC_POSTWRITE;
  515                 else
  516                         op = 0;
  517                 bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
  518                 bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map);
  519 
  520                 /*
  521                  * Release any hold this command has on future command
  522                  * dispatch. 
  523                  */
  524                 xbd_cm_thaw(sc, cm);
  525 
  526                 /*
   527                  * Directly call the i/o complete routine to save
   528                  * an indirection in the common case.
  529                  */
  530                 cm->cm_status = bret->status;
  531                 if (cm->cm_bp)
  532                         xbd_bio_complete(sc, cm);
  533                 else if (cm->cm_complete != NULL)
  534                         cm->cm_complete(cm);
  535                 else
  536                         xbd_free_command(cm);
  537         }
  538 
  539         sc->xbd_ring.rsp_cons = i;
  540 
  541         if (i != sc->xbd_ring.req_prod_pvt) {
  542                 int more_to_do;
  543                 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do);
  544                 if (more_to_do)
  545                         goto again;
  546         } else {
  547                 sc->xbd_ring.sring->rsp_event = i + 1;
  548         }
  549 
  550         if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
  551                 xbd_thaw(sc, XBDF_WAIT_IDLE);
  552 
  553         xbd_startio(sc);
  554 
  555         if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED))
  556                 wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]);
  557 
  558         mtx_unlock(&sc->xbd_io_lock);
  559 }
  560 
  561 /*------------------------------- Dump Support -------------------------------*/
  562 /**
  563  * Quiesce the disk writes for a dump file before allowing the next buffer.
  564  */
  565 static void
  566 xbd_quiesce(struct xbd_softc *sc)
  567 {
  568         int mtd;
  569 
   570         /* While there are outstanding requests. */
  571         while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
  572                 RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
  573                 if (mtd) {
  574                         /* Received request completions, update queue. */
  575                         xbd_int(sc);
  576                 }
  577                 if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
  578                         /*
  579                          * Still pending requests, wait for the disk i/o
  580                          * to complete.
  581                          */
  582                         HYPERVISOR_yield();
  583                 }
  584         }
  585 }
  586 
  587 /* Kernel dump function for a paravirtualized disk device */
  588 static void
  589 xbd_dump_complete(struct xbd_command *cm)
  590 {
  591 
  592         xbd_enqueue_cm(cm, XBD_Q_COMPLETE);
  593 }
  594 
  595 static int
  596 xbd_dump(void *arg, void *virtual, off_t offset, size_t length)
  597 {
  598         struct disk *dp = arg;
  599         struct xbd_softc *sc = dp->d_drv1;
  600         struct xbd_command *cm;
  601         size_t chunk;
  602         int rc = 0;
  603 
  604         if (length == 0)
  605                 return (0);
  606 
  607         xbd_quiesce(sc);        /* All quiet on the western front. */
  608 
  609         /*
  610          * If this lock is held, then this module is failing, and a
  611          * successful kernel dump is highly unlikely anyway.
  612          */
  613         mtx_lock(&sc->xbd_io_lock);
  614 
  615         /* Split the 64KB block as needed */
  616         while (length > 0) {
  617                 cm = xbd_dequeue_cm(sc, XBD_Q_FREE);
  618                 if (cm == NULL) {
  619                         mtx_unlock(&sc->xbd_io_lock);
  620                         device_printf(sc->xbd_dev, "dump: no more commands?\n");
  621                         return (EBUSY);
  622                 }
  623 
  624                 if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
  625                     &cm->cm_gref_head) != 0) {
  626                         xbd_free_command(cm);
  627                         mtx_unlock(&sc->xbd_io_lock);
  628                         device_printf(sc->xbd_dev, "no more grant allocs?\n");
  629                         return (EBUSY);
  630                 }
  631 
  632                 chunk = length > sc->xbd_max_request_size ?
  633                     sc->xbd_max_request_size : length;
  634                 cm->cm_data = virtual;
  635                 cm->cm_datalen = chunk;
  636                 cm->cm_operation = BLKIF_OP_WRITE;
  637                 cm->cm_sector_number = offset / dp->d_sectorsize;
  638                 cm->cm_complete = xbd_dump_complete;
  639 
  640                 xbd_enqueue_cm(cm, XBD_Q_READY);
  641 
  642                 length -= chunk;
  643                 offset += chunk;
  644                 virtual = (char *) virtual + chunk;
  645         }
  646 
  647         /* Tell DOM0 to do the I/O */
  648         xbd_startio(sc);
  649         mtx_unlock(&sc->xbd_io_lock);
  650 
  651         /* Poll for the completion. */
   652         xbd_quiesce(sc);        /* All quiet on the eastern front. */
  653 
  654         /* If there were any errors, bail out... */
  655         while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) {
  656                 if (cm->cm_status != BLKIF_RSP_OKAY) {
  657                         device_printf(sc->xbd_dev,
  658                             "Dump I/O failed at sector %jd\n",
  659                             cm->cm_sector_number);
  660                         rc = EIO;
  661                 }
  662                 xbd_free_command(cm);
  663         }
  664 
  665         return (rc);
  666 }
  667 
  668 /*----------------------------- Disk Entrypoints -----------------------------*/
  669 static int
  670 xbd_open(struct disk *dp)
  671 {
  672         struct xbd_softc *sc = dp->d_drv1;
  673 
  674         if (sc == NULL) {
   675                 printf("xbd%d: not found\n", dp->d_unit);
  676                 return (ENXIO);
  677         }
  678 
  679         sc->xbd_flags |= XBDF_OPEN;
  680         sc->xbd_users++;
  681         return (0);
  682 }
  683 
  684 static int
  685 xbd_close(struct disk *dp)
  686 {
  687         struct xbd_softc *sc = dp->d_drv1;
  688 
  689         if (sc == NULL)
  690                 return (ENXIO);
  691         sc->xbd_flags &= ~XBDF_OPEN;
  692         if (--(sc->xbd_users) == 0) {
  693                 /*
  694                  * Check whether we have been instructed to close.  We will
  695                  * have ignored this request initially, as the device was
  696                  * still mounted.
  697                  */
  698                 if (xenbus_get_otherend_state(sc->xbd_dev) ==
  699                     XenbusStateClosing)
  700                         xbd_closing(sc->xbd_dev);
  701         }
  702         return (0);
  703 }
  704 
  705 static int
  706 xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
  707 {
  708         struct xbd_softc *sc = dp->d_drv1;
  709 
  710         if (sc == NULL)
  711                 return (ENXIO);
  712 
  713         return (ENOTTY);
  714 }
  715 
  716 /*
   717  * Read/write routine for a buffer.  Finds the proper unit, places it on
   718  * the sort queue, and kicks the controller.
  719  */
  720 static void
  721 xbd_strategy(struct bio *bp)
  722 {
  723         struct xbd_softc *sc = bp->bio_disk->d_drv1;
  724 
  725         /* bogus disk? */
  726         if (sc == NULL) {
  727                 bp->bio_error = EINVAL;
  728                 bp->bio_flags |= BIO_ERROR;
  729                 bp->bio_resid = bp->bio_bcount;
  730                 biodone(bp);
  731                 return;
  732         }
  733 
  734         /*
  735          * Place it in the queue of disk activities for this disk
  736          */
  737         mtx_lock(&sc->xbd_io_lock);
  738 
  739         xbd_enqueue_bio(sc, bp);
  740         xbd_startio(sc);
  741 
  742         mtx_unlock(&sc->xbd_io_lock);
  743         return;
  744 }
  745 
  746 /*------------------------------ Ring Management -----------------------------*/
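       /*
        * Allocate the (possibly multi-page) shared request ring, grant each
        * page to the backend, publish the grant references via the XenStore
        * ring-ref node(s), and bind the event channel used for completions.
        */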
  747 static int 
  748 xbd_alloc_ring(struct xbd_softc *sc)
  749 {
  750         blkif_sring_t *sring;
  751         uintptr_t sring_page_addr;
  752         int error;
  753         int i;
  754 
  755         sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
  756             M_NOWAIT|M_ZERO);
  757         if (sring == NULL) {
  758                 xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring");
  759                 return (ENOMEM);
  760         }
  761         SHARED_RING_INIT(sring);
  762         FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE);
  763 
  764         for (i = 0, sring_page_addr = (uintptr_t)sring;
  765              i < sc->xbd_ring_pages;
  766              i++, sring_page_addr += PAGE_SIZE) {
  767                 error = xenbus_grant_ring(sc->xbd_dev,
  768                     (vtophys(sring_page_addr) >> PAGE_SHIFT),
  769                     &sc->xbd_ring_ref[i]);
  770                 if (error) {
  771                         xenbus_dev_fatal(sc->xbd_dev, error,
  772                             "granting ring_ref(%d)", i);
  773                         return (error);
  774                 }
  775         }
  776         if (sc->xbd_ring_pages == 1) {
  777                 error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
  778                     "ring-ref", "%u", sc->xbd_ring_ref[0]);
  779                 if (error) {
  780                         xenbus_dev_fatal(sc->xbd_dev, error,
  781                             "writing %s/ring-ref",
  782                             xenbus_get_node(sc->xbd_dev));
  783                         return (error);
  784                 }
  785         } else {
  786                 for (i = 0; i < sc->xbd_ring_pages; i++) {
   787                         char ring_ref_name[] = "ring_refXX";
  788 
  789                         snprintf(ring_ref_name, sizeof(ring_ref_name),
  790                             "ring-ref%u", i);
  791                         error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
  792                              ring_ref_name, "%u", sc->xbd_ring_ref[i]);
  793                         if (error) {
  794                                 xenbus_dev_fatal(sc->xbd_dev, error,
  795                                     "writing %s/%s",
  796                                     xenbus_get_node(sc->xbd_dev),
  797                                     ring_ref_name);
  798                                 return (error);
  799                         }
  800                 }
  801         }
  802 
  803         error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev,
  804             xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc,
  805             INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle);
  806         if (error) {
  807                 xenbus_dev_fatal(sc->xbd_dev, error,
  808                     "xen_intr_alloc_and_bind_local_port failed");
  809                 return (error);
  810         }
  811 
  812         return (0);
  813 }
  814 
  815 static void
  816 xbd_free_ring(struct xbd_softc *sc)
  817 {
  818         int i;
  819 
  820         if (sc->xbd_ring.sring == NULL)
  821                 return;
  822 
  823         for (i = 0; i < sc->xbd_ring_pages; i++) {
  824                 if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) {
  825                         gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]);
  826                         sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
  827                 }
  828         }
  829         free(sc->xbd_ring.sring, M_XENBLOCKFRONT);
  830         sc->xbd_ring.sring = NULL;
  831 }
  832 
  833 /*-------------------------- Initialization/Teardown -------------------------*/
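       /*
        * Format the negotiated feature flags as a comma-separated string for
        * the probe message and the "features" sysctl; returns the string
        * length, or -1 if the supplied buffer overflowed.
        */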
  834 static int
  835 xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
  836 {
  837         struct sbuf sb;
  838         int feature_cnt;
  839 
  840         sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
  841 
  842         feature_cnt = 0;
  843         if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
  844                 sbuf_printf(&sb, "flush");
  845                 feature_cnt++;
  846         }
  847 
  848         if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
  849                 if (feature_cnt != 0)
  850                         sbuf_printf(&sb, ", ");
  851                 sbuf_printf(&sb, "write_barrier");
  852                 feature_cnt++;
  853         }
  854 
  855         if ((sc->xbd_flags & XBDF_DISCARD) != 0) {
  856                 if (feature_cnt != 0)
  857                         sbuf_printf(&sb, ", ");
  858                 sbuf_printf(&sb, "discard");
  859                 feature_cnt++;
  860         }
  861 
  862         if ((sc->xbd_flags & XBDF_PERSISTENT) != 0) {
  863                 if (feature_cnt != 0)
  864                         sbuf_printf(&sb, ", ");
  865                 sbuf_printf(&sb, "persistent_grants");
  866                 feature_cnt++;
  867         }
  868 
  869         (void) sbuf_finish(&sb);
  870         return (sbuf_len(&sb));
  871 }
  872 
  873 static int
  874 xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
  875 {
  876         char features[80];
  877         struct xbd_softc *sc = arg1;
  878         int error;
  879         int len;
  880 
  881         error = sysctl_wire_old_buffer(req, 0);
  882         if (error != 0)
  883                 return (error);
  884 
  885         len = xbd_feature_string(sc, features, sizeof(features));
  886 
  887         /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
  888         return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
  889 }
  890 
  891 static void
  892 xbd_setup_sysctl(struct xbd_softc *xbd)
  893 {
  894         struct sysctl_ctx_list *sysctl_ctx = NULL;
  895         struct sysctl_oid *sysctl_tree = NULL;
  896         struct sysctl_oid_list *children;
  897 
  898         sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
  899         if (sysctl_ctx == NULL)
  900                 return;
  901 
  902         sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev);
  903         if (sysctl_tree == NULL)
  904                 return;
  905 
  906         children = SYSCTL_CHILDREN(sysctl_tree);
  907         SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
  908             "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
  909             "maximum outstanding requests (negotiated)");
  910 
  911         SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
  912             "max_request_segments", CTLFLAG_RD,
  913             &xbd->xbd_max_request_segments, 0,
  914             "maximum number of pages per requests (negotiated)");
  915 
  916         SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
  917             "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
  918             "maximum size in bytes of a request (negotiated)");
  919 
  920         SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
  921             "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
  922             "communication channel pages (negotiated)");
  923 
  924         SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
  925             "features", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, xbd,
  926             0, xbd_sysctl_features, "A", "protocol features (negotiated)");
  927 }
  928 
  929 /*
  930  * Translate Linux major/minor to an appropriate name and unit
  931  * number. For HVM guests, this allows us to use the same drive names
  932  * with blkfront as the emulated drives, easing transition slightly.
  933  */
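       /*
        * For example, vdevice 0x841 decodes to major 8 (SCSI disk0) and
        * minor 0x41, giving unit 0 + (0x41 >> 4) = 4 and the name "da",
        * i.e. da4.
        */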
  934 static void
  935 xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
  936 {
  937         static struct vdev_info {
  938                 int major;
  939                 int shift;
  940                 int base;
  941                 const char *name;
  942         } info[] = {
  943                 {3,     6,      0,      "ada"}, /* ide0 */
  944                 {22,    6,      2,      "ada"}, /* ide1 */
  945                 {33,    6,      4,      "ada"}, /* ide2 */
  946                 {34,    6,      6,      "ada"}, /* ide3 */
  947                 {56,    6,      8,      "ada"}, /* ide4 */
  948                 {57,    6,      10,     "ada"}, /* ide5 */
  949                 {88,    6,      12,     "ada"}, /* ide6 */
  950                 {89,    6,      14,     "ada"}, /* ide7 */
  951                 {90,    6,      16,     "ada"}, /* ide8 */
  952                 {91,    6,      18,     "ada"}, /* ide9 */
  953 
  954                 {8,     4,      0,      "da"},  /* scsi disk0 */
  955                 {65,    4,      16,     "da"},  /* scsi disk1 */
  956                 {66,    4,      32,     "da"},  /* scsi disk2 */
  957                 {67,    4,      48,     "da"},  /* scsi disk3 */
  958                 {68,    4,      64,     "da"},  /* scsi disk4 */
  959                 {69,    4,      80,     "da"},  /* scsi disk5 */
  960                 {70,    4,      96,     "da"},  /* scsi disk6 */
  961                 {71,    4,      112,    "da"},  /* scsi disk7 */
  962                 {128,   4,      128,    "da"},  /* scsi disk8 */
  963                 {129,   4,      144,    "da"},  /* scsi disk9 */
  964                 {130,   4,      160,    "da"},  /* scsi disk10 */
  965                 {131,   4,      176,    "da"},  /* scsi disk11 */
  966                 {132,   4,      192,    "da"},  /* scsi disk12 */
  967                 {133,   4,      208,    "da"},  /* scsi disk13 */
  968                 {134,   4,      224,    "da"},  /* scsi disk14 */
  969                 {135,   4,      240,    "da"},  /* scsi disk15 */
  970 
  971                 {202,   4,      0,      "xbd"}, /* xbd */
  972 
  973                 {0,     0,      0,      NULL},
  974         };
  975         int major = vdevice >> 8;
  976         int minor = vdevice & 0xff;
  977         int i;
  978 
  979         if (vdevice & (1 << 28)) {
  980                 *unit = (vdevice & ((1 << 28) - 1)) >> 8;
  981                 *name = "xbd";
  982                 return;
  983         }
  984 
  985         for (i = 0; info[i].major; i++) {
  986                 if (info[i].major == major) {
  987                         *unit = info[i].base + (minor >> info[i].shift);
  988                         *name = info[i].name;
  989                         return;
  990                 }
  991         }
  992 
  993         *unit = minor >> 4;
  994         *name = "xbd";
  995 }
  996 
  997 int
  998 xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
  999     int vdevice, uint16_t vdisk_info, unsigned long sector_size,
 1000     unsigned long phys_sector_size)
 1001 {
 1002         char features[80];
 1003         int unit, error = 0;
 1004         const char *name;
 1005 
 1006         xbd_vdevice_to_unit(vdevice, &unit, &name);
 1007 
 1008         sc->xbd_unit = unit;
 1009 
 1010         if (strcmp(name, "xbd") != 0)
 1011                 device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
 1012 
 1013         if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
 1014                 device_printf(sc->xbd_dev, "features: %s\n",
 1015                     features);
 1016         }
 1017 
 1018         sc->xbd_disk = disk_alloc();
 1019         sc->xbd_disk->d_unit = sc->xbd_unit;
 1020         sc->xbd_disk->d_open = xbd_open;
 1021         sc->xbd_disk->d_close = xbd_close;
 1022         sc->xbd_disk->d_ioctl = xbd_ioctl;
 1023         sc->xbd_disk->d_strategy = xbd_strategy;
 1024         sc->xbd_disk->d_dump = xbd_dump;
 1025         sc->xbd_disk->d_name = name;
 1026         sc->xbd_disk->d_drv1 = sc;
 1027         sc->xbd_disk->d_sectorsize = sector_size;
 1028         sc->xbd_disk->d_stripesize = phys_sector_size;
 1029         sc->xbd_disk->d_stripeoffset = 0;
 1030 
 1031         sc->xbd_disk->d_mediasize = sectors * sector_size;
 1032         sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
 1033         sc->xbd_disk->d_flags = DISKFLAG_UNMAPPED_BIO;
 1034         if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
 1035                 sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
 1036                 device_printf(sc->xbd_dev,
 1037                     "synchronize cache commands enabled.\n");
 1038         }
 1039         disk_create(sc->xbd_disk, DISK_VERSION);
 1040 
 1041         return error;
 1042 }
 1043 
 1044 static void 
 1045 xbd_free(struct xbd_softc *sc)
 1046 {
 1047         int i;
 1048 
 1049         /* Prevent new requests being issued until we fix things up. */
 1050         mtx_lock(&sc->xbd_io_lock);
 1051         sc->xbd_state = XBD_STATE_DISCONNECTED; 
 1052         mtx_unlock(&sc->xbd_io_lock);
 1053 
 1054         /* Free resources associated with old device channel. */
 1055         xbd_free_ring(sc);
 1056         if (sc->xbd_shadow) {
 1057                 for (i = 0; i < sc->xbd_max_requests; i++) {
 1058                         struct xbd_command *cm;
 1059 
 1060                         cm = &sc->xbd_shadow[i];
 1061                         if (cm->cm_sg_refs != NULL) {
 1062                                 free(cm->cm_sg_refs, M_XENBLOCKFRONT);
 1063                                 cm->cm_sg_refs = NULL;
 1064                         }
 1065 
 1066                         if (cm->cm_indirectionpages != NULL) {
 1067                                 gnttab_end_foreign_access_references(
 1068                                     sc->xbd_max_request_indirectpages,
 1069                                     &cm->cm_indirectionrefs[0]);
 1070                                 contigfree(cm->cm_indirectionpages, PAGE_SIZE *
 1071                                     sc->xbd_max_request_indirectpages,
 1072                                     M_XENBLOCKFRONT);
 1073                                 cm->cm_indirectionpages = NULL;
 1074                         }
 1075 
 1076                         bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
 1077                 }
 1078                 free(sc->xbd_shadow, M_XENBLOCKFRONT);
 1079                 sc->xbd_shadow = NULL;
 1080 
 1081                 bus_dma_tag_destroy(sc->xbd_io_dmat);
 1082                 
 1083                 xbd_initq_cm(sc, XBD_Q_FREE);
 1084                 xbd_initq_cm(sc, XBD_Q_READY);
 1085                 xbd_initq_cm(sc, XBD_Q_COMPLETE);
 1086         }
 1087                 
 1088         xen_intr_unbind(&sc->xen_intr_handle);
 1089 
 1090 }
 1091 
 1092 /*--------------------------- State Change Handlers --------------------------*/
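       /*
        * First stage of XenBus negotiation: read the backend's ring size
        * limits, allocate and grant the shared ring, then publish the
        * front-end parameters (ring references, event channel, protocol)
        * before advancing to XenbusStateInitialised.
        */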
 1093 static void
 1094 xbd_initialize(struct xbd_softc *sc)
 1095 {
 1096         const char *otherend_path;
 1097         const char *node_path;
 1098         uint32_t max_ring_page_order;
 1099         int error;
 1100 
 1101         if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) {
 1102                 /* Initialization has already been performed. */
 1103                 return;
 1104         }
 1105 
 1106         /*
 1107          * Protocol defaults valid even if negotiation for a
 1108          * setting fails.
 1109          */
 1110         max_ring_page_order = 0;
 1111         sc->xbd_ring_pages = 1;
 1112 
 1113         /*
 1114          * Protocol negotiation.
 1115          *
 1116          * \note xs_gather() returns on the first encountered error, so
 1117          *       we must use independent calls in order to guarantee
  1118          *       we don't miss information in a sparsely populated back-end
 1119          *       tree.
 1120          *
 1121          * \note xs_scanf() does not update variables for unmatched
 1122          *       fields.
 1123          */
 1124         otherend_path = xenbus_get_otherend_path(sc->xbd_dev);
 1125         node_path = xenbus_get_node(sc->xbd_dev);
 1126 
 1127         /* Support both backend schemes for relaying ring page limits. */
 1128         (void)xs_scanf(XST_NIL, otherend_path,
 1129             "max-ring-page-order", NULL, "%" PRIu32,
 1130             &max_ring_page_order);
 1131         sc->xbd_ring_pages = 1 << max_ring_page_order;
 1132         (void)xs_scanf(XST_NIL, otherend_path,
 1133             "max-ring-pages", NULL, "%" PRIu32,
 1134             &sc->xbd_ring_pages);
 1135         if (sc->xbd_ring_pages < 1)
 1136                 sc->xbd_ring_pages = 1;
 1137 
 1138         if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) {
 1139                 device_printf(sc->xbd_dev,
 1140                     "Back-end specified ring-pages of %u "
 1141                     "limited to front-end limit of %u.\n",
 1142                     sc->xbd_ring_pages, XBD_MAX_RING_PAGES);
 1143                 sc->xbd_ring_pages = XBD_MAX_RING_PAGES;
 1144         }
 1145 
 1146         if (powerof2(sc->xbd_ring_pages) == 0) {
 1147                 uint32_t new_page_limit;
 1148 
 1149                 new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1);
 1150                 device_printf(sc->xbd_dev,
 1151                     "Back-end specified ring-pages of %u "
 1152                     "is not a power of 2. Limited to %u.\n",
 1153                     sc->xbd_ring_pages, new_page_limit);
 1154                 sc->xbd_ring_pages = new_page_limit;
 1155         }
 1156 
 1157         sc->xbd_max_requests =
 1158             BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE);
 1159         if (sc->xbd_max_requests > XBD_MAX_REQUESTS) {
 1160                 device_printf(sc->xbd_dev,
 1161                     "Back-end specified max_requests of %u "
 1162                     "limited to front-end limit of %zu.\n",
 1163                     sc->xbd_max_requests, XBD_MAX_REQUESTS);
 1164                 sc->xbd_max_requests = XBD_MAX_REQUESTS;
 1165         }
 1166 
 1167         if (xbd_alloc_ring(sc) != 0)
 1168                 return;
 1169 
 1170         /* Support both backend schemes for relaying ring page limits. */
 1171         if (sc->xbd_ring_pages > 1) {
 1172                 error = xs_printf(XST_NIL, node_path,
 1173                     "num-ring-pages","%u",
 1174                     sc->xbd_ring_pages);
 1175                 if (error) {
 1176                         xenbus_dev_fatal(sc->xbd_dev, error,
 1177                             "writing %s/num-ring-pages",
 1178                             node_path);
 1179                         return;
 1180                 }
 1181 
 1182                 error = xs_printf(XST_NIL, node_path,
 1183                     "ring-page-order", "%u",
 1184                     fls(sc->xbd_ring_pages) - 1);
 1185                 if (error) {
 1186                         xenbus_dev_fatal(sc->xbd_dev, error,
 1187                             "writing %s/ring-page-order",
 1188                             node_path);
 1189                         return;
 1190                 }
 1191         }
 1192 
 1193         error = xs_printf(XST_NIL, node_path, "event-channel",
 1194             "%u", xen_intr_port(sc->xen_intr_handle));
 1195         if (error) {
 1196                 xenbus_dev_fatal(sc->xbd_dev, error,
 1197                     "writing %s/event-channel",
 1198                     node_path);
 1199                 return;
 1200         }
 1201 
 1202         error = xs_printf(XST_NIL, node_path, "protocol",
 1203             "%s", XEN_IO_PROTO_ABI_NATIVE);
 1204         if (error) {
 1205                 xenbus_dev_fatal(sc->xbd_dev, error,
 1206                     "writing %s/protocol",
 1207                     node_path);
 1208                 return;
 1209         }
 1210 
 1211         xenbus_set_state(sc->xbd_dev, XenbusStateInitialised);
 1212 }
 1213 
 1214 /* 
 1215  * Invoked when the backend is finally 'ready' (and has published
 1216  * the details about the physical device - #sectors, size, etc). 
 1217  */
 1218 static void 
 1219 xbd_connect(struct xbd_softc *sc)
 1220 {
 1221         device_t dev = sc->xbd_dev;
 1222         blkif_sector_t sectors;
 1223         unsigned long sector_size, phys_sector_size;
 1224         unsigned int binfo;
 1225         int err, feature_barrier, feature_flush;
 1226         int i, j;
 1227 
 1228         DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
 1229 
 1230         if (sc->xbd_state == XBD_STATE_SUSPENDED) {
 1231                 return;
 1232         }
 1233 
 1234         if (sc->xbd_state == XBD_STATE_CONNECTED) {
 1235                 struct disk *disk;
 1236 
 1237                 disk = sc->xbd_disk;
 1238                 if (disk == NULL) {
 1239                         return;
 1240                 }
 1241                 err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1242                     "sectors", "%"PRIu64, &sectors, NULL);
 1243                 if (err != 0) {
 1244                         xenbus_dev_error(dev, err,
 1245                             "reading sectors at %s",
 1246                             xenbus_get_otherend_path(dev));
 1247                         return;
 1248                 }
 1249                 disk->d_mediasize = disk->d_sectorsize * sectors;
 1250                 err = disk_resize(disk, M_NOWAIT);
 1251                 if (err) {
 1252                         xenbus_dev_error(dev, err,
 1253                             "unable to resize disk %s%u",
 1254                             disk->d_name, disk->d_unit);
 1255                         return;
 1256                 }
 1257                 device_printf(sc->xbd_dev,
 1258                     "changed capacity to %jd\n",
 1259                     (intmax_t)disk->d_mediasize);
 1260                 return;
 1261         }
 1262 
 1263         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1264             "sectors", "%"PRIu64, &sectors,
 1265             "info", "%u", &binfo,
 1266             "sector-size", "%lu", &sector_size,
 1267             NULL);
 1268         if (err) {
 1269                 xenbus_dev_fatal(dev, err,
 1270                     "reading backend fields at %s",
 1271                     xenbus_get_otherend_path(dev));
 1272                 return;
 1273         }
 1274         if ((sectors == 0) || (sector_size == 0)) {
 1275                 xenbus_dev_fatal(dev, 0,
 1276                     "invalid parameters from %s:"
 1277                     " sectors = %"PRIu64", sector_size = %lu",
 1278                     xenbus_get_otherend_path(dev),
 1279                     sectors, sector_size);
 1280                 return;
 1281         }
 1282         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1283              "physical-sector-size", "%lu", &phys_sector_size,
 1284              NULL);
 1285         if (err || phys_sector_size <= sector_size)
 1286                 phys_sector_size = 0;
 1287         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1288              "feature-barrier", "%d", &feature_barrier,
 1289              NULL);
 1290         if (err == 0 && feature_barrier != 0)
 1291                 sc->xbd_flags |= XBDF_BARRIER;
 1292 
 1293         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1294              "feature-flush-cache", "%d", &feature_flush,
 1295              NULL);
 1296         if (err == 0 && feature_flush != 0)
 1297                 sc->xbd_flags |= XBDF_FLUSH;
 1298 
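               /*
                * Negotiate indirect segment support.  The advertised maximum
                * is clamped to XBD_MAX_INDIRECT_SEGMENTS and to what maxphys
                * allows, and never drops below BLKIF_MAX_SEGMENTS_PER_REQUEST;
                * the result sizes each request and its indirection pages.
                */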
 1299         err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 1300             "feature-max-indirect-segments", "%" PRIu32,
 1301             &sc->xbd_max_request_segments, NULL);
 1302         if ((err != 0) || (xbd_enable_indirect == 0))
 1303                 sc->xbd_max_request_segments = 0;
 1304         if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS)
 1305                 sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS;
 1306         if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(maxphys))
 1307                 sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(maxphys);
 1308         sc->xbd_max_request_indirectpages =
 1309             XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments);
 1310         if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
 1311                 sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 1312         sc->xbd_max_request_size =
 1313             XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
 1314 
 1315         /* Allocate datastructures based on negotiated values. */
 1316         err = bus_dma_tag_create(
 1317             bus_get_dma_tag(sc->xbd_dev),       /* parent */
 1318             512, PAGE_SIZE,                     /* algnmnt, boundary */
 1319             BUS_SPACE_MAXADDR,                  /* lowaddr */
 1320             BUS_SPACE_MAXADDR,                  /* highaddr */
 1321             NULL, NULL,                         /* filter, filterarg */
 1322             sc->xbd_max_request_size,
 1323             sc->xbd_max_request_segments,
 1324             PAGE_SIZE,                          /* maxsegsize */
 1325             BUS_DMA_ALLOCNOW,                   /* flags */
 1326             busdma_lock_mutex,                  /* lockfunc */
 1327             &sc->xbd_io_lock,                   /* lockarg */
 1328             &sc->xbd_io_dmat);
 1329         if (err != 0) {
 1330                 xenbus_dev_fatal(sc->xbd_dev, err,
 1331                     "Cannot allocate parent DMA tag\n");
 1332                 return;
 1333         }
 1334 
 1335         /* Per-transaction data allocation. */
 1336         sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests,
 1337             M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
 1338         if (sc->xbd_shadow == NULL) {
 1339                 bus_dma_tag_destroy(sc->xbd_io_dmat);
 1340                 xenbus_dev_fatal(sc->xbd_dev, ENOMEM,
 1341                     "Cannot allocate request structures\n");
 1342                 return;
 1343         }
 1344 
 1345         for (i = 0; i < sc->xbd_max_requests; i++) {
 1346                 struct xbd_command *cm;
 1347                 void * indirectpages;
 1348 
 1349                 cm = &sc->xbd_shadow[i];
 1350                 cm->cm_sg_refs = malloc(
 1351                     sizeof(grant_ref_t) * sc->xbd_max_request_segments,
 1352                     M_XENBLOCKFRONT, M_NOWAIT);
 1353                 if (cm->cm_sg_refs == NULL)
 1354                         break;
 1355                 cm->cm_id = i;
 1356                 cm->cm_flags = XBDCF_INITIALIZER;
 1357                 cm->cm_sc = sc;
 1358                 if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
 1359                         break;
 1360                 if (sc->xbd_max_request_indirectpages > 0) {
 1361                         indirectpages = contigmalloc(
 1362                             PAGE_SIZE * sc->xbd_max_request_indirectpages,
 1363                             M_XENBLOCKFRONT, M_ZERO | M_NOWAIT, 0, ~0,
 1364                             PAGE_SIZE, 0);
 1365                         if (indirectpages == NULL)
 1366                                 sc->xbd_max_request_indirectpages = 0;
 1367                 } else {
 1368                         indirectpages = NULL;
 1369                 }
 1370                 for (j = 0; j < sc->xbd_max_request_indirectpages; j++) {
 1371                         if (gnttab_grant_foreign_access(
 1372                             xenbus_get_otherend_id(sc->xbd_dev),
 1373                             (vtophys(indirectpages) >> PAGE_SHIFT) + j,
 1374                             1 /* grant read-only access */,
 1375                             &cm->cm_indirectionrefs[j]))
 1376                                 break;
 1377                 }
 1378                 if (j < sc->xbd_max_request_indirectpages) {
 1379                         contigfree(indirectpages,
 1380                             PAGE_SIZE * sc->xbd_max_request_indirectpages,
 1381                             M_XENBLOCKFRONT);
 1382                         break;
 1383                 }
 1384                 cm->cm_indirectionpages = indirectpages;
 1385                 xbd_free_command(cm);
 1386         }
 1387 
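              /*
               * Publish the GEOM disk the first time we connect; after a
               * resume the existing disk instance is reused.
               */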
 1388         if (sc->xbd_disk == NULL) {
 1389                 device_printf(dev, "%juMB <%s> at %s",
 1390                     (uintmax_t) sectors / (1048576 / sector_size),
 1391                     device_get_desc(dev),
 1392                     xenbus_get_node(dev));
 1393                 bus_print_child_footer(device_get_parent(dev), dev);
 1394 
 1395                 xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo,
 1396                     sector_size, phys_sector_size);
 1397         }
 1398 
 1399         (void)xenbus_set_state(dev, XenbusStateConnected); 
 1400 
 1401         /* Kick pending requests. */
 1402         mtx_lock(&sc->xbd_io_lock);
 1403         sc->xbd_state = XBD_STATE_CONNECTED;
 1404         xbd_startio(sc);
 1405         sc->xbd_flags |= XBDF_READY;
 1406         mtx_unlock(&sc->xbd_io_lock);
 1407 }
 1408 
 1409 /**
 1410  * Handle the change of state of the backend to Closing.  We must delete our
 1411  * device-layer structures now, to ensure that writes are flushed through to
 1412  * the backend.  Once this is done, we can switch to Closed in
 1413  * acknowledgement.
 1414  */
 1415 static void
 1416 xbd_closing(device_t dev)
 1417 {
 1418         struct xbd_softc *sc = device_get_softc(dev);
 1419 
 1420         xenbus_set_state(dev, XenbusStateClosing);
 1421 
 1422         DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev));
 1423 
 1424         if (sc->xbd_disk != NULL) {
 1425                 disk_destroy(sc->xbd_disk);
 1426                 sc->xbd_disk = NULL;
 1427         }
 1428 
 1429         xenbus_set_state(dev, XenbusStateClosed); 
 1430 }
 1431 
 1432 /*---------------------------- NewBus Entrypoints ----------------------------*/
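      /*
       * Match XenStore nodes of type "vbd".  The probe fails when PV disk
       * support has been disabled, and in HVM domains the PV CDROM node is
       * skipped in favor of the emulated CDROM device.
       */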
 1433 static int
 1434 xbd_probe(device_t dev)
 1435 {
 1436         if (strcmp(xenbus_get_type(dev), "vbd") != 0)
 1437                 return (ENXIO);
 1438 
 1439         if (xen_pv_disks_disabled())
 1440                 return (ENXIO);
 1441 
 1442         if (xen_hvm_domain()) {
 1443                 int error;
 1444                 char *type;
 1445 
 1446                 /*
 1447                  * When running in an HVM domain, IDE disk emulation is
 1448                  * disabled early in boot so that native drivers will
 1449                  * not see emulated hardware.  However, CDROM device
 1450                  * emulation cannot be disabled.
 1451                  *
 1452                  * Through use of FreeBSD's vm_guest and xen_hvm_domain()
 1453                  * APIs, we could modify the native CDROM driver to fail its
 1454                  * probe when running under Xen.  Unfortunately, the PV
 1455                  * CDROM support in XenServer (up through at least version
 1456                  * 6.2) isn't functional, so we instead rely on the emulated
 1457                  * CDROM instance, and fail to attach the PV one here in
 1458                  * the blkfront driver.
 1459                  */
 1460                 error = xs_read(XST_NIL, xenbus_get_node(dev),
 1461                     "device-type", NULL, (void **) &type);
 1462                 if (error)
 1463                         return (ENXIO);
 1464 
 1465                 if (strncmp(type, "cdrom", 5) == 0) {
 1466                         free(type, M_XENSTORE);
 1467                         return (ENXIO);
 1468                 }
 1469                 free(type, M_XENSTORE);
 1470         }
 1471 
 1472         device_set_desc(dev, "Virtual Block Device");
 1473         device_quiet(dev);
 1474         return (0);
 1475 }
 1476 
 1477 /*
 1478  * Attach: the XenStore node supplies the backend directory and virtual
 1479  * device number.  Initialize per-instance state here; the event channel
 1480  * and shared ring are set up once the backend publishes its capabilities.
 1481  */
 1482 static int
 1483 xbd_attach(device_t dev)
 1484 {
 1485         struct xbd_softc *sc;
 1486         const char *name;
 1487         uint32_t vdevice;
 1488         int error;
 1489         int i;
 1490         int unit;
 1491 
 1492         /* FIXME: Use dynamic device id if this is not set. */
 1493         error = xs_scanf(XST_NIL, xenbus_get_node(dev),
 1494             "virtual-device", NULL, "%" PRIu32, &vdevice);
 1495         if (error)
 1496                 error = xs_scanf(XST_NIL, xenbus_get_node(dev),
 1497                     "virtual-device-ext", NULL, "%" PRIu32, &vdevice);
 1498         if (error) {
 1499                 xenbus_dev_fatal(dev, error, "reading virtual-device");
 1500                 device_printf(dev, "Couldn't determine virtual device.\n");
 1501                 return (error);
 1502         }
 1503 
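              /*
               * Map the Xen virtual device ID to a driver name and unit so
               * that xbd unit numbers follow the Xen device numbering rather
               * than probe order.
               */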
 1504         xbd_vdevice_to_unit(vdevice, &unit, &name);
 1505         if (!strcmp(name, "xbd"))
 1506                 device_set_unit(dev, unit);
 1507 
 1508         sc = device_get_softc(dev);
 1509         mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
 1510         xbd_initqs(sc);
 1511         for (i = 0; i < XBD_MAX_RING_PAGES; i++)
 1512                 sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
 1513 
 1514         sc->xbd_dev = dev;
 1515         sc->xbd_vdevice = vdevice;
 1516         sc->xbd_state = XBD_STATE_DISCONNECTED;
 1517 
 1518         xbd_setup_sysctl(sc);
 1519 
 1520         /* Wait for backend device to publish its protocol capabilities. */
 1521         xenbus_set_state(dev, XenbusStateInitialising);
 1522 
 1523         return (0);
 1524 }
 1525 
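      /*
       * Release all resources allocated over the life of the instance and
       * tear down the per-device I/O lock.
       */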
 1526 static int
 1527 xbd_detach(device_t dev)
 1528 {
 1529         struct xbd_softc *sc = device_get_softc(dev);
 1530 
 1531         DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev));
 1532 
 1533         xbd_free(sc);
 1534         mtx_destroy(&sc->xbd_io_lock);
 1535 
 1536         return (0);
 1537 }
 1538 
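      /*
       * Quiesce the device ahead of a Xen suspend: mark the instance
       * suspended so no new requests are queued, then wait for the busy
       * queue to drain.  I/O that fails to complete within the sleep
       * timeout fails the suspend with EBUSY and the previous state is
       * restored.
       */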
 1539 static int
 1540 xbd_suspend(device_t dev)
 1541 {
 1542         struct xbd_softc *sc = device_get_softc(dev);
 1543         int retval;
 1544         int saved_state;
 1545 
 1546         /* Prevent new requests being issued until we fix things up. */
 1547         mtx_lock(&sc->xbd_io_lock);
 1548         saved_state = sc->xbd_state;
 1549         sc->xbd_state = XBD_STATE_SUSPENDED;
 1550 
 1551         /* Wait for outstanding I/O to drain. */
 1552         retval = 0;
 1553         while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 1554                 if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
 1555                     PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
 1556                         retval = EBUSY;
 1557                         break;
 1558                 }
 1559         }
 1560         mtx_unlock(&sc->xbd_io_lock);
 1561 
 1562         if (retval != 0)
 1563                 sc->xbd_state = saved_state;
 1564 
 1565         return (retval);
 1566 }
 1567 
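      /*
       * If the suspend was cancelled no device state was lost and the
       * connection can simply be marked up again.  Otherwise the grants and
       * shared ring are no longer valid, so tear everything down and
       * renegotiate with the backend from scratch.
       */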
 1568 static int
 1569 xbd_resume(device_t dev)
 1570 {
 1571         struct xbd_softc *sc = device_get_softc(dev);
 1572 
 1573         if (xen_suspend_cancelled) {
 1574                 sc->xbd_state = XBD_STATE_CONNECTED;
 1575                 return (0);
 1576         }
 1577 
 1578         DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev));
 1579 
 1580         xbd_free(sc);
 1581         xbd_initialize(sc);
 1582         return (0);
 1583 }
 1584 
 1585 /**
 1586  * Callback received when the backend's state changes.
 1587  */
 1588 static void
 1589 xbd_backend_changed(device_t dev, XenbusState backend_state)
 1590 {
 1591         struct xbd_softc *sc = device_get_softc(dev);
 1592 
 1593         DPRINTK("backend_state=%d\n", backend_state);
 1594 
 1595         switch (backend_state) {
 1596         case XenbusStateUnknown:
 1597         case XenbusStateInitialising:
 1598         case XenbusStateReconfigured:
 1599         case XenbusStateReconfiguring:
 1600         case XenbusStateClosed:
 1601                 break;
 1602 
 1603         case XenbusStateInitWait:
 1604         case XenbusStateInitialised:
 1605                 xbd_initialize(sc);
 1606                 break;
 1607 
 1608         case XenbusStateConnected:
 1609                 xbd_initialize(sc);
 1610                 xbd_connect(sc);
 1611                 break;
 1612 
 1613         case XenbusStateClosing:
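                      /*
                       * With the disk still open, only mark it as gone so
                       * GEOM can retire it once the last consumer drops;
                       * otherwise detach the device-layer structures now.
                       */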
 1614                 if (sc->xbd_users > 0) {
 1615                         device_printf(dev, "detaching with pending users\n");
 1616                         KASSERT(sc->xbd_disk != NULL,
 1617                             ("NULL disk with pending users\n"));
 1618                         disk_gone(sc->xbd_disk);
 1619                 } else {
 1620                         xbd_closing(dev);
 1621                 }
 1622                 break;  
 1623         }
 1624 }
 1625 
 1626 /*---------------------------- NewBus Registration ---------------------------*/
 1627 static device_method_t xbd_methods[] = { 
 1628         /* Device interface */ 
 1629         DEVMETHOD(device_probe,         xbd_probe), 
 1630         DEVMETHOD(device_attach,        xbd_attach), 
 1631         DEVMETHOD(device_detach,        xbd_detach), 
 1632         DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
 1633         DEVMETHOD(device_suspend,       xbd_suspend), 
 1634         DEVMETHOD(device_resume,        xbd_resume), 
 1635 
 1636         /* Xenbus interface */
 1637         DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed),
 1638 
 1639         { 0, 0 } 
 1640 }; 
 1641 
 1642 static driver_t xbd_driver = { 
 1643         "xbd", 
 1644         xbd_methods, 
 1645         sizeof(struct xbd_softc),                      
 1646 }; 
 1647 
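      /*
       * Register on the xenbusb_front bus; the bus enumerates front-end
       * nodes from the XenStore and xbd_probe() claims those of type "vbd".
       */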
 1648 DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, 0, 0); 
