FreeBSD/Linux Kernel Cross Reference
sys/dev/nvme/nvme_qpair.c


    1 /*-
    2  * Copyright (C) 2012-2013 Intel Corporation
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/9.2/sys/dev/nvme/nvme_qpair.c 253296 2013-07-12 22:07:33Z jimharris $");
   29 
   30 #include <sys/param.h>
   31 #include <sys/bus.h>
   32 
   33 #include <dev/pci/pcivar.h>
   34 
   35 #include "nvme_private.h"
   36 
   37 static void     _nvme_qpair_submit_request(struct nvme_qpair *qpair,
   38                                            struct nvme_request *req);
   39 
   40 struct nvme_opcode_string {
   41 
   42         uint16_t        opc;
   43         const char *    str;
   44 };
   45 
   46 static struct nvme_opcode_string admin_opcode[] = {
   47         { NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
   48         { NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
   49         { NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
   50         { NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
   51         { NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
   52         { NVME_OPC_IDENTIFY, "IDENTIFY" },
   53         { NVME_OPC_ABORT, "ABORT" },
   54         { NVME_OPC_SET_FEATURES, "SET FEATURES" },
   55         { NVME_OPC_GET_FEATURES, "GET FEATURES" },
   56         { NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
   57         { NVME_OPC_FIRMWARE_ACTIVATE, "FIRMWARE ACTIVATE" },
   58         { NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
   59         { NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
   60         { NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
   61         { NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
   62         { 0xFFFF, "ADMIN COMMAND" }
   63 };
   64 
   65 static struct nvme_opcode_string io_opcode[] = {
   66         { NVME_OPC_FLUSH, "FLUSH" },
   67         { NVME_OPC_WRITE, "WRITE" },
   68         { NVME_OPC_READ, "READ" },
   69         { NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
   70         { NVME_OPC_COMPARE, "COMPARE" },
   71         { NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
   72         { 0xFFFF, "IO COMMAND" }
   73 };
   74 
   75 static const char *
   76 get_admin_opcode_string(uint16_t opc)
   77 {
   78         struct nvme_opcode_string *entry;
   79 
   80         entry = admin_opcode;
   81 
   82         while (entry->opc != 0xFFFF) {
   83                 if (entry->opc == opc)
   84                         return (entry->str);
   85                 entry++;
   86         }
   87         return (entry->str);
   88 }
   89 
   90 static const char *
   91 get_io_opcode_string(uint16_t opc)
   92 {
   93         struct nvme_opcode_string *entry;
   94 
   95         entry = io_opcode;
   96 
   97         while (entry->opc != 0xFFFF) {
   98                 if (entry->opc == opc)
   99                         return (entry->str);
  100                 entry++;
  101         }
  102         return (entry->str);
  103 }
  104 
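/*
 * A note on the lookup pattern above: each table is scanned linearly
 *  and is terminated by a 0xFFFF sentinel entry, so an opcode missing
 *  from the table falls through to the sentinel's generic string
 *  instead of walking off the end of the array.  Illustrative calls
 *  (values follow directly from the tables):
 *
 *      get_admin_opcode_string(NVME_OPC_IDENTIFY)  -> "IDENTIFY"
 *      get_io_opcode_string(NVME_OPC_FLUSH)        -> "FLUSH"
 *      get_io_opcode_string(<unlisted opcode>)     -> "IO COMMAND"
 */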
  105 
  106 static void
  107 nvme_admin_qpair_print_command(struct nvme_qpair *qpair,
  108     struct nvme_command *cmd)
  109 {
  110 
  111         nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%x "
  112             "cdw10:%08x cdw11:%08x\n",
  113             get_admin_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid,
  114             cmd->nsid, cmd->cdw10, cmd->cdw11);
  115 }
  116 
  117 static void
  118 nvme_io_qpair_print_command(struct nvme_qpair *qpair,
  119     struct nvme_command *cmd)
  120 {
  121 
  122         switch (cmd->opc) {
  123         case NVME_OPC_WRITE:
  124         case NVME_OPC_READ:
  125         case NVME_OPC_WRITE_UNCORRECTABLE:
  126         case NVME_OPC_COMPARE:
  127                 nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d "
  128                     "lba:%llu len:%d\n",
  129                     get_io_opcode_string(cmd->opc), qpair->id, cmd->cid,
  130                     cmd->nsid,
  131                     ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
  132                     (cmd->cdw12 & 0xFFFF) + 1);
  133                 break;
  134         case NVME_OPC_FLUSH:
  135         case NVME_OPC_DATASET_MANAGEMENT:
  136                 nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n",
  137                     get_io_opcode_string(cmd->opc), qpair->id, cmd->cid,
  138                     cmd->nsid);
  139                 break;
  140         default:
  141                 nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n",
  142                     get_io_opcode_string(cmd->opc), cmd->opc, qpair->id,
  143                     cmd->cid, cmd->nsid);
  144                 break;
  145         }
  146 }
  147 
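/*
 * A sketch of the LBA/length decoding used for READ/WRITE-style
 *  commands above: the starting LBA spans two dwords (cdw11 holds the
 *  upper 32 bits, cdw10 the lower 32), and the NLB field in cdw12 is
 *  zero-based per the NVMe spec, hence the "+ 1":
 *
 *      uint64_t lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
 *      uint32_t len = (cmd->cdw12 & 0xFFFF) + 1;    /- blocks -/
 */
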
  148 static void
  149 nvme_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd)
  150 {
  151         if (qpair->id == 0)
  152                 nvme_admin_qpair_print_command(qpair, cmd);
  153         else
  154                 nvme_io_qpair_print_command(qpair, cmd);
  155 }
  156 
  157 struct nvme_status_string {
  158 
  159         uint16_t        sc;
  160         const char *    str;
  161 };
  162 
  163 static struct nvme_status_string generic_status[] = {
  164         { NVME_SC_SUCCESS, "SUCCESS" },
  165         { NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
  166         { NVME_SC_INVALID_FIELD, "INVALID_FIELD" },
  167         { NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
  168         { NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
  169         { NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
  170         { NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
  171         { NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
  172         { NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
  173         { NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
  174         { NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
  175         { NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
  176         { NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
  177         { NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
  178         { NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
  179         { NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
  180         { 0xFFFF, "GENERIC" }
  181 };
  182 
  183 static struct nvme_status_string command_specific_status[] = {
  184         { NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
  185         { NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
  186         { NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
  187         { NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
  188         { NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
  189         { NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
  190         { NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
  191         { NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
  192         { NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
  193         { NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
  194         { NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" },
  195         { NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
  196         { NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
  197         { NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
  198         { 0xFFFF, "COMMAND SPECIFIC" }
  199 };
  200 
  201 static struct nvme_status_string media_error_status[] = {
  202         { NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
  203         { NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
  204         { NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
  205         { NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
  206         { NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
  207         { NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
  208         { NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
  209         { 0xFFFF, "MEDIA ERROR" }
  210 };
  211 
  212 static const char *
  213 get_status_string(uint16_t sct, uint16_t sc)
  214 {
  215         struct nvme_status_string *entry;
  216 
  217         switch (sct) {
  218         case NVME_SCT_GENERIC:
  219                 entry = generic_status;
  220                 break;
  221         case NVME_SCT_COMMAND_SPECIFIC:
  222                 entry = command_specific_status;
  223                 break;
  224         case NVME_SCT_MEDIA_ERROR:
  225                 entry = media_error_status;
  226                 break;
  227         case NVME_SCT_VENDOR_SPECIFIC:
  228                 return ("VENDOR SPECIFIC");
  229         default:
  230                 return ("RESERVED");
  231         }
  232 
  233         while (entry->sc != 0xFFFF) {
  234                 if (entry->sc == sc)
  235                         return (entry->str);
  236                 entry++;
  237         }
  238         return (entry->str);
  239 }
  240 
  241 static void
  242 nvme_qpair_print_completion(struct nvme_qpair *qpair, 
  243     struct nvme_completion *cpl)
  244 {
  245         nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n",
  246             get_status_string(cpl->status.sct, cpl->status.sc),
  247             cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0);
  248 }
  249 
  250 static boolean_t
  251 nvme_completion_is_retry(const struct nvme_completion *cpl)
  252 {
  253         /*
  254          * TODO: spec is not clear how commands that are aborted due
  255          *  to TLER will be marked.  So for now, it seems
  256          *  NAMESPACE_NOT_READY is the only case where we should
  257          *  look at the DNR bit.
  258          */
  259         switch (cpl->status.sct) {
  260         case NVME_SCT_GENERIC:
  261                 switch (cpl->status.sc) {
  262                 case NVME_SC_ABORTED_BY_REQUEST:
  263                 case NVME_SC_NAMESPACE_NOT_READY:
  264                         if (cpl->status.dnr)
  265                                 return (0);
  266                         else
  267                                 return (1);
  268                 case NVME_SC_INVALID_OPCODE:
  269                 case NVME_SC_INVALID_FIELD:
  270                 case NVME_SC_COMMAND_ID_CONFLICT:
  271                 case NVME_SC_DATA_TRANSFER_ERROR:
  272                 case NVME_SC_ABORTED_POWER_LOSS:
  273                 case NVME_SC_INTERNAL_DEVICE_ERROR:
  274                 case NVME_SC_ABORTED_SQ_DELETION:
  275                 case NVME_SC_ABORTED_FAILED_FUSED:
  276                 case NVME_SC_ABORTED_MISSING_FUSED:
  277                 case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
  278                 case NVME_SC_COMMAND_SEQUENCE_ERROR:
  279                 case NVME_SC_LBA_OUT_OF_RANGE:
  280                 case NVME_SC_CAPACITY_EXCEEDED:
  281                 default:
  282                         return (0);
  283                 }
  284         case NVME_SCT_COMMAND_SPECIFIC:
  285         case NVME_SCT_MEDIA_ERROR:
  286         case NVME_SCT_VENDOR_SPECIFIC:
  287         default:
  288                 return (0);
  289         }
  290 }
  291 
  292 static void
  293 nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
  294     uint16_t cid)
  295 {
  296 
  297         bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
  298         bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
  299 
  300         bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
  301             sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
  302 
  303         callout_init(&tr->timer, 1);
  304         tr->cid = cid;
  305         tr->qpair = qpair;
  306 }
  307 
  308 static void
  309 nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
  310     struct nvme_completion *cpl, boolean_t print_on_error)
  311 {
  312         struct nvme_request     *req;
  313         boolean_t               retry, error;
  314 
  315         req = tr->req;
  316         error = nvme_completion_is_error(cpl);
  317         retry = error && nvme_completion_is_retry(cpl) &&
  318            req->retries < nvme_retry_count;
  319 
  320         if (error && print_on_error) {
  321                 nvme_qpair_print_command(qpair, &req->cmd);
  322                 nvme_qpair_print_completion(qpair, cpl);
  323         }
  324 
  325         qpair->act_tr[cpl->cid] = NULL;
  326 
  327         KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));
  328 
  329         if (req->cb_fn && !retry)
  330                 req->cb_fn(req->cb_arg, cpl);
  331 
  332         mtx_lock(&qpair->lock);
  333         callout_stop(&tr->timer);
  334 
  335         if (retry) {
  336                 req->retries++;
  337                 nvme_qpair_submit_tracker(qpair, tr);
  338         } else {
  339                 if (req->type != NVME_REQUEST_NULL)
  340                         bus_dmamap_unload(qpair->dma_tag,
  341                             tr->payload_dma_map);
  342 
  343                 nvme_free_request(req);
  344                 tr->req = NULL;
  345 
  346                 TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
  347                 TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
  348 
  349                 /*
  350                  * If the controller is in the middle of resetting, don't
  351                  *  try to submit queued requests here - let the reset logic
  352                  *  handle that instead.
  353                  */
  354                 if (!STAILQ_EMPTY(&qpair->queued_req) &&
  355                     !qpair->ctrlr->is_resetting) {
  356                         req = STAILQ_FIRST(&qpair->queued_req);
  357                         STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
  358                         _nvme_qpair_submit_request(qpair, req);
  359                 }
  360         }
  361 
  362         mtx_unlock(&qpair->lock);
  363 }
  364 
  365 static void
  366 nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
  367     struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
  368     boolean_t print_on_error)
  369 {
  370         struct nvme_completion  cpl;
  371 
  372         memset(&cpl, 0, sizeof(cpl));
  373         cpl.sqid = qpair->id;
  374         cpl.cid = tr->cid;
  375         cpl.status.sct = sct;
  376         cpl.status.sc = sc;
  377         cpl.status.dnr = dnr;
  378         nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
  379 }
  380 
  381 void
  382 nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
  383     struct nvme_request *req, uint32_t sct, uint32_t sc,
  384     boolean_t print_on_error)
  385 {
  386         struct nvme_completion  cpl;
  387         boolean_t               error;
  388 
  389         memset(&cpl, 0, sizeof(cpl));
  390         cpl.sqid = qpair->id;
  391         cpl.status.sct = sct;
  392         cpl.status.sc = sc;
  393 
  394         error = nvme_completion_is_error(&cpl);
  395 
  396         if (error && print_on_error) {
  397                 nvme_qpair_print_command(qpair, &req->cmd);
  398                 nvme_qpair_print_completion(qpair, &cpl);
  399         }
  400 
  401         if (req->cb_fn)
  402                 req->cb_fn(req->cb_arg, &cpl);
  403 
  404         nvme_free_request(req);
  405 }
  406 
  407 void
  408 nvme_qpair_process_completions(struct nvme_qpair *qpair)
  409 {
  410         struct nvme_tracker     *tr;
  411         struct nvme_completion  *cpl;
  412 
  413         qpair->num_intr_handler_calls++;
  414 
  415         if (!qpair->is_enabled)
  416                 /*
   417                  * qpair is not enabled, likely because a controller reset
   418                  *  is in progress.  Ignore the interrupt - any I/O that was
  419                  *  associated with this interrupt will get retried when the
  420                  *  reset is complete.
  421                  */
  422                 return;
  423 
  424         while (1) {
  425                 cpl = &qpair->cpl[qpair->cq_head];
  426 
  427                 if (cpl->status.p != qpair->phase)
  428                         break;
  429 
  430                 tr = qpair->act_tr[cpl->cid];
  431 
  432                 if (tr != NULL) {
  433                         nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
  434                         qpair->sq_head = cpl->sqhd;
  435                 } else {
  436                         nvme_printf(qpair->ctrlr, 
  437                             "cpl does not map to outstanding cmd\n");
  438                         nvme_dump_completion(cpl);
  439                         KASSERT(0, ("received completion for unknown cmd\n"));
  440                 }
  441 
  442                 if (++qpair->cq_head == qpair->num_entries) {
  443                         qpair->cq_head = 0;
  444                         qpair->phase = !qpair->phase;
  445                 }
  446 
  447                 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
  448                     qpair->cq_head);
  449         }
  450 }
  451 
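/*
 * A minimal sketch of the phase-bit protocol consumed above, assuming
 *  a 4-entry completion queue for illustration.  The controller posts
 *  each completion with the current phase; the driver consumes entries
 *  while the posted phase matches qpair->phase and toggles its
 *  expected value at every wrap:
 *
 *      pass 1: entries arrive with p == 1, consumed while p == 1
 *      wrap:   cq_head returns to 0, qpair->phase flips to 0
 *      pass 2: entries arrive with p == 0, consumed while p == 0
 *
 *  An entry whose phase bit does not match qpair->phase is left over
 *  from the previous pass, so the loop stops there.
 */
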
  452 static void
  453 nvme_qpair_msix_handler(void *arg)
  454 {
  455         struct nvme_qpair *qpair = arg;
  456 
  457         nvme_qpair_process_completions(qpair);
  458 }
  459 
  460 void
  461 nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
  462     uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
  463     struct nvme_controller *ctrlr)
  464 {
  465         struct nvme_tracker     *tr;
  466         uint32_t                i;
  467 
  468         qpair->id = id;
  469         qpair->vector = vector;
  470         qpair->num_entries = num_entries;
  471 #ifdef CHATHAM2
  472         /*
  473          * Chatham prototype board starts having issues at higher queue
  474          *  depths.  So use a conservative estimate here of no more than 64
  475          *  outstanding I/O per queue at any one point.
  476          */
  477         if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
  478                 num_trackers = min(num_trackers, 64);
  479 #endif
  480         qpair->num_trackers = num_trackers;
  481         qpair->ctrlr = ctrlr;
  482 
  483         if (ctrlr->msix_enabled) {
  484 
  485                 /*
  486                  * MSI-X vector resource IDs start at 1, so we add one to
  487                  *  the queue's vector to get the corresponding rid to use.
  488                  */
  489                 qpair->rid = vector + 1;
  490 
  491                 qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
  492                     &qpair->rid, RF_ACTIVE);
  493 
  494                 bus_setup_intr(ctrlr->dev, qpair->res,
  495                     INTR_TYPE_MISC | INTR_MPSAFE, NULL,
  496                     nvme_qpair_msix_handler, qpair, &qpair->tag);
  497         }
  498 
  499         mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
  500 
  501         bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
  502             sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
  503             BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
  504             (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
  505             NULL, NULL, &qpair->dma_tag);
  506 
  507         qpair->num_cmds = 0;
  508         qpair->num_intr_handler_calls = 0;
  509 
  510         qpair->cmd = contigmalloc(qpair->num_entries *
  511             sizeof(struct nvme_command), M_NVME, M_ZERO,
  512             0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
  513         qpair->cpl = contigmalloc(qpair->num_entries *
  514             sizeof(struct nvme_completion), M_NVME, M_ZERO,
  515             0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
  516 
  517         bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
  518         bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);
  519 
  520         bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
  521             qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
  522             nvme_single_map, &qpair->cmd_bus_addr, 0);
  523         bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
  524             qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
  525             nvme_single_map, &qpair->cpl_bus_addr, 0);
  526 
  527         qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
  528         qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);
  529 
  530         TAILQ_INIT(&qpair->free_tr);
  531         TAILQ_INIT(&qpair->outstanding_tr);
  532         STAILQ_INIT(&qpair->queued_req);
  533 
  534         for (i = 0; i < qpair->num_trackers; i++) {
  535                 tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK);
  536                 nvme_qpair_construct_tracker(qpair, tr, i);
  537                 TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
  538         }
  539 
  540         qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
  541             M_NVME, M_ZERO | M_WAITOK);
  542 }
  543 
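/*
 * Sizing sketch for the allocations above (256 is an illustrative
 *  entry count, not a driver default): NVMe submission entries are
 *  64 bytes and completion entries 16 bytes, so num_entries == 256
 *  yields a 16KB command ring and a 4KB completion ring, each
 *  physically contiguous and page-aligned as the queue base address
 *  registers require.  Note also that MSI-X resource IDs start at 1,
 *  so a queue on vector 1 allocates its interrupt with rid == 2.
 */
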
  544 static void
  545 nvme_qpair_destroy(struct nvme_qpair *qpair)
  546 {
  547         struct nvme_tracker     *tr;
  548 
  549         if (qpair->tag)
  550                 bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
  551 
  552         if (qpair->res)
  553                 bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
  554                     rman_get_rid(qpair->res), qpair->res);
  555 
  556         if (qpair->cmd) {
  557                 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
  558                 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
  559                 contigfree(qpair->cmd,
  560                     qpair->num_entries * sizeof(struct nvme_command), M_NVME);
  561         }
  562 
  563         if (qpair->cpl) {
  564                 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
  565                 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
  566                 contigfree(qpair->cpl,
  567                     qpair->num_entries * sizeof(struct nvme_completion),
  568                     M_NVME);
  569         }
  570 
  571         if (qpair->dma_tag)
  572                 bus_dma_tag_destroy(qpair->dma_tag);
  573 
  574         if (qpair->act_tr)
  575                 free(qpair->act_tr, M_NVME);
  576 
  577         while (!TAILQ_EMPTY(&qpair->free_tr)) {
  578                 tr = TAILQ_FIRST(&qpair->free_tr);
  579                 TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
  580                 bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
  581                 bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
  582                 free(tr, M_NVME);
  583         }
  584 }
  585 
  586 static void
  587 nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
  588 {
  589         struct nvme_tracker     *tr;
  590 
  591         tr = TAILQ_FIRST(&qpair->outstanding_tr);
  592         while (tr != NULL) {
  593                 if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
  594                         nvme_qpair_manual_complete_tracker(qpair, tr,
  595                             NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
  596                             FALSE);
  597                         tr = TAILQ_FIRST(&qpair->outstanding_tr);
  598                 } else {
  599                         tr = TAILQ_NEXT(tr, tailq);
  600                 }
  601         }
  602 }
  603 
  604 void
  605 nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
  606 {
  607 
  608         nvme_admin_qpair_abort_aers(qpair);
  609         nvme_qpair_destroy(qpair);
  610 }
  611 
  612 void
  613 nvme_io_qpair_destroy(struct nvme_qpair *qpair)
  614 {
  615 
  616         nvme_qpair_destroy(qpair);
  617 }
  618 
  619 static void
  620 nvme_abort_complete(void *arg, const struct nvme_completion *status)
  621 {
  622         struct nvme_tracker     *tr = arg;
  623 
  624         /*
  625          * If cdw0 == 1, the controller was not able to abort the command
  626          *  we requested.  We still need to check the active tracker array,
   627          *  to cover the race where the I/O timed out at the same time the
   628          *  controller was completing it.
  629          */
  630         if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
  631                 /*
  632                  * An I/O has timed out, and the controller was unable to
  633                  *  abort it for some reason.  Construct a fake completion
  634                  *  status, and then complete the I/O's tracker manually.
  635                  */
  636                 nvme_printf(tr->qpair->ctrlr,
  637                     "abort command failed, aborting command manually\n");
  638                 nvme_qpair_manual_complete_tracker(tr->qpair, tr,
  639                     NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
  640         }
  641 }
  642 
  643 static void
  644 nvme_timeout(void *arg)
  645 {
  646         struct nvme_tracker     *tr = arg;
  647         struct nvme_qpair       *qpair = tr->qpair;
  648         struct nvme_controller  *ctrlr = qpair->ctrlr;
  649         union csts_register     csts;
  650 
  651         /* Read csts to get value of cfs - controller fatal status. */
  652         csts.raw = nvme_mmio_read_4(ctrlr, csts);
  653 
  654         if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
  655                 /*
  656                  * If aborts are enabled, only use them if the controller is
  657                  *  not reporting fatal status.
  658                  */
  659                 nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
  660                     nvme_abort_complete, tr);
  661         } else
  662                 nvme_ctrlr_reset(ctrlr);
  663 }
  664 
  665 void
  666 nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
  667 {
  668         struct nvme_request     *req;
  669         struct nvme_controller  *ctrlr;
  670 
  671         mtx_assert(&qpair->lock, MA_OWNED);
  672 
  673         req = tr->req;
  674         req->cmd.cid = tr->cid;
  675         qpair->act_tr[tr->cid] = tr;
  676         ctrlr = qpair->ctrlr;
  677 
  678         if (req->timeout)
  679 #if __FreeBSD_version >= 800030
  680                 callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
  681                     nvme_timeout, tr);
  682 #else
  683                 callout_reset(&tr->timer, ctrlr->timeout_period * hz,
  684                     nvme_timeout, tr);
  685 #endif
  686 
  687         /* Copy the command from the tracker to the submission queue. */
  688         memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
  689 
  690         if (++qpair->sq_tail == qpair->num_entries)
  691                 qpair->sq_tail = 0;
  692 
  693         wmb();
  694         nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
  695             qpair->sq_tail);
  696 
  697         qpair->num_cmds++;
  698 }
  699 
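/*
 * The wmb() above orders the memcpy of the command into the ring
 *  ahead of the doorbell write, so the controller cannot fetch a
 *  stale submission entry.  The producer side condensed (same fields
 *  as above, sketch only):
 *
 *      memcpy(&sq[tail], cmd, sizeof(*cmd));   fill the SQ slot
 *      tail = (tail + 1) % num_entries;        advance with wrap
 *      wmb();                                  entry before doorbell
 *      write sq_tdbl = tail;                   hand slot to device
 */
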
  700 static void
  701 nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
  702 {
  703         struct nvme_tracker     *tr = arg;
  704         uint32_t                cur_nseg;
  705 
  706         /*
  707          * If the mapping operation failed, return immediately.  The caller
  708          *  is responsible for detecting the error status and failing the
  709          *  tracker manually.
  710          */
  711         if (error != 0)
  712                 return;
  713 
  714         /*
  715          * Note that we specified PAGE_SIZE for alignment and max
  716          *  segment size when creating the bus dma tags.  So here
  717          *  we can safely just transfer each segment to its
  718          *  associated PRP entry.
  719          */
  720         tr->req->cmd.prp1 = seg[0].ds_addr;
  721 
  722         if (nseg == 2) {
  723                 tr->req->cmd.prp2 = seg[1].ds_addr;
  724         } else if (nseg > 2) {
  725                 cur_nseg = 1;
  726                 tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr;
  727                 while (cur_nseg < nseg) {
  728                         tr->prp[cur_nseg-1] =
  729                             (uint64_t)seg[cur_nseg].ds_addr;
  730                         cur_nseg++;
  731                 }
  732         }
  733 
  734         nvme_qpair_submit_tracker(tr->qpair, tr);
  735 }
  736 
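/*
 * PRP construction sketch for the callback above, assuming a
 *  hypothetical three-segment, page-aligned transfer:
 *
 *      cmd.prp1   = seg[0].ds_addr     first page, mapped directly
 *      cmd.prp2   = tr->prp_bus_addr   physical address of PRP list
 *      tr->prp[0] = seg[1].ds_addr     second page, via the list
 *      tr->prp[1] = seg[2].ds_addr     third page, via the list
 *
 *  With exactly two segments no list is needed and prp2 carries
 *  seg[1].ds_addr directly, which is the nseg == 2 case above.
 */
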
  737 static void
  738 _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
  739 {
  740         struct nvme_tracker     *tr;
  741         int                     err = 0;
  742 
  743         mtx_assert(&qpair->lock, MA_OWNED);
  744 
  745         tr = TAILQ_FIRST(&qpair->free_tr);
  746         req->qpair = qpair;
  747 
  748         if (tr == NULL || !qpair->is_enabled) {
  749                 /*
  750                  * No tracker is available, or the qpair is disabled due to
  751                  *  an in-progress controller-level reset or controller
  752                  *  failure.
  753                  */
  754 
  755                 if (qpair->ctrlr->is_failed) {
  756                         /*
  757                          * The controller has failed.  Post the request to a
  758                          *  task where it will be aborted, so that we do not
  759                          *  invoke the request's callback in the context
  760                          *  of the submission.
  761                          */
  762                         nvme_ctrlr_post_failed_request(qpair->ctrlr, req);
  763                 } else {
  764                         /*
  765                          * Put the request on the qpair's request queue to be
  766                          *  processed when a tracker frees up via a command
  767                          *  completion or when the controller reset is
  768                          *  completed.
  769                          */
  770                         STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
  771                 }
  772                 return;
  773         }
  774 
  775         TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
  776         TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
  777         tr->req = req;
  778 
  779         switch (req->type) {
  780         case NVME_REQUEST_VADDR:
  781                 KASSERT(req->payload_size <= qpair->ctrlr->max_xfer_size,
  782                     ("payload_size (%d) exceeds max_xfer_size (%d)\n",
  783                     req->payload_size, qpair->ctrlr->max_xfer_size));
  784                 err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map,
  785                     req->u.payload, req->payload_size, nvme_payload_map, tr, 0);
  786                 if (err != 0)
  787                         nvme_printf(qpair->ctrlr,
  788                             "bus_dmamap_load returned 0x%x!\n", err);
  789                 break;
  790         case NVME_REQUEST_NULL:
  791                 nvme_qpair_submit_tracker(tr->qpair, tr);
  792                 break;
  793 #ifdef NVME_UNMAPPED_BIO_SUPPORT
  794         case NVME_REQUEST_BIO:
  795                 KASSERT(req->u.bio->bio_bcount <= qpair->ctrlr->max_xfer_size,
  796                     ("bio->bio_bcount (%jd) exceeds max_xfer_size (%d)\n",
  797                     (intmax_t)req->u.bio->bio_bcount,
  798                     qpair->ctrlr->max_xfer_size));
  799                 err = bus_dmamap_load_bio(tr->qpair->dma_tag,
  800                     tr->payload_dma_map, req->u.bio, nvme_payload_map, tr, 0);
  801                 if (err != 0)
  802                         nvme_printf(qpair->ctrlr,
  803                             "bus_dmamap_load_bio returned 0x%x!\n", err);
  804                 break;
  805 #endif
  806         default:
  807                 panic("unknown nvme request type 0x%x\n", req->type);
  808                 break;
  809         }
  810 
  811         if (err != 0) {
  812                 /*
  813                  * The dmamap operation failed, so we manually fail the
  814                  *  tracker here with DATA_TRANSFER_ERROR status.
  815                  *
  816                  * nvme_qpair_manual_complete_tracker must not be called
  817                  *  with the qpair lock held.
  818                  */
  819                 mtx_unlock(&qpair->lock);
  820                 nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
  821                     NVME_SC_DATA_TRANSFER_ERROR, 1 /* do not retry */, TRUE);
  822                 mtx_lock(&qpair->lock);
  823         }
  824 }
  825 
  826 void
  827 nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
  828 {
  829 
  830         mtx_lock(&qpair->lock);
  831         _nvme_qpair_submit_request(qpair, req);
  832         mtx_unlock(&qpair->lock);
  833 }
  834 
  835 static void
  836 nvme_qpair_enable(struct nvme_qpair *qpair)
  837 {
  838 
  839         qpair->is_enabled = TRUE;
  840 }
  841 
  842 void
  843 nvme_qpair_reset(struct nvme_qpair *qpair)
  844 {
  845 
  846         qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
  847 
  848         /*
  849          * First time through the completion queue, HW will set phase
  850          *  bit on completions to 1.  So set this to 1 here, indicating
  851          *  we're looking for a 1 to know which entries have completed.
   852          *  We'll toggle the bit each time the completion queue
  853          *  rolls over.
  854          */
  855         qpair->phase = 1;
  856 
  857         memset(qpair->cmd, 0,
  858             qpair->num_entries * sizeof(struct nvme_command));
  859         memset(qpair->cpl, 0,
  860             qpair->num_entries * sizeof(struct nvme_completion));
  861 }
  862 
  863 void
  864 nvme_admin_qpair_enable(struct nvme_qpair *qpair)
  865 {
  866         struct nvme_tracker             *tr;
  867         struct nvme_tracker             *tr_temp;
  868 
  869         /*
  870          * Manually abort each outstanding admin command.  Do not retry
  871          *  admin commands found here, since they will be left over from
   872          *  a controller reset and it's likely the context in which the
  873          *  command was issued no longer applies.
  874          */
  875         TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
  876                 nvme_printf(qpair->ctrlr,
  877                     "aborting outstanding admin command\n");
  878                 nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
  879                     NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE);
  880         }
  881 
  882         nvme_qpair_enable(qpair);
  883 }
  884 
  885 void
  886 nvme_io_qpair_enable(struct nvme_qpair *qpair)
  887 {
  888         STAILQ_HEAD(, nvme_request)     temp;
  889         struct nvme_tracker             *tr;
  890         struct nvme_tracker             *tr_temp;
  891         struct nvme_request             *req;
  892 
  893         /*
  894          * Manually abort each outstanding I/O.  This normally results in a
  895          *  retry, unless the retry count on the associated request has
  896          *  reached its limit.
  897          */
  898         TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
  899                 nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n");
  900                 nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
  901                     NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
  902         }
  903 
  904         mtx_lock(&qpair->lock);
  905 
  906         nvme_qpair_enable(qpair);
  907 
  908         STAILQ_INIT(&temp);
  909         STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
  910 
  911         while (!STAILQ_EMPTY(&temp)) {
  912                 req = STAILQ_FIRST(&temp);
  913                 STAILQ_REMOVE_HEAD(&temp, stailq);
  914                 nvme_printf(qpair->ctrlr, "resubmitting queued i/o\n");
  915                 nvme_qpair_print_command(qpair, &req->cmd);
  916                 _nvme_qpair_submit_request(qpair, req);
  917         }
  918 
  919         mtx_unlock(&qpair->lock);
  920 }
  921 
  922 static void
  923 nvme_qpair_disable(struct nvme_qpair *qpair)
  924 {
  925         struct nvme_tracker *tr;
  926 
  927         qpair->is_enabled = FALSE;
  928         mtx_lock(&qpair->lock);
  929         TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
  930                 callout_stop(&tr->timer);
  931         mtx_unlock(&qpair->lock);
  932 }
  933 
  934 void
  935 nvme_admin_qpair_disable(struct nvme_qpair *qpair)
  936 {
  937 
  938         nvme_qpair_disable(qpair);
  939         nvme_admin_qpair_abort_aers(qpair);
  940 }
  941 
  942 void
  943 nvme_io_qpair_disable(struct nvme_qpair *qpair)
  944 {
  945 
  946         nvme_qpair_disable(qpair);
  947 }
  948 
  949 void
  950 nvme_qpair_fail(struct nvme_qpair *qpair)
  951 {
  952         struct nvme_tracker             *tr;
  953         struct nvme_request             *req;
  954 
  955         mtx_lock(&qpair->lock);
  956 
  957         while (!STAILQ_EMPTY(&qpair->queued_req)) {
  958                 req = STAILQ_FIRST(&qpair->queued_req);
  959                 STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
  960                 nvme_printf(qpair->ctrlr, "failing queued i/o\n");
  961                 mtx_unlock(&qpair->lock);
  962                 nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
  963                     NVME_SC_ABORTED_BY_REQUEST, TRUE);
  964                 mtx_lock(&qpair->lock);
  965         }
  966 
  967         /* Manually abort each outstanding I/O. */
  968         while (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
  969                 tr = TAILQ_FIRST(&qpair->outstanding_tr);
  970                 /*
  971                  * Do not remove the tracker.  The abort_tracker path will
  972                  *  do that for us.
  973                  */
  974                 nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
  975                 mtx_unlock(&qpair->lock);
  976                 nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
  977                     NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE);
  978                 mtx_lock(&qpair->lock);
  979         }
  980 
  981         mtx_unlock(&qpair->lock);
  982 }
  983 
