FreeBSD/Linux Kernel Cross Reference
sys/dev/nvme/nvme_qpair.c

/*-
 * Copyright (C) 2012-2014 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

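/*
 * error_print_t controls how much is logged when a command completes in
 *  error: nothing, only errors that will not be retried, or all errors.
 *  DO_NOT_RETRY is passed as the dnr argument to
 *  nvme_qpair_manual_complete_tracker() so that the synthesized completion
 *  carries the Do Not Retry status bit and is never resubmitted.
 */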
typedef enum error_print { ERROR_PRINT_NONE, ERROR_PRINT_NO_RETRY, ERROR_PRINT_ALL } error_print_t;
#define DO_NOT_RETRY    1

static void     _nvme_qpair_submit_request(struct nvme_qpair *qpair,
                                           struct nvme_request *req);
static void     nvme_qpair_destroy(struct nvme_qpair *qpair);

struct nvme_opcode_string {
        uint16_t        opc;
        const char *    str;
};

static struct nvme_opcode_string admin_opcode[] = {
        { NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
        { NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
        { NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
        { NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
        { NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
        { NVME_OPC_IDENTIFY, "IDENTIFY" },
        { NVME_OPC_ABORT, "ABORT" },
        { NVME_OPC_SET_FEATURES, "SET FEATURES" },
        { NVME_OPC_GET_FEATURES, "GET FEATURES" },
        { NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
        { NVME_OPC_FIRMWARE_ACTIVATE, "FIRMWARE ACTIVATE" },
        { NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
        { NVME_OPC_DEVICE_SELF_TEST, "DEVICE SELF-TEST" },
        { NVME_OPC_NAMESPACE_ATTACHMENT, "NAMESPACE ATTACHMENT" },
        { NVME_OPC_KEEP_ALIVE, "KEEP ALIVE" },
        { NVME_OPC_DIRECTIVE_SEND, "DIRECTIVE SEND" },
        { NVME_OPC_DIRECTIVE_RECEIVE, "DIRECTIVE RECEIVE" },
        { NVME_OPC_VIRTUALIZATION_MANAGEMENT, "VIRTUALIZATION MANAGEMENT" },
        { NVME_OPC_NVME_MI_SEND, "NVME-MI SEND" },
        { NVME_OPC_NVME_MI_RECEIVE, "NVME-MI RECEIVE" },
        { NVME_OPC_DOORBELL_BUFFER_CONFIG, "DOORBELL BUFFER CONFIG" },
        { NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
        { NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
        { NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
        { NVME_OPC_SANITIZE, "SANITIZE" },
        { 0xFFFF, "ADMIN COMMAND" }
};

static struct nvme_opcode_string io_opcode[] = {
        { NVME_OPC_FLUSH, "FLUSH" },
        { NVME_OPC_WRITE, "WRITE" },
        { NVME_OPC_READ, "READ" },
        { NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
        { NVME_OPC_COMPARE, "COMPARE" },
        { NVME_OPC_WRITE_ZEROES, "WRITE ZEROES" },
        { NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
        { NVME_OPC_RESERVATION_REGISTER, "RESERVATION REGISTER" },
        { NVME_OPC_RESERVATION_REPORT, "RESERVATION REPORT" },
        { NVME_OPC_RESERVATION_ACQUIRE, "RESERVATION ACQUIRE" },
        { NVME_OPC_RESERVATION_RELEASE, "RESERVATION RELEASE" },
        { 0xFFFF, "IO COMMAND" }
};

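/*
 * Both opcode tables are terminated by a 0xFFFF sentinel entry whose string
 *  doubles as the fallback name for any opcode not listed above, so the
 *  lookup loops below always terminate and always return a printable name.
 */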
static const char *
get_admin_opcode_string(uint16_t opc)
{
        struct nvme_opcode_string *entry;

        entry = admin_opcode;

        while (entry->opc != 0xFFFF) {
                if (entry->opc == opc)
                        return (entry->str);
                entry++;
        }
        return (entry->str);
}

static const char *
get_io_opcode_string(uint16_t opc)
{
        struct nvme_opcode_string *entry;

        entry = io_opcode;

        while (entry->opc != 0xFFFF) {
                if (entry->opc == opc)
                        return (entry->str);
                entry++;
        }
        return (entry->str);
}

static void
nvme_admin_qpair_print_command(struct nvme_qpair *qpair,
    struct nvme_command *cmd)
{

        nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%x "
            "cdw10:%08x cdw11:%08x\n",
            get_admin_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid,
            cmd->nsid, cmd->cdw10, cmd->cdw11);
}

static void
nvme_io_qpair_print_command(struct nvme_qpair *qpair,
    struct nvme_command *cmd)
{

        switch (cmd->opc) {
        case NVME_OPC_WRITE:
        case NVME_OPC_READ:
        case NVME_OPC_WRITE_UNCORRECTABLE:
        case NVME_OPC_COMPARE:
        case NVME_OPC_WRITE_ZEROES:
                nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d "
                    "lba:%llu len:%d\n",
                    get_io_opcode_string(cmd->opc), qpair->id, cmd->cid,
                    cmd->nsid,
                    ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
                    (cmd->cdw12 & 0xFFFF) + 1);
                break;
        case NVME_OPC_FLUSH:
        case NVME_OPC_DATASET_MANAGEMENT:
        case NVME_OPC_RESERVATION_REGISTER:
        case NVME_OPC_RESERVATION_REPORT:
        case NVME_OPC_RESERVATION_ACQUIRE:
        case NVME_OPC_RESERVATION_RELEASE:
                nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n",
                    get_io_opcode_string(cmd->opc), qpair->id, cmd->cid,
                    cmd->nsid);
                break;
        default:
                nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n",
                    get_io_opcode_string(cmd->opc), cmd->opc, qpair->id,
                    cmd->cid, cmd->nsid);
                break;
        }
}

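/*
 * Queue pair 0 is always the admin queue (per the NVMe spec), so the queue
 *  id alone determines which opcode table applies to a command.
 */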
static void
nvme_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd)
{
        if (qpair->id == 0)
                nvme_admin_qpair_print_command(qpair, cmd);
        else
                nvme_io_qpair_print_command(qpair, cmd);
}

struct nvme_status_string {
        uint16_t        sc;
        const char *    str;
};

static struct nvme_status_string generic_status[] = {
        { NVME_SC_SUCCESS, "SUCCESS" },
        { NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
        { NVME_SC_INVALID_FIELD, "INVALID FIELD" },
        { NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
        { NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
        { NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
        { NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
        { NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
        { NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
        { NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
        { NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
        { NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
        { NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
        { NVME_SC_INVALID_SGL_SEGMENT_DESCR, "INVALID SGL SEGMENT DESCRIPTOR" },
        { NVME_SC_INVALID_NUMBER_OF_SGL_DESCR, "INVALID NUMBER OF SGL DESCRIPTORS" },
        { NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
        { NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
        { NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
        { NVME_SC_INVALID_USE_OF_CMB, "INVALID USE OF CONTROLLER MEMORY BUFFER" },
        { NVME_SC_PRP_OFFET_INVALID, "PRP OFFSET INVALID" },
        { NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
        { NVME_SC_OPERATION_DENIED, "OPERATION DENIED" },
        { NVME_SC_SGL_OFFSET_INVALID, "SGL OFFSET INVALID" },
        { NVME_SC_HOST_ID_INCONSISTENT_FORMAT, "HOST IDENTIFIER INCONSISTENT FORMAT" },
        { NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED, "KEEP ALIVE TIMEOUT EXPIRED" },
        { NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID, "KEEP ALIVE TIMEOUT INVALID" },
        { NVME_SC_ABORTED_DUE_TO_PREEMPT, "COMMAND ABORTED DUE TO PREEMPT AND ABORT" },
        { NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" },
        { NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" },
        { NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID, "SGL DATA BLOCK GRANULARITY INVALID" },
        { NVME_SC_NOT_SUPPORTED_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" },

        { NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
        { NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
        { NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
        { NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
        { NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
        { 0xFFFF, "GENERIC" }
};

static struct nvme_status_string command_specific_status[] = {
        { NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
        { NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
        { NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
        { NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
        { NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
        { NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
        { NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
        { NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
        { NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
        { NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
        { NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" },
        { NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
        { NVME_SC_FEATURE_NOT_SAVEABLE, "FEATURE IDENTIFIER NOT SAVEABLE" },
        { NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
        { NVME_SC_FEATURE_NOT_NS_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
        { NVME_SC_FW_ACT_REQUIRES_NVMS_RESET, "FIRMWARE ACTIVATION REQUIRES NVM SUBSYSTEM RESET" },
        { NVME_SC_FW_ACT_REQUIRES_RESET, "FIRMWARE ACTIVATION REQUIRES RESET" },
        { NVME_SC_FW_ACT_REQUIRES_TIME, "FIRMWARE ACTIVATION REQUIRES MAXIMUM TIME VIOLATION" },
        { NVME_SC_FW_ACT_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
        { NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
        { NVME_SC_NS_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
        { NVME_SC_NS_ID_UNAVAILABLE, "NAMESPACE IDENTIFIER UNAVAILABLE" },
        { NVME_SC_NS_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
        { NVME_SC_NS_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
        { NVME_SC_NS_NOT_ATTACHED, "NS NOT ATTACHED" },
        { NVME_SC_THIN_PROV_NOT_SUPPORTED, "THIN PROVISIONING NOT SUPPORTED" },
        { NVME_SC_CTRLR_LIST_INVALID, "CONTROLLER LIST INVALID" },
        { NVME_SC_SELT_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" },
        { NVME_SC_BOOT_PART_WRITE_PROHIB, "BOOT PARTITION WRITE PROHIBITED" },
        { NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER IDENTIFIER" },
        { NVME_SC_INVALID_SEC_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" },
        { NVME_SC_INVALID_NUM_OF_CTRLR_RESRC, "INVALID NUMBER OF CONTROLLER RESOURCES" },
        { NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" },

        { NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
        { NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
        { NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
        { 0xFFFF, "COMMAND SPECIFIC" }
};

static struct nvme_status_string media_error_status[] = {
        { NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
        { NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
        { NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
        { NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
        { NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
        { NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
        { NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
        { NVME_SC_DEALLOCATED_OR_UNWRITTEN, "DEALLOCATED OR UNWRITTEN LOGICAL BLOCK" },
        { 0xFFFF, "MEDIA ERROR" }
};

static const char *
get_status_string(uint16_t sct, uint16_t sc)
{
        struct nvme_status_string *entry;

        switch (sct) {
        case NVME_SCT_GENERIC:
                entry = generic_status;
                break;
        case NVME_SCT_COMMAND_SPECIFIC:
                entry = command_specific_status;
                break;
        case NVME_SCT_MEDIA_ERROR:
                entry = media_error_status;
                break;
        case NVME_SCT_VENDOR_SPECIFIC:
                return ("VENDOR SPECIFIC");
        default:
                return ("RESERVED");
        }

        while (entry->sc != 0xFFFF) {
                if (entry->sc == sc)
                        return (entry->str);
                entry++;
        }
        return (entry->str);
}

static void
nvme_qpair_print_completion(struct nvme_qpair *qpair,
    struct nvme_completion *cpl)
{
        nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n",
            get_status_string(cpl->status.sct, cpl->status.sc),
            cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0);
}

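/*
 * Decide whether a failed command is a candidate for resubmission.  Only a
 *  small set of generic status codes is ever retried, and the completion's
 *  DNR (do not retry) bit takes precedence where it is examined.
 */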
static boolean_t
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
        /*
         * TODO: the spec is not clear on how commands that are aborted due
         *  to TLER will be marked.  So for now, it seems
         *  NAMESPACE_NOT_READY is the only case where we should
         *  look at the DNR bit.  Requests failed with ABORTED_BY_REQUEST
         *  set the DNR bit correctly since the driver controls that.
         */
        switch (cpl->status.sct) {
        case NVME_SCT_GENERIC:
                switch (cpl->status.sc) {
                case NVME_SC_ABORTED_BY_REQUEST:
                case NVME_SC_NAMESPACE_NOT_READY:
                        if (cpl->status.dnr)
                                return (0);
                        else
                                return (1);
                case NVME_SC_INVALID_OPCODE:
                case NVME_SC_INVALID_FIELD:
                case NVME_SC_COMMAND_ID_CONFLICT:
                case NVME_SC_DATA_TRANSFER_ERROR:
                case NVME_SC_ABORTED_POWER_LOSS:
                case NVME_SC_INTERNAL_DEVICE_ERROR:
                case NVME_SC_ABORTED_SQ_DELETION:
                case NVME_SC_ABORTED_FAILED_FUSED:
                case NVME_SC_ABORTED_MISSING_FUSED:
                case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
                case NVME_SC_COMMAND_SEQUENCE_ERROR:
                case NVME_SC_LBA_OUT_OF_RANGE:
                case NVME_SC_CAPACITY_EXCEEDED:
                default:
                        return (0);
                }
        case NVME_SCT_COMMAND_SPECIFIC:
        case NVME_SCT_MEDIA_ERROR:
        case NVME_SCT_VENDOR_SPECIFIC:
        default:
                return (0);
        }
}

static void
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
    struct nvme_completion *cpl, error_print_t print_on_error)
{
        struct nvme_request     *req;
        boolean_t               retry, error;

        req = tr->req;
        error = nvme_completion_is_error(cpl);
        retry = error && nvme_completion_is_retry(cpl) &&
           req->retries < nvme_retry_count;

        if (error && (print_on_error == ERROR_PRINT_ALL ||
                (!retry && print_on_error == ERROR_PRINT_NO_RETRY))) {
                nvme_qpair_print_command(qpair, &req->cmd);
                nvme_qpair_print_completion(qpair, cpl);
        }

        qpair->act_tr[cpl->cid] = NULL;

        KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));

        if (req->cb_fn && !retry)
                req->cb_fn(req->cb_arg, cpl);

        mtx_lock(&qpair->lock);
        callout_stop(&tr->timer);

        if (retry) {
                req->retries++;
                nvme_qpair_submit_tracker(qpair, tr);
        } else {
                if (req->type != NVME_REQUEST_NULL) {
                        bus_dmamap_sync(qpair->dma_tag_payload,
                            tr->payload_dma_map,
                            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(qpair->dma_tag_payload,
                            tr->payload_dma_map);
                }

                nvme_free_request(req);
                tr->req = NULL;

                TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
                TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);

                /*
                 * If the controller is in the middle of resetting, don't
                 *  try to submit queued requests here - let the reset logic
                 *  handle that instead.
                 */
                if (!STAILQ_EMPTY(&qpair->queued_req) &&
                    !qpair->ctrlr->is_resetting) {
                        req = STAILQ_FIRST(&qpair->queued_req);
                        STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
                        _nvme_qpair_submit_request(qpair, req);
                }
        }

        mtx_unlock(&qpair->lock);
}

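/*
 * Complete a tracker with a synthesized completion entry, for cases where
 *  the controller will never post a real one (aborted AERs, failed DMA
 *  loads, controller failure).  The caller supplies the status code type,
 *  status code, and dnr bit to place in the fake completion.
 */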
static void
nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
    struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
    error_print_t print_on_error)
{
        struct nvme_completion  cpl;

        memset(&cpl, 0, sizeof(cpl));
        cpl.sqid = qpair->id;
        cpl.cid = tr->cid;
        cpl.status.sct = sct;
        cpl.status.sc = sc;
        cpl.status.dnr = dnr;
        nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

void
nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
    struct nvme_request *req, uint32_t sct, uint32_t sc)
{
        struct nvme_completion  cpl;
        boolean_t               error;

        memset(&cpl, 0, sizeof(cpl));
        cpl.sqid = qpair->id;
        cpl.status.sct = sct;
        cpl.status.sc = sc;

        error = nvme_completion_is_error(&cpl);

        if (error) {
                nvme_qpair_print_command(qpair, &req->cmd);
                nvme_qpair_print_completion(qpair, &cpl);
        }

        if (req->cb_fn)
                req->cb_fn(req->cb_arg, &cpl);

        nvme_free_request(req);
}

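/*
 * Drain the completion queue.  A completion entry is valid only when its
 *  phase tag matches the phase the host expects: the controller inverts
 *  the tag on each pass through the queue, so qpair->phase is toggled
 *  whenever cq_head wraps.  Consumed entries are released back to the
 *  controller by writing the completion queue head doorbell.
 */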
bool
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
        struct nvme_tracker     *tr;
        struct nvme_completion  *cpl;
        int done = 0;

        qpair->num_intr_handler_calls++;

        if (!qpair->is_enabled)
                /*
                 * qpair is not enabled, likely because a controller reset
                 *  is in progress.  Ignore the interrupt - any I/O that was
                 *  associated with this interrupt will get retried when the
                 *  reset is complete.
                 */
                return (false);

        bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
        while (1) {
                cpl = &qpair->cpl[qpair->cq_head];

                if (cpl->status.p != qpair->phase)
                        break;

                tr = qpair->act_tr[cpl->cid];

                if (tr != NULL) {
                        nvme_qpair_complete_tracker(qpair, tr, cpl, ERROR_PRINT_ALL);
                        qpair->sq_head = cpl->sqhd;
                        done++;
                } else {
                        nvme_printf(qpair->ctrlr,
                            "cpl does not map to outstanding cmd\n");
                        nvme_dump_completion(cpl);
                        KASSERT(0, ("received completion for unknown cmd\n"));
                }

                if (++qpair->cq_head == qpair->num_entries) {
                        qpair->cq_head = 0;
                        qpair->phase = !qpair->phase;
                }

                nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
                    qpair->cq_head);
        }
        return (done != 0);
}

static void
nvme_qpair_msix_handler(void *arg)
{
        struct nvme_qpair *qpair = arg;

        nvme_qpair_process_completions(qpair);
}

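/*
 * Construct a queue pair: set up its MSI-X interrupt (when enabled), create
 *  the DMA tags, and carve the submission queue, completion queue, and
 *  per-tracker PRP lists out of one page-aligned allocation:
 *
 *    queuemem: [ SQ entries (cmdsz) | CQ entries (cplsz) | PRP lists (prpmemsz) ]
 *
 * Returns 0 on success or ENOMEM, after tearing down any partial state.
 */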
int
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
    struct nvme_controller *ctrlr)
{
        struct nvme_tracker     *tr;
        size_t                  cmdsz, cplsz, prpsz, allocsz, prpmemsz;
        uint64_t                queuemem_phys, prpmem_phys, list_phys;
        uint8_t                 *queuemem, *prpmem, *prp_list;
        int                     i, err;

        qpair->id = id;
        qpair->vector = vector;
        qpair->num_entries = num_entries;
        qpair->num_trackers = num_trackers;
        qpair->ctrlr = ctrlr;

        if (ctrlr->msix_enabled) {
                /*
                 * MSI-X vector resource IDs start at 1, so we add one to
                 *  the queue's vector to get the corresponding rid to use.
                 */
                qpair->rid = vector + 1;

                qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
                    &qpair->rid, RF_ACTIVE);
                bus_setup_intr(ctrlr->dev, qpair->res,
                    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
                    nvme_qpair_msix_handler, qpair, &qpair->tag);
                if (id == 0) {
                        bus_describe_intr(ctrlr->dev, qpair->res, qpair->tag,
                            "admin");
                } else {
                        bus_describe_intr(ctrlr->dev, qpair->res, qpair->tag,
                            "io%d", id - 1);
                }
        }

        mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

        /* Note: NVMe PRP format is restricted to 4-byte alignment. */
        err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
            4, PAGE_SIZE, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
            (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
            NULL, NULL, &qpair->dma_tag_payload);
        if (err != 0) {
                nvme_printf(ctrlr, "payload tag create failed %d\n", err);
                goto out;
        }

        /*
         * Each component must be page aligned, and individual PRP lists
         * cannot cross a page boundary.
         */
        cmdsz = qpair->num_entries * sizeof(struct nvme_command);
        cmdsz = roundup2(cmdsz, PAGE_SIZE);
        cplsz = qpair->num_entries * sizeof(struct nvme_completion);
        cplsz = roundup2(cplsz, PAGE_SIZE);
        prpsz = sizeof(uint64_t) * NVME_MAX_PRP_LIST_ENTRIES;
        prpmemsz = qpair->num_trackers * prpsz;
        allocsz = cmdsz + cplsz + prpmemsz;

        err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
            PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
            allocsz, 1, allocsz, 0, NULL, NULL, &qpair->dma_tag);
        if (err != 0) {
                nvme_printf(ctrlr, "tag create failed %d\n", err);
                goto out;
        }

        if (bus_dmamem_alloc(qpair->dma_tag, (void **)&queuemem,
            BUS_DMA_NOWAIT, &qpair->queuemem_map)) {
                nvme_printf(ctrlr, "failed to alloc qpair memory\n");
                goto out;
        }

        if (bus_dmamap_load(qpair->dma_tag, qpair->queuemem_map,
            queuemem, allocsz, nvme_single_map, &queuemem_phys, 0) != 0) {
                nvme_printf(ctrlr, "failed to load qpair memory\n");
                goto out;
        }

        qpair->num_cmds = 0;
        qpair->num_intr_handler_calls = 0;
        qpair->cmd = (struct nvme_command *)queuemem;
        qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz);
        prpmem = (uint8_t *)(queuemem + cmdsz + cplsz);
        qpair->cmd_bus_addr = queuemem_phys;
        qpair->cpl_bus_addr = queuemem_phys + cmdsz;
        prpmem_phys = queuemem_phys + cmdsz + cplsz;

        qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
        qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

        TAILQ_INIT(&qpair->free_tr);
        TAILQ_INIT(&qpair->outstanding_tr);
        STAILQ_INIT(&qpair->queued_req);

        list_phys = prpmem_phys;
        prp_list = prpmem;
        for (i = 0; i < qpair->num_trackers; i++) {
                if (list_phys + prpsz > prpmem_phys + prpmemsz) {
                        qpair->num_trackers = i;
                        break;
                }

                /*
                 * Make sure that the PRP list for this tracker doesn't
                 * overflow to another page.
                 */
                if (trunc_page(list_phys) !=
                    trunc_page(list_phys + prpsz - 1)) {
                        list_phys = roundup2(list_phys, PAGE_SIZE);
                        prp_list =
                            (uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE);
                }

                tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK);
                bus_dmamap_create(qpair->dma_tag_payload, 0,
                    &tr->payload_dma_map);
                callout_init(&tr->timer, 1);
                tr->cid = i;
                tr->qpair = qpair;
                tr->prp = (uint64_t *)prp_list;
                tr->prp_bus_addr = list_phys;
                TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
                list_phys += prpsz;
                prp_list += prpsz;
        }

        if (qpair->num_trackers == 0) {
                nvme_printf(ctrlr, "failed to allocate enough trackers\n");
                goto out;
        }

        qpair->act_tr = malloc(sizeof(struct nvme_tracker *) *
            qpair->num_entries, M_NVME, M_ZERO | M_WAITOK);
        return (0);

out:
        nvme_qpair_destroy(qpair);
        return (ENOMEM);
}

static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
        struct nvme_tracker     *tr;

        if (qpair->tag)
                bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

        if (mtx_initialized(&qpair->lock))
                mtx_destroy(&qpair->lock);

        if (qpair->res)
                bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
                    rman_get_rid(qpair->res), qpair->res);

        if (qpair->cmd != NULL) {
                bus_dmamap_unload(qpair->dma_tag, qpair->queuemem_map);
                bus_dmamem_free(qpair->dma_tag, qpair->cmd,
                    qpair->queuemem_map);
        }

        if (qpair->act_tr)
                free(qpair->act_tr, M_NVME);

        while (!TAILQ_EMPTY(&qpair->free_tr)) {
                tr = TAILQ_FIRST(&qpair->free_tr);
                TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
                bus_dmamap_destroy(qpair->dma_tag_payload,
                    tr->payload_dma_map);
                free(tr, M_NVME);
        }

        if (qpair->dma_tag)
                bus_dma_tag_destroy(qpair->dma_tag);

        if (qpair->dma_tag_payload)
                bus_dma_tag_destroy(qpair->dma_tag_payload);
}

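/*
 * Asynchronous Event Requests are held by the controller until an event
 *  occurs, so they never complete on their own.  They must be manually
 *  completed here before the admin queue can be destroyed or disabled.
 */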
static void
nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
{
        struct nvme_tracker     *tr;

        tr = TAILQ_FIRST(&qpair->outstanding_tr);
        while (tr != NULL) {
                if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
                        nvme_qpair_manual_complete_tracker(qpair, tr,
                            NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
                            ERROR_PRINT_NONE);
                        tr = TAILQ_FIRST(&qpair->outstanding_tr);
                } else {
                        tr = TAILQ_NEXT(tr, tailq);
                }
        }
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

        nvme_admin_qpair_abort_aers(qpair);
        nvme_qpair_destroy(qpair);
}

void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{

        nvme_qpair_destroy(qpair);
}

static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
        struct nvme_tracker     *tr = arg;

        /*
         * If cdw0 == 1, the controller was not able to abort the command
         *  we requested.  We still need to check the active tracker array,
         *  to cover the race where the I/O timed out at the same time the
         *  controller was completing it.
         */
        if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
                /*
                 * An I/O has timed out, and the controller was unable to
                 *  abort it for some reason.  Construct a fake completion
                 *  status, and then complete the I/O's tracker manually.
                 */
                nvme_printf(tr->qpair->ctrlr,
                    "abort command failed, aborting command manually\n");
                nvme_qpair_manual_complete_tracker(tr->qpair, tr,
                    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_ALL);
        }
}

static void
nvme_timeout(void *arg)
{
        struct nvme_tracker     *tr = arg;
        struct nvme_qpair       *qpair = tr->qpair;
        struct nvme_controller  *ctrlr = qpair->ctrlr;
        union csts_register     csts;

        /*
         * Read csts to get the value of cfs - controller fatal status.
         * If there is no fatal status, try to call the completion routine;
         * if it completes any transactions, report a missed interrupt and
         * return (this may need to be rate limited).  Otherwise, if aborts
         * are enabled and the controller is not reporting fatal status,
         * abort the command.  Otherwise, just reset the controller and
         * hope for the best.
         */
        csts.raw = nvme_mmio_read_4(ctrlr, csts);
        if (csts.bits.cfs == 0 && nvme_qpair_process_completions(qpair)) {
                nvme_printf(ctrlr, "Missing interrupt\n");
                return;
        }
        if (ctrlr->enable_aborts && csts.bits.cfs == 0) {
                nvme_printf(ctrlr, "Aborting command due to a timeout.\n");
                nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
                    nvme_abort_complete, tr);
        } else {
                nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
                    (csts.raw == 0xffffffff) ? " and possible hot unplug" :
                    (csts.bits.cfs ? " and fatal error status" : ""));
                nvme_ctrlr_reset(ctrlr);
        }
}

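/*
 * Copy a tracker's command into the submission queue and ring the SQ tail
 *  doorbell.  The bus_dmamap_sync() and wmb() below guarantee the command
 *  is visible in host memory before the doorbell write makes it visible
 *  to the controller.
 */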
void
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
        struct nvme_request     *req;
        struct nvme_controller  *ctrlr;

        mtx_assert(&qpair->lock, MA_OWNED);

        req = tr->req;
        req->cmd.cid = tr->cid;
        qpair->act_tr[tr->cid] = tr;
        ctrlr = qpair->ctrlr;

        if (req->timeout)
                callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
                    nvme_timeout, tr);

        /* Copy the command from the tracker to the submission queue. */
        memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));

        if (++qpair->sq_tail == qpair->num_entries)
                qpair->sq_tail = 0;

        bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
#ifndef __powerpc__
        /*
         * powerpc's bus_dmamap_sync() already includes a heavyweight sync,
         * but no other archs do.
         */
        wmb();
#endif

        nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
            qpair->sq_tail);

        qpair->num_cmds++;
}

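/*
 * bus_dma callback that translates the DMA segment list into NVMe PRP
 *  entries: prp1 is always the first segment; with exactly two segments
 *  prp2 holds the second segment's address, and with more than two it
 *  points at the tracker's preallocated PRP list holding the remaining
 *  entries.
 */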
static void
nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
{
        struct nvme_tracker     *tr = arg;
        uint32_t                cur_nseg;

        /*
         * If the mapping operation failed, return immediately.  The caller
         *  is responsible for detecting the error status and failing the
         *  tracker manually.
         */
        if (error != 0) {
                nvme_printf(tr->qpair->ctrlr,
                    "nvme_payload_map err %d\n", error);
                return;
        }

        /*
         * Note that we specified PAGE_SIZE for alignment and max
         *  segment size when creating the bus dma tags.  So here
         *  we can safely just transfer each segment to its
         *  associated PRP entry.
         */
        tr->req->cmd.prp1 = seg[0].ds_addr;

        if (nseg == 2) {
                tr->req->cmd.prp2 = seg[1].ds_addr;
        } else if (nseg > 2) {
                cur_nseg = 1;
                tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr;
                while (cur_nseg < nseg) {
                        tr->prp[cur_nseg-1] =
                            (uint64_t)seg[cur_nseg].ds_addr;
                        cur_nseg++;
                }
        } else {
                /*
                 * prp2 should not be used by the controller
                 *  since there is only one segment, but set
                 *  to 0 just to be safe.
                 */
                tr->req->cmd.prp2 = 0;
        }

        bus_dmamap_sync(tr->qpair->dma_tag_payload, tr->payload_dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        nvme_qpair_submit_tracker(tr->qpair, tr);
}

static void
_nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{
        struct nvme_tracker     *tr;
        int                     err = 0;

        mtx_assert(&qpair->lock, MA_OWNED);

        tr = TAILQ_FIRST(&qpair->free_tr);
        req->qpair = qpair;

        if (tr == NULL || !qpair->is_enabled) {
                /*
                 * No tracker is available, or the qpair is disabled due to
                 *  an in-progress controller-level reset or controller
                 *  failure.
                 */
                if (qpair->ctrlr->is_failed) {
                        /*
                         * The controller has failed.  Post the request to a
                         *  task where it will be aborted, so that we do not
                         *  invoke the request's callback in the context
                         *  of the submission.
                         */
                        nvme_ctrlr_post_failed_request(qpair->ctrlr, req);
                } else {
                        /*
                         * Put the request on the qpair's request queue to be
                         *  processed when a tracker frees up via a command
                         *  completion or when the controller reset is
                         *  completed.
                         */
                        STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
                }
                return;
        }

        TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
        TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
        tr->req = req;

        switch (req->type) {
        case NVME_REQUEST_VADDR:
                KASSERT(req->payload_size <= qpair->ctrlr->max_xfer_size,
                    ("payload_size (%d) exceeds max_xfer_size (%d)\n",
                    req->payload_size, qpair->ctrlr->max_xfer_size));
                err = bus_dmamap_load(tr->qpair->dma_tag_payload,
                    tr->payload_dma_map, req->u.payload, req->payload_size,
                    nvme_payload_map, tr, 0);
                if (err != 0)
                        nvme_printf(qpair->ctrlr,
                            "bus_dmamap_load returned 0x%x!\n", err);
                break;
        case NVME_REQUEST_NULL:
                nvme_qpair_submit_tracker(tr->qpair, tr);
                break;
        case NVME_REQUEST_BIO:
                KASSERT(req->u.bio->bio_bcount <= qpair->ctrlr->max_xfer_size,
                    ("bio->bio_bcount (%jd) exceeds max_xfer_size (%d)\n",
                    (intmax_t)req->u.bio->bio_bcount,
                    qpair->ctrlr->max_xfer_size));
                err = bus_dmamap_load_bio(tr->qpair->dma_tag_payload,
                    tr->payload_dma_map, req->u.bio, nvme_payload_map, tr, 0);
                if (err != 0)
                        nvme_printf(qpair->ctrlr,
                            "bus_dmamap_load_bio returned 0x%x!\n", err);
                break;
        case NVME_REQUEST_CCB:
                err = bus_dmamap_load_ccb(tr->qpair->dma_tag_payload,
                    tr->payload_dma_map, req->u.payload,
                    nvme_payload_map, tr, 0);
                if (err != 0)
                        nvme_printf(qpair->ctrlr,
                            "bus_dmamap_load_ccb returned 0x%x!\n", err);
                break;
        default:
                panic("unknown nvme request type 0x%x\n", req->type);
                break;
        }

        if (err != 0) {
                /*
                 * The dmamap operation failed, so we manually fail the
                 *  tracker here with DATA_TRANSFER_ERROR status.
                 *
                 * nvme_qpair_manual_complete_tracker must not be called
                 *  with the qpair lock held.
                 */
                mtx_unlock(&qpair->lock);
                nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
                    NVME_SC_DATA_TRANSFER_ERROR, DO_NOT_RETRY, ERROR_PRINT_ALL);
                mtx_lock(&qpair->lock);
        }
}

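/*
 * Locked wrapper around _nvme_qpair_submit_request().  A typical caller
 *  builds a request with one of the constructors from nvme_private.h and
 *  hands it off here.  A minimal sketch (assuming the
 *  nvme_allocate_request_vaddr() constructor and a caller-defined done()
 *  callback):
 *
 *      struct nvme_request *req;
 *
 *      req = nvme_allocate_request_vaddr(buf, len, done, arg);
 *      if (req != NULL)
 *              nvme_qpair_submit_request(qpair, req);
 */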
void
nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
{

        mtx_lock(&qpair->lock);
        _nvme_qpair_submit_request(qpair, req);
        mtx_unlock(&qpair->lock);
}

static void
nvme_qpair_enable(struct nvme_qpair *qpair)
{

        qpair->is_enabled = TRUE;
}

void
nvme_qpair_reset(struct nvme_qpair *qpair)
{

        qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

        /*
         * The first time through the completion queue, HW will set the
         *  phase bit on completions to 1, so set this to 1 here to
         *  indicate we are looking for a 1 to know which entries have
         *  completed.  We'll toggle the bit each time the completion
         *  queue rolls over.
         */
        qpair->phase = 1;

        memset(qpair->cmd, 0,
            qpair->num_entries * sizeof(struct nvme_command));
        memset(qpair->cpl, 0,
            qpair->num_entries * sizeof(struct nvme_completion));
}

void
nvme_admin_qpair_enable(struct nvme_qpair *qpair)
{
        struct nvme_tracker             *tr;
        struct nvme_tracker             *tr_temp;

        /*
         * Manually abort each outstanding admin command.  Do not retry
         *  admin commands found here, since they will be left over from
         *  a controller reset and it's likely the context in which the
         *  command was issued no longer applies.
         */
        TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
                nvme_printf(qpair->ctrlr,
                    "aborting outstanding admin command\n");
                nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
                    NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
        }

        nvme_qpair_enable(qpair);
}

void
nvme_io_qpair_enable(struct nvme_qpair *qpair)
{
        STAILQ_HEAD(, nvme_request)     temp;
        struct nvme_tracker             *tr;
        struct nvme_tracker             *tr_temp;
        struct nvme_request             *req;

        /*
         * Manually abort each outstanding I/O.  This normally results in a
         *  retry, unless the retry count on the associated request has
         *  reached its limit.
         */
        TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
                nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n");
                nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
                    NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_NO_RETRY);
        }

        mtx_lock(&qpair->lock);

        nvme_qpair_enable(qpair);

        STAILQ_INIT(&temp);
        STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);

        while (!STAILQ_EMPTY(&temp)) {
                req = STAILQ_FIRST(&temp);
                STAILQ_REMOVE_HEAD(&temp, stailq);
                nvme_printf(qpair->ctrlr, "resubmitting queued i/o\n");
                nvme_qpair_print_command(qpair, &req->cmd);
                _nvme_qpair_submit_request(qpair, req);
        }

        mtx_unlock(&qpair->lock);
}

static void
nvme_qpair_disable(struct nvme_qpair *qpair)
{
        struct nvme_tracker *tr;

        qpair->is_enabled = FALSE;
        mtx_lock(&qpair->lock);
        TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
                callout_stop(&tr->timer);
        mtx_unlock(&qpair->lock);
}

void
nvme_admin_qpair_disable(struct nvme_qpair *qpair)
{

        nvme_qpair_disable(qpair);
        nvme_admin_qpair_abort_aers(qpair);
}

void
nvme_io_qpair_disable(struct nvme_qpair *qpair)
{

        nvme_qpair_disable(qpair);
}

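/*
 * Fail every queued and outstanding request on the qpair with
 *  ABORTED_BY_REQUEST and the dnr bit set; used when the controller itself
 *  has failed.  The qpair lock is dropped around each manual completion
 *  because the completion path takes the lock itself and may invoke
 *  callbacks.
 */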
void
nvme_qpair_fail(struct nvme_qpair *qpair)
{
        struct nvme_tracker             *tr;
        struct nvme_request             *req;

        if (!mtx_initialized(&qpair->lock))
                return;

        mtx_lock(&qpair->lock);

        while (!STAILQ_EMPTY(&qpair->queued_req)) {
                req = STAILQ_FIRST(&qpair->queued_req);
                STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
                nvme_printf(qpair->ctrlr, "failing queued i/o\n");
                mtx_unlock(&qpair->lock);
                nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
                    NVME_SC_ABORTED_BY_REQUEST);
                mtx_lock(&qpair->lock);
        }

        /* Manually abort each outstanding I/O. */
        while (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
                tr = TAILQ_FIRST(&qpair->outstanding_tr);
                /*
                 * Do not remove the tracker.  The abort_tracker path will
                 *  do that for us.
                 */
                nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
                mtx_unlock(&qpair->lock);
                nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
                    NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
                mtx_lock(&qpair->lock);
        }

        mtx_unlock(&qpair->lock);
}