FreeBSD/Linux Kernel Cross Reference
sys/dev/mpr/mpr.c

/*-
 * Copyright (c) 2009 Yahoo! Inc.
 * Copyright (c) 2011-2015 LSI Corp.
 * Copyright (c) 2013-2016 Avago Technologies
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Avago Technologies (LSI) MPT-Fusion Host Adapter FreeBSD
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* Communications core for Avago Technologies (LSI) MPT3 */

/* TODO Move headers to mprvar */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/selinfo.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/endian.h>
#include <sys/eventhandler.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/rman.h>
#include <sys/proc.h>

#include <dev/pci/pcivar.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>

#include <dev/mpr/mpi/mpi2_type.h>
#include <dev/mpr/mpi/mpi2.h>
#include <dev/mpr/mpi/mpi2_ioc.h>
#include <dev/mpr/mpi/mpi2_sas.h>
#include <dev/mpr/mpi/mpi2_pci.h>
#include <dev/mpr/mpi/mpi2_cnfg.h>
#include <dev/mpr/mpi/mpi2_init.h>
#include <dev/mpr/mpi/mpi2_tool.h>
#include <dev/mpr/mpr_ioctl.h>
#include <dev/mpr/mprvar.h>
#include <dev/mpr/mpr_table.h>
#include <dev/mpr/mpr_sas.h>

static int mpr_diag_reset(struct mpr_softc *sc, int sleep_flag);
static int mpr_init_queues(struct mpr_softc *sc);
static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag);
static int mpr_transition_operational(struct mpr_softc *sc);
static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching);
static void mpr_iocfacts_free(struct mpr_softc *sc);
static void mpr_startup(void *arg);
static int mpr_send_iocinit(struct mpr_softc *sc);
static int mpr_alloc_queues(struct mpr_softc *sc);
static int mpr_alloc_replies(struct mpr_softc *sc);
static int mpr_alloc_requests(struct mpr_softc *sc);
static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc);
static int mpr_attach_log(struct mpr_softc *sc);
static __inline void mpr_complete_command(struct mpr_softc *sc,
    struct mpr_command *cm);
static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data,
    MPI2_EVENT_NOTIFICATION_REPLY *reply);
static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm);
static void mpr_periodic(void *);
static int mpr_reregister_events(struct mpr_softc *sc);
static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm);
static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts);
static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag);
SYSCTL_NODE(_hw, OID_AUTO, mpr, CTLFLAG_RD, 0, "MPR Driver Parameters");

MALLOC_DEFINE(M_MPR, "mpr", "mpr driver memory");

/*
 * Do a "Diagnostic Reset" aka a hard reset.  This should get the chip out of
 * any state and back to its initialization state machine.
 */
static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d };
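
/*
 * Note: writing this key sequence, one byte at a time, to the
 * MPI2_WRITE_SEQUENCE_OFFSET register is what unlocks the host diagnostic
 * register; the IOC acknowledges by setting MPI2_DIAG_DIAG_WRITE_ENABLE,
 * which mpr_diag_reset() polls for below.
 */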

/*
 * This union is used to smoothly convert le64toh(cm->cm_desc.Words).
 * The compiler only accepts a plain uint64_t argument for le64toh;
 * passing the aggregate directly fails with:
 * "aggregate value used where an integer was expected"
 */
typedef union _reply_descriptor {
        u64 word;
        struct {
                u32 low;
                u32 high;
        } u;
} reply_descriptor, request_descriptor;
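
/*
 * Usage sketch (this mirrors mpr_enqueue_request() below): load the two
 * 32-bit halves of a descriptor, then byte-swap the combined 64-bit value:
 *
 *      rd.u.low = cm->cm_desc.Words.Low;
 *      rd.u.high = cm->cm_desc.Words.High;
 *      rd.word = htole64(rd.word);
 */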

/* Rate limit chain-fail messages to 1 per minute */
static struct timeval mpr_chainfail_interval = { 60, 0 };

/*
 * sleep_flag can be either CAN_SLEEP or NO_SLEEP.
 * If this function is called from process context, it can sleep with no
 * harm; if it is called from an interrupt handler, it cannot sleep and
 * NO_SLEEP must be set.  Based on the sleep flag, the driver calls either
 * msleep, pause, or DELAY.  msleep and pause are variants of the same
 * mechanism, but pause is used when mpr_mtx is not held by the driver.
 */
static int
mpr_diag_reset(struct mpr_softc *sc, int sleep_flag)
{
        uint32_t reg;
        int i, error, tries = 0;
        uint8_t first_wait_done = FALSE;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        /* Clear any pending interrupts */
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /*
         * Force NO_SLEEP for threads that are prohibited from sleeping,
         * e.g. threads running in an interrupt handler.
         */
#if __FreeBSD_version >= 1000029
        if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
        if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
                sleep_flag = NO_SLEEP;

        /* Push the magic sequence */
        error = ETIMEDOUT;
        while (tries++ < 20) {
                for (i = 0; i < sizeof(mpt2_reset_magic); i++)
                        mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET,
                            mpt2_reset_magic[i]);

                /* wait 100 msec */
                if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                        msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
                            "mprdiag", hz/10);
                else if (sleep_flag == CAN_SLEEP)
                        pause("mprdiag", hz/10);
                else
                        DELAY(100 * 1000);

                reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
                if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) {
                        error = 0;
                        break;
                }
        }
        if (error)
                return (error);

        /* Send the actual reset.  XXX need to refresh the reg? */
        mpr_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET,
            reg | MPI2_DIAG_RESET_ADAPTER);

        /* Wait up to 300 seconds in 50ms intervals */
        error = ETIMEDOUT;
        for (i = 0; i < 6000; i++) {
                /*
                 * Wait 50 msec. If this is the first time through, wait 256
                 * msec to satisfy Diag Reset timing requirements.
                 */
                if (first_wait_done) {
                        if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                                msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
                                    "mprdiag", hz/20);
                        else if (sleep_flag == CAN_SLEEP)
                                pause("mprdiag", hz/20);
                        else
                                DELAY(50 * 1000);
                } else {
                        DELAY(256 * 1000);
                        first_wait_done = TRUE;
                }
                /*
                 * Check for the RESET_ADAPTER bit to be cleared first, then
                 * wait for the RESET state to be cleared, which takes a little
                 * longer.
                 */
                reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
                if (reg & MPI2_DIAG_RESET_ADAPTER) {
                        continue;
                }
                reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) {
                        error = 0;
                        break;
                }
        }
        if (error)
                return (error);

        mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0);

        return (0);
}

static int
mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag)
{

        MPR_FUNCTRACE(sc);

        mpr_regwrite(sc, MPI2_DOORBELL_OFFSET,
            MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET <<
            MPI2_DOORBELL_FUNCTION_SHIFT);

        if (mpr_wait_db_ack(sc, 5, sleep_flag) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed : <%s>\n",
                                __func__);
                return (ETIMEDOUT);
        }

        return (0);
}

static int
mpr_transition_ready(struct mpr_softc *sc)
{
        uint32_t reg, state;
        int error, tries = 0;
        int sleep_flags;

        MPR_FUNCTRACE(sc);
        /* If we are in attach call, do not sleep */
        sleep_flags = (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE)
            ? CAN_SLEEP : NO_SLEEP;

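        /*
         * Each failed pass through this loop ends with a 50ms settle delay,
         * so up to 1200 tries gives the IOC roughly 60 seconds to reach the
         * READY state before we give up.
         */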
        error = 0;
        while (tries++ < 1200) {
                reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg);

                /*
                 * Ensure the IOC is ready to talk.  If it's not, try
                 * resetting it.
                 */
                if (reg & MPI2_DOORBELL_USED) {
                        mpr_diag_reset(sc, sleep_flags);
                        DELAY(50000);
                        continue;
                }

                /* Is the adapter owned by another peer? */
                if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) ==
                    (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) {
                        device_printf(sc->mpr_dev, "IOC is under the control "
                            "of another peer host, aborting initialization.\n");
                        return (ENXIO);
                }

                state = reg & MPI2_IOC_STATE_MASK;
                if (state == MPI2_IOC_STATE_READY) {
                        /* Ready to go! */
                        error = 0;
                        break;
                } else if (state == MPI2_IOC_STATE_FAULT) {
                        mpr_dprint(sc, MPR_FAULT, "IOC in fault state 0x%x\n",
                            state & MPI2_DOORBELL_FAULT_CODE_MASK);
                        mpr_diag_reset(sc, sleep_flags);
                } else if (state == MPI2_IOC_STATE_OPERATIONAL) {
                        /* Need to take ownership */
                        mpr_message_unit_reset(sc, sleep_flags);
                } else if (state == MPI2_IOC_STATE_RESET) {
                        /* Wait a bit, IOC might be in transition */
                        mpr_dprint(sc, MPR_FAULT,
                            "IOC in unexpected reset state\n");
                } else {
                        mpr_dprint(sc, MPR_FAULT,
                            "IOC in unknown state 0x%x\n", state);
                        error = EINVAL;
                        break;
                }

                /* Wait 50ms for things to settle down. */
                DELAY(50000);
        }

        if (error)
                device_printf(sc->mpr_dev, "Cannot transition IOC to ready\n");
        return (error);
}

static int
mpr_transition_operational(struct mpr_softc *sc)
{
        uint32_t reg, state;
        int error;

        MPR_FUNCTRACE(sc);

        error = 0;
        reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
        mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg);

        state = reg & MPI2_IOC_STATE_MASK;
        if (state != MPI2_IOC_STATE_READY) {
                if ((error = mpr_transition_ready(sc)) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "%s failed to transition ready\n", __func__);
                        return (error);
                }
        }

        error = mpr_send_iocinit(sc);
        return (error);
}

/*
 * This is called during attach and when re-initializing due to a Diag Reset.
 * IOC Facts is used to allocate many of the structures needed by the driver.
 * If called from attach, de-allocation is not required because the driver has
 * not allocated any structures yet, but if called from a Diag Reset, previously
 * allocated structures based on IOC Facts will need to be freed and re-
 * allocated based on the latest IOC Facts.
 */
static int
mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching)
{
        int error;
        Mpi2IOCFactsReply_t saved_facts;
        uint8_t saved_mode, reallocating;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        /* Save old IOC Facts and then only reallocate if Facts have changed */
        if (!attaching) {
                bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY));
        }

        /*
         * Get IOC Facts.  In all cases throughout this function, panic if doing
         * a re-initialization and only return the error if attaching so the OS
         * can handle it.
         */
        if ((error = mpr_get_iocfacts(sc, sc->facts)) != 0) {
                if (attaching) {
                        mpr_dprint(sc, MPR_FAULT, "%s failed to get IOC Facts "
                            "with error %d\n", __func__, error);
                        return (error);
                } else {
                        panic("%s failed to get IOC Facts with error %d\n",
                            __func__, error);
                }
        }

        MPR_DPRINT_PAGE(sc, MPR_XINFO, iocfacts, sc->facts);

        snprintf(sc->fw_version, sizeof(sc->fw_version),
            "%02d.%02d.%02d.%02d",
            sc->facts->FWVersion.Struct.Major,
            sc->facts->FWVersion.Struct.Minor,
            sc->facts->FWVersion.Struct.Unit,
            sc->facts->FWVersion.Struct.Dev);

        mpr_printf(sc, "Firmware: %s, Driver: %s\n", sc->fw_version,
            MPR_DRIVER_VERSION);
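        /*
         * %b decodes a bit field for display: the first character of the
         * control string gives the output base (\20 = hexadecimal) and each
         * subsequent \<n> prefix names bit number n.  See printf(9).
         */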
        mpr_printf(sc, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities,
            "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf"
            "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR"
            "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc"
            "\22FastPath" "\23RDPQArray" "\24AtomicReqDesc" "\25PCIeSRIOV");

        /*
         * If the chip doesn't support event replay then a hard reset will be
         * required to trigger a full discovery.  Do the reset here then
         * retransition to Ready.  A hard reset might have already been done,
         * but it doesn't hurt to do it again.  Only do this if attaching, not
         * for a Diag Reset.
         */
        if (attaching) {
                if ((sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0) {
                        mpr_diag_reset(sc, NO_SLEEP);
                        if ((error = mpr_transition_ready(sc)) != 0) {
                                mpr_dprint(sc, MPR_FAULT, "%s failed to "
                                    "transition to ready with error %d\n",
                                    __func__, error);
                                return (error);
                        }
                }
        }

        /*
         * Set flag if IR Firmware is loaded.  If the RAID Capability has
         * changed from the previous IOC Facts, log a warning, but only if
         * checking this after a Diag Reset and not during attach.
         */
        saved_mode = sc->ir_firmware;
        if (sc->facts->IOCCapabilities &
            MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID)
                sc->ir_firmware = 1;
        if (!attaching) {
                if (sc->ir_firmware != saved_mode) {
                        mpr_dprint(sc, MPR_FAULT, "%s new IR/IT mode in IOC "
                            "Facts does not match previous mode\n", __func__);
                }
        }

        /* Only deallocate and reallocate if relevant IOC Facts have changed */
        reallocating = FALSE;
        sc->mpr_flags &= ~MPR_FLAGS_REALLOCATED;

        if ((!attaching) &&
            ((saved_facts.MsgVersion != sc->facts->MsgVersion) ||
            (saved_facts.HeaderVersion != sc->facts->HeaderVersion) ||
            (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) ||
            (saved_facts.RequestCredit != sc->facts->RequestCredit) ||
            (saved_facts.ProductID != sc->facts->ProductID) ||
            (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) ||
            (saved_facts.IOCRequestFrameSize !=
            sc->facts->IOCRequestFrameSize) ||
            (saved_facts.IOCMaxChainSegmentSize !=
            sc->facts->IOCMaxChainSegmentSize) ||
            (saved_facts.MaxTargets != sc->facts->MaxTargets) ||
            (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) ||
            (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) ||
            (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) ||
            (saved_facts.MaxReplyDescriptorPostQueueDepth !=
            sc->facts->MaxReplyDescriptorPostQueueDepth) ||
            (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) ||
            (saved_facts.MaxVolumes != sc->facts->MaxVolumes) ||
            (saved_facts.MaxPersistentEntries !=
            sc->facts->MaxPersistentEntries))) {
                reallocating = TRUE;

                /* Record that we reallocated everything */
                sc->mpr_flags |= MPR_FLAGS_REALLOCATED;
        }

        /*
         * Some things should be done if attaching or re-allocating after a Diag
         * Reset, but are not needed after a Diag Reset if the FW has not
         * changed.
         */
        if (attaching || reallocating) {
                /*
                 * Check if controller supports FW diag buffers and set flag to
                 * enable each type.
                 */
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE].
                            enabled = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT].
                            enabled = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED].
                            enabled = TRUE;

                /*
                 * Set flags for some supported items.
                 */
                if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP)
                        sc->eedp_enabled = TRUE;
                if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR)
                        sc->control_TLR = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI26_IOCFACTS_CAPABILITY_ATOMIC_REQ)
                        sc->atomic_desc_capable = TRUE;

                /*
                 * Size the queues. Since the reply queues always need one free
                 * entry, we'll just deduct one reply message here.
                 */
                sc->num_prireqs = MIN(MPR_PRI_REQ_FRAMES,
                    sc->facts->HighPriorityCredit);
                sc->num_reqs = MIN(MPR_REQ_FRAMES, sc->facts->RequestCredit) +
                    sc->num_prireqs;
                sc->num_replies = MIN(MPR_REPLY_FRAMES + MPR_EVT_REPLY_FRAMES,
                    sc->facts->MaxReplyDescriptorPostQueueDepth) - 1;

                /*
                 * Initialize all Tail Queues
                 */
                TAILQ_INIT(&sc->req_list);
                TAILQ_INIT(&sc->high_priority_req_list);
                TAILQ_INIT(&sc->chain_list);
                TAILQ_INIT(&sc->prp_page_list);
                TAILQ_INIT(&sc->tm_list);
        }

        /*
         * If doing a Diag Reset and the FW is significantly different
         * (reallocating will be set above in IOC Facts comparison), then all
         * buffers based on the IOC Facts will need to be freed before they are
         * reallocated.
         */
        if (reallocating) {
                mpr_iocfacts_free(sc);
                mprsas_realloc_targets(sc, saved_facts.MaxTargets +
                    saved_facts.MaxVolumes);
        }

        /*
         * Any deallocation has been completed.  Now start reallocating
         * if needed.  Will only need to reallocate if attaching or if the new
         * IOC Facts are different from the previous IOC Facts after a Diag
         * Reset. Targets have already been allocated above if needed.
         */
        if (attaching || reallocating) {
                if (((error = mpr_alloc_queues(sc)) != 0) ||
                    ((error = mpr_alloc_replies(sc)) != 0) ||
                    ((error = mpr_alloc_requests(sc)) != 0)) {
                        if (attaching) {
                                mpr_dprint(sc, MPR_FAULT, "%s failed to alloc "
                                    "queues with error %d\n", __func__, error);
                                mpr_free(sc);
                                return (error);
                        } else {
                                panic("%s failed to alloc queues with error "
                                    "%d\n", __func__, error);
                        }
                }
        }

        /* Always initialize the queues */
        bzero(sc->free_queue, sc->fqdepth * 4);
        mpr_init_queues(sc);

        /*
         * Always get the chip out of the reset state, but only panic if not
         * attaching.  If attaching and there is an error, that is handled by
         * the OS.
         */
        error = mpr_transition_operational(sc);
        if (error != 0) {
                if (attaching) {
                        mpr_printf(sc, "%s failed to transition to operational "
                            "with error %d\n", __func__, error);
                        mpr_free(sc);
                        return (error);
                } else {
                        panic("%s failed to transition to operational with "
                            "error %d\n", __func__, error);
                }
        }

        /*
         * Finish the queue initialization.
         * These are set here instead of in mpr_init_queues() because the
         * IOC resets these values during the state transition in
         * mpr_transition_operational().  The free index is set to 1
         * because the corresponding index in the IOC is set to 0, and the
         * IOC treats the queues as full if both are set to the same value.
         * Hence the reason that the queue can't hold all of the possible
         * replies.
         */
        sc->replypostindex = 0;
        mpr_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex);
        mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0);

        /*
         * Attach the subsystems so they can prepare their event masks.
         */
        /* XXX Should be dynamic so that IM/IR and user modules can attach */
        if (attaching) {
                if (((error = mpr_attach_log(sc)) != 0) ||
                    ((error = mpr_attach_sas(sc)) != 0) ||
                    ((error = mpr_attach_user(sc)) != 0)) {
                        mpr_printf(sc, "%s failed to attach all subsystems: "
                            "error %d\n", __func__, error);
                        mpr_free(sc);
                        return (error);
                }

                if ((error = mpr_pci_setup_interrupts(sc)) != 0) {
                        mpr_printf(sc, "%s failed to setup interrupts\n",
                            __func__);
                        mpr_free(sc);
                        return (error);
                }
        }

        return (error);
}

/*
 * This is called when memory is being freed (during detach, for example) and
 * when buffers need to be reallocated due to a Diag Reset.
 */
static void
mpr_iocfacts_free(struct mpr_softc *sc)
{
        struct mpr_command *cm;
        int i;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        if (sc->free_busaddr != 0)
                bus_dmamap_unload(sc->queues_dmat, sc->queues_map);
        if (sc->free_queue != NULL)
                bus_dmamem_free(sc->queues_dmat, sc->free_queue,
                    sc->queues_map);
        if (sc->queues_dmat != NULL)
                bus_dma_tag_destroy(sc->queues_dmat);

        if (sc->chain_busaddr != 0)
                bus_dmamap_unload(sc->chain_dmat, sc->chain_map);
        if (sc->chain_frames != NULL)
                bus_dmamem_free(sc->chain_dmat, sc->chain_frames,
                    sc->chain_map);
        if (sc->chain_dmat != NULL)
                bus_dma_tag_destroy(sc->chain_dmat);

        if (sc->sense_busaddr != 0)
                bus_dmamap_unload(sc->sense_dmat, sc->sense_map);
        if (sc->sense_frames != NULL)
                bus_dmamem_free(sc->sense_dmat, sc->sense_frames,
                    sc->sense_map);
        if (sc->sense_dmat != NULL)
                bus_dma_tag_destroy(sc->sense_dmat);

        if (sc->prp_page_busaddr != 0)
                bus_dmamap_unload(sc->prp_page_dmat, sc->prp_page_map);
        if (sc->prp_pages != NULL)
                bus_dmamem_free(sc->prp_page_dmat, sc->prp_pages,
                    sc->prp_page_map);
        if (sc->prp_page_dmat != NULL)
                bus_dma_tag_destroy(sc->prp_page_dmat);

        if (sc->reply_busaddr != 0)
                bus_dmamap_unload(sc->reply_dmat, sc->reply_map);
        if (sc->reply_frames != NULL)
                bus_dmamem_free(sc->reply_dmat, sc->reply_frames,
                    sc->reply_map);
        if (sc->reply_dmat != NULL)
                bus_dma_tag_destroy(sc->reply_dmat);

        if (sc->req_busaddr != 0)
                bus_dmamap_unload(sc->req_dmat, sc->req_map);
        if (sc->req_frames != NULL)
                bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map);
        if (sc->req_dmat != NULL)
                bus_dma_tag_destroy(sc->req_dmat);

        if (sc->chains != NULL)
                free(sc->chains, M_MPR);
        if (sc->prps != NULL)
                free(sc->prps, M_MPR);
        if (sc->commands != NULL) {
                for (i = 1; i < sc->num_reqs; i++) {
                        cm = &sc->commands[i];
                        bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap);
                }
                free(sc->commands, M_MPR);
        }
        if (sc->buffer_dmat != NULL)
                bus_dma_tag_destroy(sc->buffer_dmat);
}

/*
 * The terms diag reset and hard reset are used interchangeably in the MPI
 * docs to mean resetting the controller chip.  In this code diag reset
 * cleans everything up, and the hard reset function just sends the reset
 * sequence to the chip.  This should probably be refactored so that every
 * subsystem gets a reset notification of some sort, and can clean up
 * appropriately.
 */
int
mpr_reinit(struct mpr_softc *sc)
{
        int error;
        struct mprsas_softc *sassc;

        sassc = sc->sassc;

        MPR_FUNCTRACE(sc);

        mtx_assert(&sc->mpr_mtx, MA_OWNED);

        if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) {
                mpr_dprint(sc, MPR_INIT, "%s reset already in progress\n",
                    __func__);
                return 0;
        }

        mpr_dprint(sc, MPR_INFO, "Reinitializing controller\n");
        /* make sure the completion callbacks can recognize they're getting
         * a NULL cm_reply due to a reset.
         */
        sc->mpr_flags |= MPR_FLAGS_DIAGRESET;

        /*
         * Mask interrupts here.
         */
        mpr_dprint(sc, MPR_INIT, "%s mask interrupts\n", __func__);
        mpr_mask_intr(sc);

        error = mpr_diag_reset(sc, CAN_SLEEP);
        if (error != 0) {
                panic("%s hard reset failed with error %d\n", __func__, error);
        }

        /* Restore the PCI state, including the MSI-X registers */
        mpr_pci_restore(sc);

        /* Give the I/O subsystem special priority to get itself prepared */
        mprsas_handle_reinit(sc);

        /*
         * Get IOC Facts and allocate all structures based on this information.
         * The attach function will also call mpr_iocfacts_allocate at startup.
         * If relevant values have changed in IOC Facts, this function will free
         * all of the memory based on IOC Facts and reallocate that memory.
         */
        if ((error = mpr_iocfacts_allocate(sc, FALSE)) != 0) {
                panic("%s IOC Facts based allocation failed with error %d\n",
                    __func__, error);
        }

        /*
         * Mapping structures will be re-allocated after getting IOC Page8, so
         * free these structures here.
         */
        mpr_mapping_exit(sc);

        /*
         * The static page function currently read is IOC Page8.  Others can be
         * added in future.  It's possible that the values in IOC Page8 have
         * changed after a Diag Reset due to user modification, so always read
         * these.  Interrupts are masked, so unmask them before getting config
         * pages.
         */
        mpr_unmask_intr(sc);
        sc->mpr_flags &= ~MPR_FLAGS_DIAGRESET;
        mpr_base_static_config_pages(sc);

        /*
         * Some mapping info is based on IOC Page8 data, so re-initialize
         * the mapping tables.
         */
        mpr_mapping_initialize(sc);

        /*
         * Restart will reload the event masks clobbered by the reset, and
         * then enable the port.
         */
        mpr_reregister_events(sc);

        /* the end of discovery will release the simq, so we're done. */
        mpr_dprint(sc, MPR_INFO, "%s finished sc %p post %u free %u\n",
            __func__, sc, sc->replypostindex, sc->replyfreeindex);
        mprsas_release_simq_reinit(sassc);

        return 0;
}

/*
 * Wait for the chip to ACK a word that we've put into its FIFO.
 * Wait up to <timeout> seconds; each pass through the loop waits or
 * busy-waits for roughly half a millisecond to a millisecond, so the total
 * is about [ 0.5 * (2000 * <timeout>) ] milliseconds.
 */
static int
mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag)
{
        u32 cntdn, count;
        u32 int_status;
        u32 doorbell;

        count = 0;
        cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout;
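        /*
         * The countdown is sized so that either path waits about <timeout>
         * seconds in total: 1000 iterations/sec when sleeping 1ms at a time,
         * or 2000 iterations/sec when busy-waiting 500us at a time.
         */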
        do {
                int_status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
                if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) {
                        mpr_dprint(sc, MPR_INIT, "%s: successful count(%d), "
                            "timeout(%d)\n", __func__, count, timeout);
                        return 0;
                } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
                        doorbell = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                        if ((doorbell & MPI2_IOC_STATE_MASK) ==
                            MPI2_IOC_STATE_FAULT) {
                                mpr_dprint(sc, MPR_FAULT,
                                    "fault_state(0x%04x)!\n", doorbell);
                                return (EFAULT);
                        }
                } else if (int_status == 0xFFFFFFFF)
                        goto out;

                /*
                 * If it can sleep, sleep for 1 millisecond, else busy loop
                 * for 0.5 millisecond.
                 */
                if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                        msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdba",
                            hz/1000);
                else if (sleep_flag == CAN_SLEEP)
                        pause("mprdba", hz/1000);
                else
                        DELAY(500);
                count++;
        } while (--cntdn);

out:
        mpr_dprint(sc, MPR_FAULT, "%s: failed due to timeout count(%d), "
                "int_status(%x)!\n", __func__, count, int_status);
        return (ETIMEDOUT);
}

/* Wait for the chip to signal that the next word in its FIFO can be fetched */
static int
mpr_wait_db_int(struct mpr_softc *sc)
{
        int retry;

        for (retry = 0; retry < MPR_DB_MAX_WAIT; retry++) {
                if ((mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) &
                    MPI2_HIS_IOC2SYS_DB_STATUS) != 0)
                        return (0);
                DELAY(2000);
        }
        return (ETIMEDOUT);
}

/* Step through the synchronous command state machine, i.e. "Doorbell mode" */
static int
mpr_request_sync(struct mpr_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply,
    int req_sz, int reply_sz, int timeout)
{
        uint32_t *data32;
        uint16_t *data16;
        int i, count, ioc_sz, residual;
        int sleep_flags = CAN_SLEEP;

#if __FreeBSD_version >= 1000029
        if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
        if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
                sleep_flags = NO_SLEEP;

        /* Step 1 */
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /* Step 2 */
        if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
                return (EBUSY);

        /* Step 3
         * Announce that a message is coming through the doorbell.  Messages
         * are pushed at 32bit words, so round up if needed.
         */
        count = (req_sz + 3) / 4;
        mpr_regwrite(sc, MPI2_DOORBELL_OFFSET,
            (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) |
            (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT));

        /* Step 4 */
        if (mpr_wait_db_int(sc) ||
            (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell failed to activate\n");
                return (ENXIO);
        }
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed\n");
                return (ENXIO);
        }

        /* Step 5 */
        /* Clock out the message data synchronously in 32-bit dwords */
        data32 = (uint32_t *)req;
        for (i = 0; i < count; i++) {
                mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i]));
                if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "Timeout while writing doorbell\n");
                        return (ENXIO);
                }
        }

        /* Step 6 */
        /* Clock in the reply in 16-bit words.  The total length of the
         * message is always in the 4th byte, so clock out the first 2 words
         * manually, then loop the rest.
         */
        data16 = (uint16_t *)reply;
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 0\n");
                return (ENXIO);
        }
        data16[0] =
            mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 1\n");
                return (ENXIO);
        }
        data16[1] =
            mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /* Number of 32bit words in the message */
        ioc_sz = reply->MsgLength;

        /*
         * Figure out how many 16bit words to clock in without overrunning.
         * The precision loss with dividing reply_sz can safely be
         * ignored because the messages can only be multiples of 32bits.
         */
        residual = 0;
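        /*
         * reply_sz / 4 is the number of 32-bit words the caller's buffer can
         * hold; MIN() caps the transfer at the smaller of that and what the
         * IOC wants to send, and the * 2 converts to 16-bit words.
         */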
        count = MIN((reply_sz / 4), ioc_sz) * 2;
        if (count < ioc_sz * 2) {
                residual = ioc_sz * 2 - count;
                mpr_dprint(sc, MPR_ERROR, "Driver error, throwing away %d "
                    "residual message words\n", residual);
        }

        for (i = 2; i < count; i++) {
                if (mpr_wait_db_int(sc) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "Timeout reading doorbell %d\n", i);
                        return (ENXIO);
                }
                data16[i] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) &
                    MPI2_DOORBELL_DATA_MASK;
                mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        }

        /*
         * Pull out residual words that won't fit into the provided buffer.
         * This keeps the chip from hanging due to a driver programming
         * error.
         */
        while (residual--) {
                if (mpr_wait_db_int(sc) != 0) {
                        mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell\n");
                        return (ENXIO);
                }
                (void)mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        }

        /* Step 7 */
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout waiting to exit doorbell\n");
                return (ENXIO);
        }
        if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
                mpr_dprint(sc, MPR_FAULT, "Warning, doorbell still active\n");
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        return (0);
}

static void
mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm)
{
        request_descriptor rd;

        MPR_FUNCTRACE(sc);
        mpr_dprint(sc, MPR_TRACE, "SMID %u cm %p ccb %p\n",
            cm->cm_desc.Default.SMID, cm, cm->cm_ccb);

        if (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE && !(sc->mpr_flags &
            MPR_FLAGS_SHUTDOWN))
                mtx_assert(&sc->mpr_mtx, MA_OWNED);

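        /* Track the highest number of I/O commands ever in flight at once. */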
        if (++sc->io_cmds_active > sc->io_cmds_highwater)
                sc->io_cmds_highwater++;

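        /*
         * Atomic-capable IOCs accept the whole request descriptor in a
         * single 32-bit register write; otherwise the 64-bit descriptor is
         * byte-swapped and posted as two 32-bit writes, low dword first.
         */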
        if (sc->atomic_desc_capable) {
                rd.u.low = cm->cm_desc.Words.Low;
                mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
                    rd.u.low);
        } else {
                rd.u.low = cm->cm_desc.Words.Low;
                rd.u.high = cm->cm_desc.Words.High;
                rd.word = htole64(rd.word);
                mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
                    rd.u.low);
                mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET,
                    rd.u.high);
        }
}

/*
 * Just the FACTS, ma'am.
 */
static int
mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts)
{
        MPI2_DEFAULT_REPLY *reply;
        MPI2_IOC_FACTS_REQUEST request;
        int error, req_sz, reply_sz;

        MPR_FUNCTRACE(sc);

        req_sz = sizeof(MPI2_IOC_FACTS_REQUEST);
        reply_sz = sizeof(MPI2_IOC_FACTS_REPLY);
        reply = (MPI2_DEFAULT_REPLY *)facts;

        bzero(&request, req_sz);
        request.Function = MPI2_FUNCTION_IOC_FACTS;
        error = mpr_request_sync(sc, &request, reply, req_sz, reply_sz, 5);

        return (error);
}

static int
mpr_send_iocinit(struct mpr_softc *sc)
{
        MPI2_IOC_INIT_REQUEST   init;
        MPI2_DEFAULT_REPLY      reply;
        int req_sz, reply_sz, error;
        struct timeval now;
        uint64_t time_in_msec;

        MPR_FUNCTRACE(sc);

        req_sz = sizeof(MPI2_IOC_INIT_REQUEST);
        reply_sz = sizeof(MPI2_IOC_INIT_REPLY);
        bzero(&init, req_sz);
        bzero(&reply, reply_sz);

        /*
         * Fill in the init block.  Note that most addresses are
         * deliberately in the lower 32bits of memory.  This is a micro-
         * optimization for PCI/PCIX, though it's not clear if it helps PCIe.
         */
        init.Function = MPI2_FUNCTION_IOC_INIT;
        init.WhoInit = MPI2_WHOINIT_HOST_DRIVER;
        init.MsgVersion = htole16(MPI2_VERSION);
        init.HeaderVersion = htole16(MPI2_HEADER_VERSION);
        init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize);
        init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth);
        init.ReplyFreeQueueDepth = htole16(sc->fqdepth);
        init.SenseBufferAddressHigh = 0;
        init.SystemReplyAddressHigh = 0;
        init.SystemRequestFrameBaseAddress.High = 0;
        init.SystemRequestFrameBaseAddress.Low =
            htole32((uint32_t)sc->req_busaddr);
        init.ReplyDescriptorPostQueueAddress.High = 0;
        init.ReplyDescriptorPostQueueAddress.Low =
            htole32((uint32_t)sc->post_busaddr);
        init.ReplyFreeQueueAddress.High = 0;
        init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr);
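        /*
         * The IOC takes its timestamp as milliseconds since the Unix epoch,
         * split into little-endian high and low 32-bit halves.
         */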
        getmicrotime(&now);
        time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000);
        init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF);
        init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF);
        init.HostPageSize = HOST_PAGE_SIZE_4K;

        error = mpr_request_sync(sc, &init, &reply, req_sz, reply_sz, 5);
        if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS)
                error = ENXIO;

        mpr_dprint(sc, MPR_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus);
        return (error);
}

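/*
 * bus_dmamap_load() callback.  All of the loads in this driver use a single
 * segment, so just record that segment's bus address for the caller.
 */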
void
mpr_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
        bus_addr_t *addr;

        addr = arg;
        *addr = segs[0].ds_addr;
}

static int
mpr_alloc_queues(struct mpr_softc *sc)
{
        bus_addr_t queues_busaddr;
        uint8_t *queues;
        int qsize, fqsize, pqsize;

        /*
         * The reply free queue contains 4 byte entries in multiples of 16 and
         * aligned on a 16 byte boundary. There must always be an unused entry.
         * This queue supplies fresh reply frames for the firmware to use.
         *
         * The reply descriptor post queue contains 8 byte entries in
         * multiples of 16 and aligned on a 16 byte boundary.  This queue
         * contains filled-in reply frames sent from the firmware to the host.
         *
         * These two queues are allocated together for simplicity.
         */
        sc->fqdepth = roundup2(sc->num_replies + 1, 16);
        sc->pqdepth = roundup2(sc->num_replies + 1, 16);
        fqsize = sc->fqdepth * 4;
        pqsize = sc->pqdepth * 8;
        qsize = fqsize + pqsize;
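        /*
         * Worked example (hypothetical numbers): if num_replies were 1015,
         * both depths round up to 1024, giving a 4 KB free queue, an 8 KB
         * post queue, and a 12 KB combined allocation.
         */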

        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                16, 0,                  /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                qsize,                  /* maxsize */
                                1,                      /* nsegments */
                                qsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->queues_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate queues DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT,
            &sc->queues_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate queues memory\n");
                return (ENOMEM);
        }
        bzero(queues, qsize);
        bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize,
            mpr_memaddr_cb, &queues_busaddr, 0);

        sc->free_queue = (uint32_t *)queues;
        sc->free_busaddr = queues_busaddr;
        sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize);
        sc->post_busaddr = queues_busaddr + fqsize;

        return (0);
}

static int
mpr_alloc_replies(struct mpr_softc *sc)
{
        int rsize, num_replies;

        /*
         * sc->num_replies should be one less than sc->fqdepth.  We need to
         * allocate space for sc->fqdepth replies, but only sc->num_replies
         * replies can be used at once.
         */
        num_replies = max(sc->fqdepth, sc->num_replies);

        rsize = sc->facts->ReplyFrameSize * num_replies * 4;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                4, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->reply_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate replies DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames,
            BUS_DMA_NOWAIT, &sc->reply_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate replies memory\n");
                return (ENOMEM);
        }
        bzero(sc->reply_frames, rsize);
        bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize,
            mpr_memaddr_cb, &sc->reply_busaddr, 0);

        return (0);
}

static int
mpr_alloc_requests(struct mpr_softc *sc)
{
        struct mpr_command *cm;
        struct mpr_chain *chain;
        int i, rsize, nsegs;

        rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                16, 0,                  /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->req_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate request DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames,
            BUS_DMA_NOWAIT, &sc->req_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate request memory\n");
                return (ENOMEM);
        }
        bzero(sc->req_frames, rsize);
        bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize,
            mpr_memaddr_cb, &sc->req_busaddr, 0);

 1226         /*
 1227          * Gen3 and beyond use the IOCMaxChainSegmentSize from IOC Facts to
 1228          * get the size of a Chain Frame.  Previous generations use the
 1229          * Request Frame size as the Chain Frame size.  If
 1230          * IOCMaxChainSegmentSize is 0, use the default value.
 1231          * IOCMaxChainSegmentSize is the number of 16-byte elements (the
 1232          * size of an IEEE Simple SGE) that can fit in a Chain Frame.
 1233          */
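        /*
         * Worked example (illustrative values, not from any particular
         * adapter): a Gen3 IOC reporting IOCMaxChainSegmentSize = 8 gives
         * chain_frame_size = 8 * MPR_MAX_CHAIN_ELEMENT_SIZE (16 bytes, one
         * IEEE Simple SGE) = 128 bytes; a pre-Gen3 IOC reporting
         * IOCRequestFrameSize = 32 (4-byte words) gives 32 * 4 = 128 bytes.
         */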
 1234         if (sc->facts->MsgVersion >= MPI2_VERSION_02_05) {
 1235                 sc->chain_seg_size =
 1236                     htole16(sc->facts->IOCMaxChainSegmentSize);
 1237                 if (sc->chain_seg_size == 0) {
 1238                         sc->chain_frame_size = MPR_DEFAULT_CHAIN_SEG_SIZE *
 1239                             MPR_MAX_CHAIN_ELEMENT_SIZE;
 1240                 } else {
 1241                         sc->chain_frame_size = sc->chain_seg_size *
 1242                             MPR_MAX_CHAIN_ELEMENT_SIZE;
 1243                 }
 1244         } else {
 1245                 sc->chain_frame_size = sc->facts->IOCRequestFrameSize * 4;
 1246         }
 1247         rsize = sc->chain_frame_size * sc->max_chains;
 1248         if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
 1249                                 16, 0,                  /* algnmnt, boundary */
 1250                                 BUS_SPACE_MAXADDR,      /* lowaddr */
 1251                                 BUS_SPACE_MAXADDR,      /* highaddr */
 1252                                 NULL, NULL,             /* filter, filterarg */
 1253                                 rsize,                  /* maxsize */
 1254                                 1,                      /* nsegments */
 1255                                 rsize,                  /* maxsegsize */
 1256                                 0,                      /* flags */
 1257                                 NULL, NULL,             /* lockfunc, lockarg */
 1258                                 &sc->chain_dmat)) {
 1259                 device_printf(sc->mpr_dev, "Cannot allocate chain DMA tag\n");
 1260                 return (ENOMEM);
 1261         }
 1262         if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames,
 1263             BUS_DMA_NOWAIT, &sc->chain_map)) {
 1264                 device_printf(sc->mpr_dev, "Cannot allocate chain memory\n");
 1265                 return (ENOMEM);
 1266         }
 1267         bzero(sc->chain_frames, rsize);
 1268         bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize,
 1269             mpr_memaddr_cb, &sc->chain_busaddr, 0);
 1270 
 1271         rsize = MPR_SENSE_LEN * sc->num_reqs;
 1272         if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
 1273                                 1, 0,                   /* algnmnt, boundary */
 1274                                 BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
 1275                                 BUS_SPACE_MAXADDR,      /* highaddr */
 1276                                 NULL, NULL,             /* filter, filterarg */
 1277                                 rsize,                  /* maxsize */
 1278                                 1,                      /* nsegments */
 1279                                 rsize,                  /* maxsegsize */
 1280                                 0,                      /* flags */
 1281                                 NULL, NULL,             /* lockfunc, lockarg */
 1282                                 &sc->sense_dmat)) {
 1283                 device_printf(sc->mpr_dev, "Cannot allocate sense DMA tag\n");
 1284                 return (ENOMEM);
 1285         }
 1286         if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames,
 1287             BUS_DMA_NOWAIT, &sc->sense_map)) {
 1288                 device_printf(sc->mpr_dev, "Cannot allocate sense memory\n");
 1289                 return (ENOMEM);
 1290         }
 1291         bzero(sc->sense_frames, rsize);
 1292         bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize,
 1293             mpr_memaddr_cb, &sc->sense_busaddr, 0);
 1294 
 1295         sc->chains = malloc(sizeof(struct mpr_chain) * sc->max_chains, M_MPR,
 1296             M_WAITOK | M_ZERO);
 1297         if (!sc->chains) {
 1298                 device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
 1299                     __func__, __LINE__);
 1300                 return (ENOMEM);
 1301         }
 1302         for (i = 0; i < sc->max_chains; i++) {
 1303                 chain = &sc->chains[i];
 1304                 chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames +
 1305                     i * sc->chain_frame_size);
 1306                 chain->chain_busaddr = sc->chain_busaddr +
 1307                     i * sc->chain_frame_size;
 1308                 mpr_free_chain(sc, chain);
 1309                 sc->chain_free_lowwater++;
 1310         }
 1311 
 1312         /*
 1313          * Allocate NVMe PRP Pages for NVMe SGL support only if the FW supports
 1314          * these devices.
 1315          */
 1316         if ((sc->facts->MsgVersion >= MPI2_VERSION_02_06) &&
 1317             (sc->facts->ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES)) {
 1318                 if (mpr_alloc_nvme_prp_pages(sc) == ENOMEM)
 1319                         return (ENOMEM);
 1320         }
 1321 
 1322         /* XXX Need to pick a more precise value */
 1323         nsegs = (MAXPHYS / PAGE_SIZE) + 1;
 1324         if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
 1325                                 1, 0,                   /* algnmnt, boundary */
 1326                                 BUS_SPACE_MAXADDR,      /* lowaddr */
 1327                                 BUS_SPACE_MAXADDR,      /* highaddr */
 1328                                 NULL, NULL,             /* filter, filterarg */
 1329                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
 1330                                 nsegs,                  /* nsegments */
 1331                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
 1332                                 BUS_DMA_ALLOCNOW,       /* flags */
 1333                                 busdma_lock_mutex,      /* lockfunc */
 1334                                 &sc->mpr_mtx,           /* lockarg */
 1335                                 &sc->buffer_dmat)) {
 1336                 device_printf(sc->mpr_dev, "Cannot allocate buffer DMA tag\n");
 1337                 return (ENOMEM);
 1338         }
 1339 
 1340         /*
 1341          * SMID 0 cannot be used as a free command per the firmware spec.
 1342          * Just drop that command instead of risking accounting bugs.
 1343          */
 1344         sc->commands = malloc(sizeof(struct mpr_command) * sc->num_reqs,
 1345             M_MPR, M_WAITOK | M_ZERO);
 1346         if (!sc->commands) {
 1347                 device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
 1348                     __func__, __LINE__);
 1349                 return (ENOMEM);
 1350         }
 1351         for (i = 1; i < sc->num_reqs; i++) {
 1352                 cm = &sc->commands[i];
 1353                 cm->cm_req = sc->req_frames +
 1354                     i * sc->facts->IOCRequestFrameSize * 4;
 1355                 cm->cm_req_busaddr = sc->req_busaddr +
 1356                     i * sc->facts->IOCRequestFrameSize * 4;
 1357                 cm->cm_sense = &sc->sense_frames[i];
 1358                 cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN;
 1359                 cm->cm_desc.Default.SMID = i;
 1360                 cm->cm_sc = sc;
 1361                 TAILQ_INIT(&cm->cm_chain_list);
 1362                 TAILQ_INIT(&cm->cm_prp_page_list);
 1363                 callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0);
 1364 
 1365                 /* XXX Is a failure here a critical problem? */
 1366                 if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap)
 1367                     == 0) {
 1368                         if (i <= sc->num_prireqs)
 1369                                 mpr_free_high_priority_command(sc, cm);
 1370                         else
 1371                                 mpr_free_command(sc, cm);
 1372                 } else {
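                        /*
                         * Note that panic() does not return, so the two
                         * statements below are unreachable; they would only
                         * take effect if this path were ever downgraded to a
                         * non-fatal error.
                         */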
 1373                         panic("failed to allocate command %d\n", i);
 1374                         sc->num_reqs = i;
 1375                         break;
 1376                 }
 1377         }
 1378 
 1379         return (0);
 1380 }
 1381 
 1382 /*
 1383  * Allocate contiguous buffers for PCIe NVMe devices for building native PRPs,
 1384  * which serve as scatter/gather lists for these devices.
 1385  *
 1386  * This buffer must be contiguous due to the nature of how NVMe PRPs are built
 1387  * and translated by FW.
 1388  *
 1389  * Returns ENOMEM if memory could not be allocated, otherwise returns 0.
 1390  */
 1391 static int
 1392 mpr_alloc_nvme_prp_pages(struct mpr_softc *sc)
 1393 {
 1394         int PRPs_per_page, PRPs_required, pages_required;
 1395         int rsize, i;
 1396         struct mpr_prp_page *prp_page;
 1397 
 1398         /*
 1399          * Assuming a MAX_IO_SIZE of 1MB and a PAGE_SIZE of 4k, the max number
 1400          * of PRPs (NVMe's Scatter/Gather Elements) needed per I/O is:
 1401          * MAX_IO_SIZE / PAGE_SIZE = 256
 1402          *
 1403          * Using 1 PRP entry in the main frame for the PRP list pointer still
 1404          * leaves 255 PRPs required for the remainder of the 1MB I/O. 512 PRPs
 1405          * can fit into one page (4096 / 8 = 512), so only one page is required
 1406          * for each I/O.
 1407          *
 1408          * Each of these buffers must be contiguous. For simplicity, only one
 1409          * buffer is allocated here, which has all of the space required for
 1410          * the NVMe Queue Depth. If there are problems allocating this one
 1411          * buffer, this function will need to change to allocate individual,
 1412          * contiguous NVME_QDEPTH buffers.
 1413          *
 1414          * The real calculation uses the actual max I/O size (sc->maxio);
 1415          * the numbers above are just an example.
 1416          */
 1417         PRPs_required = sc->maxio / PAGE_SIZE;
 1418         PRPs_per_page = (PAGE_SIZE / PRP_ENTRY_SIZE) - 1;
 1419         pages_required = (PRPs_required / PRPs_per_page) + 1;
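        /*
         * Worked example with the numbers from the comment above: maxio =
         * 1MB and PAGE_SIZE = 4096 give PRPs_required = 256 and
         * PRPs_per_page = (4096 / 8) - 1 = 511 (one entry per page is
         * reserved for chaining to the next page), so pages_required =
         * 256 / 511 + 1 = 1 page per I/O and rsize = PAGE_SIZE * NVME_QDEPTH.
         */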
 1420 
 1421         sc->prp_buffer_size = PAGE_SIZE * pages_required; 
 1422         rsize = sc->prp_buffer_size * NVME_QDEPTH; 
 1423         if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
 1424                                 4, 0,                   /* algnmnt, boundary */
 1425                                 BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
 1426                                 BUS_SPACE_MAXADDR,      /* highaddr */
 1427                                 NULL, NULL,             /* filter, filterarg */
 1428                                 rsize,                  /* maxsize */
 1429                                 1,                      /* nsegments */
 1430                                 rsize,                  /* maxsegsize */
 1431                                 0,                      /* flags */
 1432                                 NULL, NULL,             /* lockfunc, lockarg */
 1433                                 &sc->prp_page_dmat)) {
 1434                 device_printf(sc->mpr_dev, "Cannot allocate NVMe PRP DMA "
 1435                     "tag\n");
 1436                 return (ENOMEM);
 1437         }
 1438         if (bus_dmamem_alloc(sc->prp_page_dmat, (void **)&sc->prp_pages,
 1439             BUS_DMA_NOWAIT, &sc->prp_page_map)) {
 1440                 device_printf(sc->mpr_dev, "Cannot allocate NVMe PRP memory\n");
 1441                 return (ENOMEM);
 1442         }
 1443         bzero(sc->prp_pages, rsize);
 1444         bus_dmamap_load(sc->prp_page_dmat, sc->prp_page_map, sc->prp_pages,
 1445             rsize, mpr_memaddr_cb, &sc->prp_page_busaddr, 0);
 1446 
 1447         sc->prps = malloc(sizeof(struct mpr_prp_page) * NVME_QDEPTH, M_MPR,
 1448             M_WAITOK | M_ZERO);
 1449         for (i = 0; i < NVME_QDEPTH; i++) {
 1450                 prp_page = &sc->prps[i];
 1451                 prp_page->prp_page = (uint64_t *)(sc->prp_pages +
 1452                     i * sc->prp_buffer_size);
 1453                 prp_page->prp_page_busaddr = (uint64_t)(sc->prp_page_busaddr +
 1454                     i * sc->prp_buffer_size);
 1455                 mpr_free_prp_page(sc, prp_page);
 1456                 sc->prp_pages_free_lowwater++;
 1457         }
 1458 
 1459         return (0);
 1460 }
 1461 
 1462 static int
 1463 mpr_init_queues(struct mpr_softc *sc)
 1464 {
 1465         int i;
 1466 
 1467         memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8);
 1468 
 1469         /*
 1470          * According to the spec, we need to use one less reply than we
 1471          * have space for on the queue.  So sc->num_replies (the number we
 1472          * use) should be less than sc->fqdepth (allocated size).
 1473          */
 1474         if (sc->num_replies >= sc->fqdepth)
 1475                 return (EINVAL);
 1476 
 1477         /*
 1478          * Initialize all of the free queue entries.
 1479          */
 1480         for (i = 0; i < sc->fqdepth; i++) {
 1481                 sc->free_queue[i] = sc->reply_busaddr +
 1482                     (i * sc->facts->ReplyFrameSize * 4);
 1483         }
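        /*
         * Illustrative layout, assuming ReplyFrameSize = 32 (4-byte words,
         * so 128 bytes per frame): free_queue[0] = reply_busaddr,
         * free_queue[1] = reply_busaddr + 128, and so on.  Each entry hands
         * the firmware the bus address of one free reply frame.
         */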
 1484         sc->replyfreeindex = sc->num_replies;
 1485 
 1486         return (0);
 1487 }
 1488 
 1489 /* Get the driver parameter tunables.  Lowest priority are the driver defaults.
 1490  * Next are the global settings, if they exist.  Highest are the per-unit
 1491  * settings, if they exist.
 1492  */
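/*
 * For example, a global default plus a per-unit override can be set from
 * /boot/loader.conf (values here are illustrative):
 *
 *     hw.mpr.max_chains="4096"
 *     dev.mpr.0.max_chains="8192"
 *
 * Unit 0 ends up with 8192 because the per-unit fetch happens last.
 */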
 1493 void
 1494 mpr_get_tunables(struct mpr_softc *sc)
 1495 {
 1496         char tmpstr[80];
 1497 
 1498         /* XXX default to some debugging for now */
 1499         sc->mpr_debug = MPR_INFO | MPR_FAULT;
 1500         sc->disable_msix = 0;
 1501         sc->disable_msi = 0;
 1502         sc->max_chains = MPR_CHAIN_FRAMES;
 1503         sc->max_io_pages = MPR_MAXIO_PAGES;
 1504         sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD;
 1505         sc->spinup_wait_time = DEFAULT_SPINUP_WAIT;
 1506         sc->use_phynum = 1;
 1507 
 1508         /*
 1509          * Grab the global variables.
 1510          */
 1511         TUNABLE_INT_FETCH("hw.mpr.debug_level", &sc->mpr_debug);
 1512         TUNABLE_INT_FETCH("hw.mpr.disable_msix", &sc->disable_msix);
 1513         TUNABLE_INT_FETCH("hw.mpr.disable_msi", &sc->disable_msi);
 1514         TUNABLE_INT_FETCH("hw.mpr.max_chains", &sc->max_chains);
 1515         TUNABLE_INT_FETCH("hw.mpr.max_io_pages", &sc->max_io_pages);
 1516         TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu);
 1517         TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time);
 1518         TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum);
 1519 
 1520         /* Grab the unit-instance variables */
 1521         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.debug_level",
 1522             device_get_unit(sc->mpr_dev));
 1523         TUNABLE_INT_FETCH(tmpstr, &sc->mpr_debug);
 1524 
 1525         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msix",
 1526             device_get_unit(sc->mpr_dev));
 1527         TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix);
 1528 
 1529         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msi",
 1530             device_get_unit(sc->mpr_dev));
 1531         TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi);
 1532 
 1533         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_chains",
 1534             device_get_unit(sc->mpr_dev));
 1535         TUNABLE_INT_FETCH(tmpstr, &sc->max_chains);
 1536 
 1537         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_io_pages",
 1538             device_get_unit(sc->mpr_dev));
 1539         TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages);
 1540 
 1541         bzero(sc->exclude_ids, sizeof(sc->exclude_ids));
 1542         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.exclude_ids",
 1543             device_get_unit(sc->mpr_dev));
 1544         TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids));
 1545 
 1546         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.enable_ssu",
 1547             device_get_unit(sc->mpr_dev));
 1548         TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu);
 1549 
 1550         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.spinup_wait_time",
 1551             device_get_unit(sc->mpr_dev));
 1552         TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time);
 1553 
 1554         snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.use_phy_num",
 1555             device_get_unit(sc->mpr_dev));
 1556         TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum);
 1557 }
 1558 
 1559 static void
 1560 mpr_setup_sysctl(struct mpr_softc *sc)
 1561 {
 1562         struct sysctl_ctx_list  *sysctl_ctx = NULL;
 1563         struct sysctl_oid       *sysctl_tree = NULL;
 1564         char tmpstr[80], tmpstr2[80];
 1565 
 1566         /*
 1567          * Setup the sysctl variable so the user can change the debug level
 1568          * on the fly.
 1569          */
 1570         snprintf(tmpstr, sizeof(tmpstr), "MPR controller %d",
 1571             device_get_unit(sc->mpr_dev));
 1572         snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mpr_dev));
 1573 
 1574         sysctl_ctx = device_get_sysctl_ctx(sc->mpr_dev);
 1575         if (sysctl_ctx != NULL)
 1576                 sysctl_tree = device_get_sysctl_tree(sc->mpr_dev);
 1577 
 1578         if (sysctl_tree == NULL) {
 1579                 sysctl_ctx_init(&sc->sysctl_ctx);
 1580                 sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
 1581                     SYSCTL_STATIC_CHILDREN(_hw_mpr), OID_AUTO, tmpstr2,
 1582                     CTLFLAG_RD, 0, tmpstr);
 1583                 if (sc->sysctl_tree == NULL)
 1584                         return;
 1585                 sysctl_ctx = &sc->sysctl_ctx;
 1586                 sysctl_tree = sc->sysctl_tree;
 1587         }
 1588 
 1589         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1590             OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mpr_debug, 0,
 1591             "mpr debug level");
 1592 
 1593         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1594             OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0,
 1595             "Disable the use of MSI-X interrupts");
 1596 
 1597         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1598             OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0,
 1599             "Disable the use of MSI interrupts");
 1600 
 1601         SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1602             OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version,
 1603             strlen(sc->fw_version), "firmware version");
 1604 
 1605         SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1606             OID_AUTO, "driver_version", CTLFLAG_RW, MPR_DRIVER_VERSION,
 1607             strlen(MPR_DRIVER_VERSION), "driver version");
 1608 
 1609         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1610             OID_AUTO, "io_cmds_active", CTLFLAG_RD,
 1611             &sc->io_cmds_active, 0, "number of currently active commands");
 1612 
 1613         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1614             OID_AUTO, "io_cmds_highwater", CTLFLAG_RD,
 1615             &sc->io_cmds_highwater, 0, "maximum active commands seen");
 1616 
 1617         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1618             OID_AUTO, "chain_free", CTLFLAG_RD,
 1619             &sc->chain_free, 0, "number of free chain elements");
 1620 
 1621         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1622             OID_AUTO, "chain_free_lowwater", CTLFLAG_RD,
 1623             &sc->chain_free_lowwater, 0,"lowest number of free chain elements");
 1624 
 1625         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1626             OID_AUTO, "max_chains", CTLFLAG_RD,
 1627             &sc->max_chains, 0,"maximum chain frames that will be allocated");
 1628 
 1629         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1630             OID_AUTO, "max_io_pages", CTLFLAG_RD,
 1631             &sc->max_io_pages, 0,"maximum pages to allow per I/O (if <1 use "
 1632             "IOCFacts)");
 1633 
 1634         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1635             OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0,
 1636             "enable SSU to SATA SSD/HDD at shutdown");
 1637 
 1638         SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1639             OID_AUTO, "chain_alloc_fail", CTLFLAG_RD,
 1640             &sc->chain_alloc_fail, "chain allocation failures");
 1641 
 1642         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1643             OID_AUTO, "spinup_wait_time", CTLFLAG_RD,
 1644             &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for "
 1645             "spinup after SATA ID error");
 1646 
 1647         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1648             OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0,
 1649             "Use the phy number for enumeration");
 1650 
 1651         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1652             OID_AUTO, "prp_pages_free", CTLFLAG_RD,
 1653             &sc->prp_pages_free, 0, "number of free PRP pages");
 1654 
 1655         SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1656             OID_AUTO, "prp_pages_free_lowwater", CTLFLAG_RD,
 1657             &sc->prp_pages_free_lowwater, 0,"lowest number of free PRP pages");
 1658 
 1659         SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
 1660             OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD,
 1661             &sc->prp_page_alloc_fail, "PRP page allocation failures");
 1662 }
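/*
 * The read/write nodes installed above can then be changed at runtime, e.g.
 * (unit number and values are illustrative):
 *
 *     sysctl dev.mpr.0.debug_level=0x3
 *     sysctl dev.mpr.0.enable_ssu=1
 */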
 1663 
 1664 int
 1665 mpr_attach(struct mpr_softc *sc)
 1666 {
 1667         int error;
 1668 
 1669         MPR_FUNCTRACE(sc);
 1670 
 1671         mtx_init(&sc->mpr_mtx, "MPR lock", NULL, MTX_DEF);
 1672         callout_init_mtx(&sc->periodic, &sc->mpr_mtx, 0);
 1673         callout_init_mtx(&sc->device_check_callout, &sc->mpr_mtx, 0);
 1674         TAILQ_INIT(&sc->event_list);
 1675         timevalclear(&sc->lastfail);
 1676 
 1677         if ((error = mpr_transition_ready(sc)) != 0) {
 1678                 mpr_printf(sc, "%s failed to transition ready\n", __func__);
 1679                 return (error);
 1680         }
 1681 
 1682         sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPR,
 1683             M_ZERO|M_NOWAIT);
 1684         if (!sc->facts) {
 1685                 device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
 1686                     __func__, __LINE__);
 1687                 return (ENOMEM);
 1688         }
 1689 
 1690         /*
 1691          * Get IOC Facts and allocate all structures based on this information.
 1692          * A Diag Reset will also call mpr_iocfacts_allocate and re-read the IOC
 1693          * Facts. If relevant values have changed in IOC Facts, this function
 1694          * will free all of the memory based on IOC Facts and reallocate that
 1695          * memory.  If this fails, any allocated memory should already be freed.
 1696          */
 1697         if ((error = mpr_iocfacts_allocate(sc, TRUE)) != 0) {
 1698                 mpr_dprint(sc, MPR_FAULT, "%s IOC Facts based allocation "
 1699                     "failed with error %d\n", __func__, error);
 1700                 return (error);
 1701         }
 1702 
 1703         /* Start the periodic watchdog check on the IOC Doorbell */
 1704         mpr_periodic(sc);
 1705 
 1706         /*
 1707          * The portenable will kick off discovery events that will drive the
 1708          * rest of the initialization process.  The CAM/SAS module will
 1709          * hold up the boot sequence until discovery is complete.
 1710          */
 1711         sc->mpr_ich.ich_func = mpr_startup;
 1712         sc->mpr_ich.ich_arg = sc;
 1713         if (config_intrhook_establish(&sc->mpr_ich) != 0) {
 1714                 mpr_dprint(sc, MPR_ERROR, "Cannot establish MPR config hook\n");
 1715                 error = EINVAL;
 1716         }
 1717 
 1718         /*
 1719          * Allow IR to shutdown gracefully when shutdown occurs.
 1720          */
 1721         sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final,
 1722             mprsas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT);
 1723 
 1724         if (sc->shutdown_eh == NULL)
 1725                 mpr_dprint(sc, MPR_ERROR, "shutdown event registration "
 1726                     "failed\n");
 1727 
 1728         mpr_setup_sysctl(sc);
 1729 
 1730         sc->mpr_flags |= MPR_FLAGS_ATTACH_DONE;
 1731 
 1732         return (error);
 1733 }
 1734 
 1735 /* Run through any late-start handlers. */
 1736 static void
 1737 mpr_startup(void *arg)
 1738 {
 1739         struct mpr_softc *sc;
 1740 
 1741         sc = (struct mpr_softc *)arg;
 1742 
 1743         mpr_lock(sc);
 1744         mpr_unmask_intr(sc);
 1745 
 1746         /* initialize device mapping tables */
 1747         mpr_base_static_config_pages(sc);
 1748         mpr_mapping_initialize(sc);
 1749         mprsas_startup(sc);
 1750         mpr_unlock(sc);
 1751 }
 1752 
 1753 /* Periodic watchdog.  Is called with the driver lock already held. */
 1754 static void
 1755 mpr_periodic(void *arg)
 1756 {
 1757         struct mpr_softc *sc;
 1758         uint32_t db;
 1759 
 1760         sc = (struct mpr_softc *)arg;
 1761         if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN)
 1762                 return;
 1763 
 1764         db = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
 1765         if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
 1766                 if ((db & MPI2_DOORBELL_FAULT_CODE_MASK) ==
 1767                     IFAULT_IOP_OVER_TEMP_THRESHOLD_EXCEEDED) {
 1768                         panic("TEMPERATURE FAULT: STOPPING.");
 1769                 }
 1770                 mpr_dprint(sc, MPR_FAULT, "IOC Fault 0x%08x, Resetting\n", db);
 1771                 mpr_reinit(sc);
 1772         }
 1773 
 1774         callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz, mpr_periodic, sc);
 1775 }
 1776 
 1777 static void
 1778 mpr_log_evt_handler(struct mpr_softc *sc, uintptr_t data,
 1779     MPI2_EVENT_NOTIFICATION_REPLY *event)
 1780 {
 1781         MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry;
 1782 
 1783         MPR_DPRINT_EVENT(sc, generic, event);
 1784 
 1785         switch (event->Event) {
 1786         case MPI2_EVENT_LOG_DATA:
 1787                 mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_DATA:\n");
 1788                 if (sc->mpr_debug & MPR_EVENT)
 1789                         hexdump(event->EventData, event->EventDataLength, NULL,
 1790                             0);
 1791                 break;
 1792         case MPI2_EVENT_LOG_ENTRY_ADDED:
 1793                 entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData;
 1794                 mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event "
 1795                     "0x%x Sequence %d:\n", entry->LogEntryQualifier,
 1796                      entry->LogSequence);
 1797                 break;
 1798         default:
 1799                 break;
 1800         }
 1801         return;
 1802 }
 1803 
 1804 static int
 1805 mpr_attach_log(struct mpr_softc *sc)
 1806 {
 1807         uint8_t events[16];
 1808 
 1809         bzero(events, 16);
 1810         setbit(events, MPI2_EVENT_LOG_DATA);
 1811         setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED);
 1812 
 1813         mpr_register_events(sc, events, mpr_log_evt_handler, NULL,
 1814             &sc->mpr_log_eh);
 1815 
 1816         return (0);
 1817 }
 1818 
 1819 static int
 1820 mpr_detach_log(struct mpr_softc *sc)
 1821 {
 1822 
 1823         if (sc->mpr_log_eh != NULL)
 1824                 mpr_deregister_events(sc, sc->mpr_log_eh);
 1825         return (0);
 1826 }
 1827 
 1828 /*
 1829  * Free all of the driver resources and detach submodules.  Should be called
 1830  * without the lock held.
 1831  */
 1832 int
 1833 mpr_free(struct mpr_softc *sc)
 1834 {
 1835         int error;
 1836 
 1837         /* Turn off the watchdog */
 1838         mpr_lock(sc);
 1839         sc->mpr_flags |= MPR_FLAGS_SHUTDOWN;
 1840         mpr_unlock(sc);
 1841         /* Lock must not be held for this */
 1842         callout_drain(&sc->periodic);
 1843         callout_drain(&sc->device_check_callout);
 1844 
 1845         if (((error = mpr_detach_log(sc)) != 0) ||
 1846             ((error = mpr_detach_sas(sc)) != 0))
 1847                 return (error);
 1848 
 1849         mpr_detach_user(sc);
 1850 
 1851         /* Put the IOC back in the READY state. */
 1852         mpr_lock(sc);
 1853         if ((error = mpr_transition_ready(sc)) != 0) {
 1854                 mpr_unlock(sc);
 1855                 return (error);
 1856         }
 1857         mpr_unlock(sc);
 1858 
 1859         if (sc->facts != NULL)
 1860                 free(sc->facts, M_MPR);
 1861 
 1862         /*
 1863          * Free all buffers that are based on IOC Facts.  A Diag Reset may need
 1864          * to free these buffers too.
 1865          */
 1866         mpr_iocfacts_free(sc);
 1867 
 1868         if (sc->sysctl_tree != NULL)
 1869                 sysctl_ctx_free(&sc->sysctl_ctx);
 1870 
 1871         /* Deregister the shutdown function */
 1872         if (sc->shutdown_eh != NULL)
 1873                 EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh);
 1874 
 1875         mtx_destroy(&sc->mpr_mtx);
 1876 
 1877         return (0);
 1878 }
 1879 
 1880 static __inline void
 1881 mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm)
 1882 {
 1883         MPR_FUNCTRACE(sc);
 1884 
 1885         if (cm == NULL) {
 1886                 mpr_dprint(sc, MPR_ERROR, "Completing NULL command\n");
 1887                 return;
 1888         }
 1889 
 1890         if (cm->cm_flags & MPR_CM_FLAGS_POLLED)
 1891                 cm->cm_flags |= MPR_CM_FLAGS_COMPLETE;
 1892 
 1893         if (cm->cm_complete != NULL) {
 1894                 mpr_dprint(sc, MPR_TRACE,
 1895                     "%s cm %p calling cm_complete %p data %p reply %p\n",
 1896                     __func__, cm, cm->cm_complete, cm->cm_complete_data,
 1897                     cm->cm_reply);
 1898                 cm->cm_complete(sc, cm);
 1899         }
 1900 
 1901         if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
 1902                 mpr_dprint(sc, MPR_TRACE, "waking up %p\n", cm);
 1903                 wakeup(cm);
 1904         }
 1905 
 1906         if (sc->io_cmds_active != 0) {
 1907                 sc->io_cmds_active--;
 1908         } else {
 1909                 mpr_dprint(sc, MPR_ERROR, "Warning: io_cmds_active is "
 1910                     "out of sync - resynching to 0\n");
 1911         }
 1912 }
 1913 
 1914 static void
 1915 mpr_sas_log_info(struct mpr_softc *sc, u32 log_info)
 1916 {
 1917         union loginfo_type {
 1918                 u32     loginfo;
 1919                 struct {
 1920                         u32     subcode:16;
 1921                         u32     code:8;
 1922                         u32     originator:4;
 1923                         u32     bus_type:4;
 1924                 } dw;
 1925         };
 1926         union loginfo_type sas_loginfo;
 1927         char *originator_str = NULL;
 1928  
 1929         sas_loginfo.loginfo = log_info;
 1930         if (sas_loginfo.dw.bus_type != 3 /*SAS*/)
 1931                 return;
 1932  
 1933         /* each nexus loss loginfo */
 1934         if (log_info == 0x31170000)
 1935                 return;
 1936  
 1937         /* eat the loginfos associated with task aborts */
 1938         if ((log_info == 0x30050000) || (log_info == 0x31140000) ||
 1939             (log_info == 0x31130000))
 1940                 return;
 1941  
 1942         switch (sas_loginfo.dw.originator) {
 1943         case 0:
 1944                 originator_str = "IOP";
 1945                 break;
 1946         case 1:
 1947                 originator_str = "PL";
 1948                 break;
 1949         case 2:
 1950                 originator_str = "IR";
 1951                 break;
 1952         }
 1953  
 1954         mpr_dprint(sc, MPR_LOG, "log_info(0x%08x): originator(%s), "
 1955             "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str,
 1956             sas_loginfo.dw.code, sas_loginfo.dw.subcode);
 1957 }
 1958 
 1959 static void
 1960 mpr_display_reply_info(struct mpr_softc *sc, uint8_t *reply)
 1961 {
 1962         MPI2DefaultReply_t *mpi_reply;
 1963         u16 sc_status;
 1964  
 1965         mpi_reply = (MPI2DefaultReply_t*)reply;
 1966         sc_status = le16toh(mpi_reply->IOCStatus);
 1967         if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
 1968                 mpr_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo));
 1969 }
 1970 
 1971 void
 1972 mpr_intr(void *data)
 1973 {
 1974         struct mpr_softc *sc;
 1975         uint32_t status;
 1976 
 1977         sc = (struct mpr_softc *)data;
 1978         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 1979 
 1980         /*
 1981          * Check interrupt status register to flush the bus.  This is
 1982          * needed for both INTx interrupts and driver-driven polling
 1983          */
 1984         status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
 1985         if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0)
 1986                 return;
 1987 
 1988         mpr_lock(sc);
 1989         mpr_intr_locked(data);
 1990         mpr_unlock(sc);
 1991         return;
 1992 }
 1993 
 1994 /*
 1995  * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the
 1996  * chip.  Hopefully this theory is correct.
 1997  */
 1998 void
 1999 mpr_intr_msi(void *data)
 2000 {
 2001         struct mpr_softc *sc;
 2002 
 2003         sc = (struct mpr_softc *)data;
 2004         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 2005         mpr_lock(sc);
 2006         mpr_intr_locked(data);
 2007         mpr_unlock(sc);
 2008         return;
 2009 }
 2010 
 2011 /*
 2012  * The locking is overly broad and simplistic, but easy to deal with for now.
 2013  */
 2014 void
 2015 mpr_intr_locked(void *data)
 2016 {
 2017         MPI2_REPLY_DESCRIPTORS_UNION *desc;
 2018         struct mpr_softc *sc;
 2019         struct mpr_command *cm = NULL;
 2020         uint8_t flags;
 2021         u_int pq;
 2022         MPI2_DIAG_RELEASE_REPLY *rel_rep;
 2023         mpr_fw_diagnostic_buffer_t *pBuffer;
 2024 
 2025         sc = (struct mpr_softc *)data;
 2026 
 2027         pq = sc->replypostindex;
 2028         mpr_dprint(sc, MPR_TRACE,
 2029             "%s sc %p starting with replypostindex %u\n", 
 2030             __func__, sc, sc->replypostindex);
 2031 
 2032         for ( ;; ) {
 2033                 cm = NULL;
 2034                 desc = &sc->post_queue[sc->replypostindex];
 2035                 flags = desc->Default.ReplyFlags &
 2036                     MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
 2037                 if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) ||
 2038                     (le32toh(desc->Words.High) == 0xffffffff))
 2039                         break;
 2040 
 2041                 /* increment the replypostindex now, so that event handlers
 2042                  * and cm completion handlers which decide to do a diag
 2043                  * reset can zero it without it getting incremented again
 2044                  * afterwards, and we break out of this loop on the next
 2045                  * iteration since the reply post queue has been cleared to
 2046                  * 0xFF and all descriptors look unused (which they are).
 2047                  */
 2048                 if (++sc->replypostindex >= sc->pqdepth)
 2049                         sc->replypostindex = 0;
 2050 
 2051                 switch (flags) {
 2052                 case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS:
 2053                 case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS:
 2054                 case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS:
 2055                         cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
 2056                         cm->cm_reply = NULL;
 2057                         break;
 2058                 case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
 2059                 {
 2060                         uint32_t baddr;
 2061                         uint8_t *reply;
 2062 
 2063                         /*
 2064                          * Re-compose the reply address from the address
 2065                          * sent back from the chip.  The ReplyFrameAddress
 2066                          * is the lower 32 bits of the physical address of
 2067                          * particular reply frame.  Convert that address to
 2068                          * host format, and then use that to provide the
 2069                          * offset against the virtual address base
 2070                          * (sc->reply_frames).
 2071                          */
 2072                         baddr = le32toh(desc->AddressReply.ReplyFrameAddress);
 2073                         reply = sc->reply_frames +
 2074                                 (baddr - ((uint32_t)sc->reply_busaddr));
 2075                         /*
 2076                          * Make sure the reply we got back is in a valid
 2077                          * range.  If not, go ahead and panic here, since
 2078                          * we'll probably panic as soon as we dereference the
 2079                          * reply pointer anyway.
 2080                          */
 2081                         if ((reply < sc->reply_frames)
 2082                          || (reply > (sc->reply_frames +
 2083                              (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) {
 2084                                 printf("%s: WARNING: reply %p out of range!\n",
 2085                                        __func__, reply);
 2086                                 printf("%s: reply_frames %p, fqdepth %d, "
 2087                                        "frame size %d\n", __func__,
 2088                                        sc->reply_frames, sc->fqdepth,
 2089                                        sc->facts->ReplyFrameSize * 4);
 2090                                 printf("%s: baddr %#x,\n", __func__, baddr);
 2091                                 /* LSI-TODO. See Linux Code for Graceful exit */
 2092                                 panic("Reply address out of range");
 2093                         }
 2094                         if (le16toh(desc->AddressReply.SMID) == 0) {
 2095                                 if (((MPI2_DEFAULT_REPLY *)reply)->Function ==
 2096                                     MPI2_FUNCTION_DIAG_BUFFER_POST) {
 2097                                         /*
 2098                                          * If SMID is 0 for Diag Buffer Post,
 2099                                          * this implies that the reply is due to
 2100                                          * a release function with a status that
 2101                                          * the buffer has been released.  Set
 2102                                          * the buffer flags accordingly.
 2103                                          */
 2104                                         rel_rep =
 2105                                             (MPI2_DIAG_RELEASE_REPLY *)reply;
 2106                                         if ((le16toh(rel_rep->IOCStatus) &
 2107                                             MPI2_IOCSTATUS_MASK) ==
 2108                                             MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED)
 2109                                         {
 2110                                                 pBuffer =
 2111                                                     &sc->fw_diag_buffer_list[
 2112                                                     rel_rep->BufferType];
 2113                                                 pBuffer->valid_data = TRUE;
 2114                                                 pBuffer->owned_by_firmware =
 2115                                                     FALSE;
 2116                                                 pBuffer->immediate = FALSE;
 2117                                         }
 2118                                 } else
 2119                                         mpr_dispatch_event(sc, baddr,
 2120                                             (MPI2_EVENT_NOTIFICATION_REPLY *)
 2121                                             reply);
 2122                         } else {
 2123                                 cm = &sc->commands[
 2124                                     le16toh(desc->AddressReply.SMID)];
 2125                                 cm->cm_reply = reply;
 2126                                 cm->cm_reply_data =
 2127                                     le32toh(desc->AddressReply.
 2128                                     ReplyFrameAddress);
 2129                         }
 2130                         break;
 2131                 }
 2132                 case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS:
 2133                 case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER:
 2134                 case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS:
 2135                 default:
 2136                         /* Unhandled */
 2137                         mpr_dprint(sc, MPR_ERROR, "Unhandled reply 0x%x\n",
 2138                             desc->Default.ReplyFlags);
 2139                         cm = NULL;
 2140                         break;
 2141                 }
 2142 
 2143                 if (cm != NULL) {
 2144                         /* Print error reply frame */
 2145                         if (cm->cm_reply)
 2146                                 mpr_display_reply_info(sc, cm->cm_reply);
 2147                         mpr_complete_command(sc, cm);
 2148                 }
 2149 
 2150                 desc->Words.Low = 0xffffffff;
 2151                 desc->Words.High = 0xffffffff;
 2152         }
 2153 
 2154         if (pq != sc->replypostindex) {
 2155                 mpr_dprint(sc, MPR_TRACE,
 2156                     "%s sc %p writing postindex %d\n",
 2157                     __func__, sc, sc->replypostindex);
 2158                 mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
 2159                     sc->replypostindex);
 2160         }
 2161 
 2162         return;
 2163 }
 2164 
 2165 static void
 2166 mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data,
 2167     MPI2_EVENT_NOTIFICATION_REPLY *reply)
 2168 {
 2169         struct mpr_event_handle *eh;
 2170         int event, handled = 0;
 2171 
 2172         event = le16toh(reply->Event);
 2173         TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
 2174                 if (isset(eh->mask, event)) {
 2175                         eh->callback(sc, data, reply);
 2176                         handled++;
 2177                 }
 2178         }
 2179 
 2180         if (handled == 0)
 2181                 mpr_dprint(sc, MPR_EVENT, "Unhandled event 0x%x\n",
 2182                     event);
 2183 
 2184         /*
 2185          * This is the only place that the event/reply should be freed.
 2186          * Anything wanting to hold onto the event data should have
 2187          * already copied it into their own storage.
 2188          */
 2189         mpr_free_reply(sc, data);
 2190 }
 2191 
 2192 static void
 2193 mpr_reregister_events_complete(struct mpr_softc *sc, struct mpr_command *cm)
 2194 {
 2195         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 2196 
 2197         if (cm->cm_reply)
 2198                 MPR_DPRINT_EVENT(sc, generic,
 2199                         (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply);
 2200 
 2201         mpr_free_command(sc, cm);
 2202 
 2203         /* next, send a port enable */
 2204         mprsas_startup(sc);
 2205 }
 2206 
 2207 /*
 2208  * For both register_events and update_events, the caller supplies a bitmap
 2209  * of events that it _wants_.  These functions then turn that into a bitmask
 2210  * suitable for the controller.
 2211  */
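/*
 * A typical consumer mirrors mpr_attach_log() above; the event chosen here
 * is illustrative, and "my_callback"/"my_data" stand in for the consumer's
 * handler and cookie:
 *
 *     uint8_t events[16];
 *     struct mpr_event_handle *eh;
 *
 *     bzero(events, 16);
 *     setbit(events, MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST);
 *     error = mpr_register_events(sc, events, my_callback, my_data, &eh);
 */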
 2212 int
 2213 mpr_register_events(struct mpr_softc *sc, uint8_t *mask,
 2214     mpr_evt_callback_t *cb, void *data, struct mpr_event_handle **handle)
 2215 {
 2216         struct mpr_event_handle *eh;
 2217         int error = 0;
 2218 
 2219         eh = malloc(sizeof(struct mpr_event_handle), M_MPR, M_WAITOK|M_ZERO);
 2220         if (!eh) {
 2221                 device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
 2222                     __func__, __LINE__);
 2223                 return (ENOMEM);
 2224         }
 2225         eh->callback = cb;
 2226         eh->data = data;
 2227         TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list);
 2228         if (mask != NULL)
 2229                 error = mpr_update_events(sc, eh, mask);
 2230         *handle = eh;
 2231 
 2232         return (error);
 2233 }
 2234 
 2235 int
 2236 mpr_update_events(struct mpr_softc *sc, struct mpr_event_handle *handle,
 2237     uint8_t *mask)
 2238 {
 2239         MPI2_EVENT_NOTIFICATION_REQUEST *evtreq;
 2240         MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL;
 2241         struct mpr_command *cm = NULL;
 2242         struct mpr_event_handle *eh;
 2243         int error, i;
 2244 
 2245         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 2246 
 2247         if ((mask != NULL) && (handle != NULL))
 2248                 bcopy(mask, &handle->mask[0], 16);
 2249         memset(sc->event_mask, 0xff, 16);
 2250 
 2251         TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
 2252                 for (i = 0; i < 16; i++)
 2253                         sc->event_mask[i] &= ~eh->mask[i];
 2254         }
 2255 
 2256         if ((cm = mpr_alloc_command(sc)) == NULL)
 2257                 return (EBUSY);
 2258         evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req;
 2259         evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION;
 2260         evtreq->MsgFlags = 0;
 2261         evtreq->SASBroadcastPrimitiveMasks = 0;
 2262 #ifdef MPR_DEBUG_ALL_EVENTS
 2263         {
 2264                 u_char fullmask[16];
 2265                 memset(fullmask, 0x00, 16);
 2266                 bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16);
 2267         }
 2268 #else
 2269                 bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16);
 2270 #endif
 2271         cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
 2272         cm->cm_data = NULL;
 2273 
 2274         error = mpr_request_polled(sc, &cm);
 2275         if (cm != NULL)
 2276                 reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply;
 2277         if ((reply == NULL) ||
 2278             (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS)
 2279                 error = ENXIO;
 2280         
 2281         if (reply)
 2282                 MPR_DPRINT_EVENT(sc, generic, reply);
 2283 
 2284         mpr_dprint(sc, MPR_TRACE, "%s finished error %d\n", __func__, error);
 2285 
 2286         if (cm != NULL)
 2287                 mpr_free_command(sc, cm);
 2288         return (error);
 2289 }
 2290 
 2291 static int
 2292 mpr_reregister_events(struct mpr_softc *sc)
 2293 {
 2294         MPI2_EVENT_NOTIFICATION_REQUEST *evtreq;
 2295         struct mpr_command *cm;
 2296         struct mpr_event_handle *eh;
 2297         int error, i;
 2298 
 2299         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 2300 
 2301         /* first, reregister events */
 2302 
 2303         memset(sc->event_mask, 0xff, 16);
 2304 
 2305         TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
 2306                 for (i = 0; i < 16; i++)
 2307                         sc->event_mask[i] &= ~eh->mask[i];
 2308         }
 2309 
 2310         if ((cm = mpr_alloc_command(sc)) == NULL)
 2311                 return (EBUSY);
 2312         evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req;
 2313         evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION;
 2314         evtreq->MsgFlags = 0;
 2315         evtreq->SASBroadcastPrimitiveMasks = 0;
 2316 #ifdef MPR_DEBUG_ALL_EVENTS
 2317         {
 2318                 u_char fullmask[16];
 2319                 memset(fullmask, 0x00, 16);
 2320                 bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16);
 2321         }
 2322 #else
 2323                 bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16);
 2324 #endif
 2325         cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
 2326         cm->cm_data = NULL;
 2327         cm->cm_complete = mpr_reregister_events_complete;
 2328 
 2329         error = mpr_map_command(sc, cm);
 2330 
 2331         mpr_dprint(sc, MPR_TRACE, "%s finished with error %d\n", __func__,
 2332             error);
 2333         return (error);
 2334 }
 2335 
 2336 int
 2337 mpr_deregister_events(struct mpr_softc *sc, struct mpr_event_handle *handle)
 2338 {
 2339 
 2340         TAILQ_REMOVE(&sc->event_list, handle, eh_list);
 2341         free(handle, M_MPR);
 2342         return (mpr_update_events(sc, NULL, NULL));
 2343 }
 2344 
 2345 /**
 2346 * mpr_build_nvme_prp - This function is called for NVMe end devices to build a
 2347 * native SGL (NVMe PRP). The native SGL is built starting in the first PRP entry
 2348 * of the NVMe message (PRP1). If the data buffer is small enough to be described
 2349 * entirely using PRP1, then PRP2 is not used. If needed, PRP2 is used to
 2350 * describe a larger data buffer. If the data buffer is too large to describe
 2351 * using the two PRP entries inside the NVMe message, then PRP1 describes the
 2352 * first data memory segment, and PRP2 contains a pointer to a PRP list located
 2353 * elsewhere in memory to describe the remaining data memory segments. The PRP
 2354 * list will be contiguous.
 2355 *
 2356 * The native SGL for NVMe devices is a Physical Region Page (PRP). A PRP
 2357 * consists of a list of PRP entries to describe a number of noncontiguous
 2358 * physical memory segments as a single memory buffer, just as an SGL does.
 2359 * Note, however, that this function is only used by the IOCTL call, so the memory
 2360 * given will be guaranteed to be contiguous. There is no need to translate
 2361 * non-contiguous SGL into a PRP in this case. All PRPs will describe contiguous
 2362 * space that is one page size each.
 2363 *
 2364 * Each NVMe message contains two PRP entries. The first (PRP1) either contains
 2365 * a PRP list pointer or a PRP element, depending upon the command. PRP2 contains
 2366 * the second PRP element if the memory being described fits within 2 PRP
 2367 * entries, or a PRP list pointer if the PRP spans more than two entries.
 2368 *
 2369 * A PRP list pointer contains the address of a PRP list, structured as a linear
 2370 * array of PRP entries. Each PRP entry in this list describes a segment of
 2371 * physical memory.
 2372 *
 2373 * Each 64-bit PRP entry comprises an address and an offset field. The address
 2374 * always points to the beginning of a PAGE_SIZE physical memory page, and the
 2375 * offset describes where within that page the memory segment begins. Only the
 2376 * first element in a PRP list may contain a non-zero offset, implying that all
 2377 * memory segments following the first begin at the start of a PAGE_SIZE page.
 2378 *
 2379 * Each PRP element normally describes a chunk of PAGE_SIZE physical memory,
 2380 * with exceptions for the first and last elements in the list. If the memory
 2381 * being described by the list begins at a non-zero offset within the first page,
 2382 * then the first PRP element will contain a non-zero offset indicating where the
 2383 * region begins within the page. The last memory segment may end before the end
 2384 * of the PAGE_SIZE segment, depending upon the overall size of the memory being
 2385 * described by the PRP list. 
 2386 *
 2387 * Since PRP entries lack any indication of size, the overall data buffer length
 2388 * is used to determine where the end of the data memory buffer is located, and
 2389 * how many PRP entries are required to describe it.
 2390 *
 2391 * Returns nothing.
 2392 */
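/*
 * Worked example (sizes illustrative): a 12KiB buffer starting 0x200 bytes
 * into a 4KiB page yields PRP1 = paddr, with the first entry covering
 * 4096 - 512 = 3584 bytes.  The remaining 8704 bytes span more than one
 * page, so PRP2 becomes a PRP list pointer and the list holds three
 * page-aligned entries covering 4096 + 4096 + 512 bytes.
 */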
 2393 void 
 2394 mpr_build_nvme_prp(struct mpr_softc *sc, struct mpr_command *cm,
 2395     Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request, void *data,
 2396     uint32_t data_in_sz, uint32_t data_out_sz)
 2397 {
 2398         int                     prp_size = PRP_ENTRY_SIZE;
 2399         uint64_t                *prp_entry, *prp1_entry, *prp2_entry;
 2400         uint64_t                *prp_entry_phys, *prp_page, *prp_page_phys;
 2401         uint32_t                offset, entry_len, page_mask_result, page_mask;
 2402         bus_addr_t              paddr;
 2403         size_t                  length;
 2404         struct mpr_prp_page     *prp_page_info = NULL;
 2405 
 2406         /*
 2407          * Not all commands require a data transfer. If no data, just return
 2408          * without constructing any PRP.
 2409          */
 2410         if (!data_in_sz && !data_out_sz)
 2411                 return;
 2412 
 2413         /*
 2414          * Set pointers to PRP1 and PRP2, which are in the NVMe command. PRP1 is
 2415          * located at a 24 byte offset from the start of the NVMe command. Then
 2416          * set the current PRP entry pointer to PRP1.
 2417          */
 2418         prp1_entry = (uint64_t *)(nvme_encap_request->NVMe_Command +
 2419             NVME_CMD_PRP1_OFFSET);
 2420         prp2_entry = (uint64_t *)(nvme_encap_request->NVMe_Command +
 2421             NVME_CMD_PRP2_OFFSET);
 2422         prp_entry = prp1_entry;
 2423 
 2424         /*
 2425          * For the PRP entries, use the specially allocated buffer of
 2426          * contiguous memory. PRP Page allocation failures should not happen
 2427          * because there should be enough PRP page buffers to account for the
 2428          * possible NVMe QDepth.
 2429          */
 2430         prp_page_info = mpr_alloc_prp_page(sc);
 2431         KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be "
 2432             "used for building a native NVMe SGL.\n", __func__));
 2433         prp_page = (uint64_t *)prp_page_info->prp_page;
 2434         prp_page_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr;
 2435 
 2436         /*
 2437          * Insert the allocated PRP page into the command's PRP page list. This
 2438          * will be freed when the command is freed.
 2439          */
 2440         TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link);
 2441 
 2442         /*
 2443          * Check if we are within 1 entry of a page boundary; we don't want
 2444          * our first entry to be a PRP List entry.
 2445          */
 2446         page_mask = PAGE_SIZE - 1;
 2447         page_mask_result = (uintptr_t)((uint8_t *)prp_page + prp_size) &
 2448             page_mask;
 2449         if (!page_mask_result)
 2450         {
 2451                 /* Bump up to next page boundary. */
 2452                 prp_page = (uint64_t *)((uint8_t *)prp_page + prp_size);
 2453                 prp_page_phys = (uint64_t *)((uint8_t *)prp_page_phys +
 2454                     prp_size);
 2455         }
 2456 
 2457         /*
 2458          * Set PRP physical pointer, which initially points to the current PRP
 2459          * DMA memory page.
 2460          */
 2461         prp_entry_phys = prp_page_phys;
 2462 
 2463         /* Get physical address and length of the data buffer. */
 2464         paddr = (bus_addr_t)data;
 2465         if (data_in_sz)
 2466                 length = data_in_sz;
 2467         else
 2468                 length = data_out_sz;
 2469 
 2470         /* Loop while the length is not zero. */
 2471         while (length)
 2472         {
 2473                 /*
 2474                  * Check if we need to put a list pointer here if we are at page
 2475                  * boundary - prp_size (8 bytes).
 2476                  */
 2477                 page_mask_result = (uintptr_t)((uint8_t *)prp_entry_phys +
 2478                     prp_size) & page_mask;
 2479                 if (!page_mask_result)
 2480                 {
 2481                         /*
 2482                          * This is the last entry in a PRP List, so we need to
 2483                          * put a PRP list pointer here. What this does is:
 2484                          *   - bump the current memory pointer to the next
 2485                          *     address, which will be the next full page.
 2486                          *   - set the PRP Entry to point to that page. This is
 2487                          *     now the PRP List pointer.
 2488                          *   - bump the PRP Entry pointer to the start of the next
 2489                          *     page. Since all of this PRP memory is contiguous,
 2490                          *     no need to get a new page - it's just the next
 2491                          *     address.
 2492                          */
 2493                         prp_entry_phys++;
 2494                         *prp_entry =
 2495                             htole64((uint64_t)(uintptr_t)prp_entry_phys);
 2496                         prp_entry++;
 2497                 }
 2498 
 2499                 /* Handle the case where the entry spans only part of a page. */
 2500                 offset = (uint32_t)paddr & page_mask;
 2501                 entry_len = PAGE_SIZE - offset;
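                      /*
                       * Example (illustrative values): with 4 KB pages, a data
                       * buffer at paddr 0x12345a00 gives offset 0xa00 and
                       * entry_len 0x600, so this PRP covers only the tail of
                       * its page and the next iteration continues page aligned.
                       */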
 2502 
 2503                 if (prp_entry == prp1_entry)
 2504                 {
 2505                         /*
 2506                          * Must fill in the first PRP pointer (PRP1) before
 2507                          * moving on.
 2508                          */
 2509                         *prp1_entry = htole64((uint64_t)paddr);
 2510 
 2511                         /*
 2512                          * Now point to the second PRP entry within the
 2513                          * command (PRP2).
 2514                          */
 2515                         prp_entry = prp2_entry;
 2516                 }
 2517                 else if (prp_entry == prp2_entry)
 2518                 {
 2519                         /*
 2520                          * Should the PRP2 entry be a PRP List pointer or just a
 2521                          * regular PRP pointer? If there is more than one more
 2522                          * page of data, must use a PRP List pointer.
 2523                          */
 2524                         if (length > PAGE_SIZE)
 2525                         {
 2526                                 /*
 2527                                  * PRP2 will contain a PRP List pointer because
 2528                                  * PRPs are needed with this command. The
 2529                                  * list will start at the beginning of the
 2530                                  * contiguous buffer.
 2531                                  */
 2532                                 *prp2_entry =
 2533                                     htole64(
 2534                                     (uint64_t)(uintptr_t)prp_entry_phys);
 2535 
 2536                                 /*
 2537                                  * The next PRP Entry will be the start of the
 2538                                  * first PRP List.
 2539                                  */
 2540                                 prp_entry = prp_page;
 2541                         }
 2542                         else
 2543                         {
 2544                                 /*
 2545                                  * After this, the PRP Entries are complete.
 2546                                  * This command uses 2 PRPs and no PRP list.
 2547                                  */
 2548                                 *prp2_entry = htole64((uint64_t)paddr);
 2549                         }
 2550                 }
 2551                 else
 2552                 {
 2553                         /*
 2554                          * Put entry in list and bump the addresses.
 2555                          *
 2556                          * After PRP1 and PRP2 are filled in, this will fill in
 2557                          * all remaining PRP entries in a PRP List, one per each
 2558                          * time through the loop.
 2559                          */
 2560                         *prp_entry = htole64((uint64_t)paddr);
 2561                         prp_entry++;
 2562                         prp_entry_phys++;
 2563                 }
 2564 
 2565                 /*
 2566                  * Bump the phys address of the command's data buffer by the
 2567                  * entry_len.
 2568                  */
 2569                 paddr += entry_len;
 2570 
 2571                 /* Decrement length accounting for last partial page. */
 2572                 if (entry_len > length)
 2573                         length = 0;
 2574                 else
 2575                         length -= entry_len;
 2576         }
 2577 }
 2578 
 2579 /*
 2580  * mpr_check_pcie_native_sgl - This function is called for PCIe end devices to
 2581  * determine if the driver needs to build a native SGL. If so, that native SGL
 2582  * is built in the contiguous buffers allocated especially for PCIe SGL
 2583  * creation. If the driver will not build a native SGL, return TRUE and a
 2584  * normal IEEE SGL will be built. Currently this routine supports NVMe devices
 2585  * only.
 2586  *
 2587  * Returns FALSE (0) if a native SGL was built, TRUE (1) if it was not.
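       * The caller (mpr_data_cb() below) skips its normal IEEE SGE loop when 0
       * is returned.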
 2588  */
 2589 static int
 2590 mpr_check_pcie_native_sgl(struct mpr_softc *sc, struct mpr_command *cm,
 2591     bus_dma_segment_t *segs, int segs_left)
 2592 {
 2593         uint32_t                i, sge_dwords, length, offset, entry_len;
 2594         uint32_t                num_entries, buff_len = 0, sges_in_segment;
 2595         uint32_t                page_mask, page_mask_result, *curr_buff;
 2596         uint32_t                *ptr_sgl, *ptr_first_sgl, first_page_offset;
 2597         uint32_t                first_page_data_size, end_residual;
 2598         uint64_t                *msg_phys;
 2599         bus_addr_t              paddr;
 2600         int                     build_native_sgl = 0, first_prp_entry;
 2601         int                     prp_size = PRP_ENTRY_SIZE;
 2602         Mpi25IeeeSgeChain64_t   *main_chain_element = NULL;
 2603         struct mpr_prp_page     *prp_page_info = NULL;
 2604 
 2605         mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
 2606 
 2607         /*
 2608          * Add up the sizes of each segment length to get the total transfer
 2609          * size, which will be checked against the Maximum Data Transfer Size.
 2610          * If the data transfer length exceeds the MDTS for this device, just
 2611          * return 1 so a normal IEEE SGL will be built. F/W will break the I/O
 2612          * up into multiple I/Os. [nvme_mdts = 0 means unlimited]
 2613          */
 2614         for (i = 0; i < segs_left; i++)
 2615                 buff_len += htole32(segs[i].ds_len);
 2616         if ((cm->cm_targ->MDTS > 0) && (buff_len > cm->cm_targ->MDTS))
 2617                 return 1;
 2618 
 2619         /* Create page_mask (to get offset within page) */
 2620         page_mask = PAGE_SIZE - 1;
 2621 
 2622         /*
 2623          * Check if the number of elements exceeds the max number that can be
 2624          * put in the main message frame (H/W can only translate an SGL that
 2625          * is contained entirely in the main message frame).
 2626          */
 2627         sges_in_segment = (sc->facts->IOCRequestFrameSize -
 2628             offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof(MPI25_SGE_IO_UNION);
 2629         if (segs_left > sges_in_segment)
 2630                 build_native_sgl = 1;
 2631         else
 2632         {
 2633                 /*
 2634                  * NVMe uses one PRP for each physical page (or part of physical
 2635                  * page).
 2636          *    if 4 pages or less, IEEE is OK
 2637          *    if more than 5 pages, a native SGL must be built
 2638          *    if more than 4 but at most 5 pages, check the physical address
 2639          *      of the first SG entry; if the data size within that first
 2640          *      page is >= the residual beyond 4 pages, use IEEE, otherwise
 2641          *      use a native SGL
 2642                  */
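                      /*
                       * Worked example (illustrative, 4 KB pages): an 18 KB
                       * transfer (between 4 and 5 pages) whose first segment
                       * starts at page offset 0xe00 has first_page_data_size =
                       * 0x200 and end_residual = 0x800; since 0x200 < 0x800 the
                       * data spills past the 5th page and a native SGL is used.
                       */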
 2643                 if (buff_len > (PAGE_SIZE * 5))
 2644                         build_native_sgl = 1;
 2645                 else if ((buff_len > (PAGE_SIZE * 4)) &&
 2646                     (buff_len <= (PAGE_SIZE * 5)) )
 2647                 {
 2648                         msg_phys = (uint64_t *)segs[0].ds_addr;
 2649                         first_page_offset =
 2650                             ((uint32_t)(uint64_t)(uintptr_t)msg_phys &
 2651                             page_mask);
 2652                         first_page_data_size = PAGE_SIZE - first_page_offset;
 2653                         end_residual = buff_len % PAGE_SIZE;
 2654 
 2655                         /*
 2656                          * If offset into first page pushes the end of the data
 2657                          * beyond end of the 5th page, we need the extra PRP
 2658                          * list.
 2659                          */
 2660                         if (first_page_data_size < end_residual)
 2661                                 build_native_sgl = 1;
 2662 
 2663                         /*
 2664                          * Check if first SG entry size is < residual beyond 4
 2665                          * pages.
 2666                          */
 2667                         if (htole32(segs[0].ds_len) <
 2668                             (buff_len - (PAGE_SIZE * 4)))
 2669                                 build_native_sgl = 1;
 2670                 }
 2671         }
 2672 
 2673         /* check if native SGL is needed */
 2674         if (!build_native_sgl)
 2675                 return 1;
 2676 
 2677         /*
 2678          * Native SGL is needed.
 2679          * Put a chain element in main message frame that points to the first
 2680          * chain buffer.
 2681          *
 2682          * NOTE:  The ChainOffset field must be 0 when using a chain pointer to
 2683          *        a native SGL.
 2684          */
 2685 
 2686         /* Set main message chain element pointer */
 2687         main_chain_element = (pMpi25IeeeSgeChain64_t)cm->cm_sge;
 2688 
 2689         /*
 2690          * For NVMe the chain element needs to be the 2nd SGL entry in the main
 2691          * message.
 2692          */
 2693         main_chain_element = (Mpi25IeeeSgeChain64_t *)
 2694             ((uint8_t *)main_chain_element + sizeof(MPI25_IEEE_SGE_CHAIN64));
 2695 
 2696         /*
 2697          * For the PRP entries, use the specially allocated buffer of
 2698          * contiguous memory. PRP Page allocation failures should not happen
 2699          * because there should be enough PRP page buffers to account for the
 2700          * possible NVMe QDepth.
 2701          */
 2702         prp_page_info = mpr_alloc_prp_page(sc);
 2703         KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be "
 2704             "used for building a native NVMe SGL.\n", __func__));
 2705         curr_buff = (uint32_t *)prp_page_info->prp_page;
 2706         msg_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr;
 2707 
 2708         /*
 2709          * Insert the allocated PRP page into the command's PRP page list. This
 2710          * will be freed when the command is freed.
 2711          */
 2712         TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link);
 2713 
 2714         /*
 2715          * Check if we are within 1 entry of a page boundary; we don't want
 2716          * our first entry to be a PRP List entry.
 2717          */
 2718         page_mask_result = (uintptr_t)((uint8_t *)curr_buff + prp_size) &
 2719             page_mask;
 2720         if (!page_mask_result) {
 2721                 /* Bump up to next page boundary. */
 2722                 curr_buff = (uint32_t *)((uint8_t *)curr_buff + prp_size);
 2723                 msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size);
 2724         }
 2725 
 2726         /* Fill in the chain element and make it an NVMe segment type. */
 2727         main_chain_element->Address.High =
 2728             htole32((uint32_t)((uint64_t)(uintptr_t)msg_phys >> 32));
 2729         main_chain_element->Address.Low =
 2730             htole32((uint32_t)(uintptr_t)msg_phys);
 2731         main_chain_element->NextChainOffset = 0;
 2732         main_chain_element->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
 2733             MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR |
 2734             MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP;
 2735 
 2736         /* Set SGL pointer to start of contiguous PCIe buffer. */
 2737         ptr_sgl = curr_buff;
 2738         sge_dwords = 2;
 2739         num_entries = 0;
 2740 
 2741         /*
 2742          * NVMe has a very convoluted PRP format. One PRP is required for each
 2743          * page or partial page. We need to split up OS SG entries if they are
 2744          * longer than one page or cross a page boundary. We also have to insert
 2745          * a PRP list pointer entry as the last entry in each physical page of
 2746          * the PRP list.
 2747          *
 2748          * NOTE: The first PRP "entry" is actually placed in the first SGL entry
 2749          * in the main message in IEEE 64 format. The 2nd entry in the main
 2750          * message is the chain element, and the rest of the PRP entries are
 2751          * built in the contiguous PCIe buffer.
 2752          */
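              /*
               * Sketch of the resulting layout (illustrative only):
               *
               *   main message SGL:  [ IEEE simple SGE = first PRP ][ chain SGE ]
               *   contiguous buffer: PRP, PRP, ..., PRP list ptr | PRP, PRP, ...
               *
               * where a PRP list pointer is written into the last slot of each
               * physical page of the contiguous buffer.
               */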
 2753         first_prp_entry = 1;
 2754         ptr_first_sgl = (uint32_t *)cm->cm_sge;
 2755 
 2756         for (i = 0; i < segs_left; i++) {
 2757                 /* Get physical address and length of this SG entry. */
 2758                 paddr = segs[i].ds_addr;
 2759                 length = segs[i].ds_len;
 2760 
 2761                 /*
 2762                  * If this is not the first SGE, check whether the buffer
 2763                  * starts on a page boundary. An unaligned start is not
 2764                  * expected, so fall back and have the FW build the SGL.
 2765                  */
 2766                 if (i) {
 2767                         if ((uint32_t)paddr & page_mask) {
 2768                                 mpr_dprint(sc, MPR_ERROR, "Unaligned SGE while "
 2769                                     "building NVMe PRPs, low address is 0x%x\n",
 2770                                     (uint32_t)paddr);
 2771                                 return 1;
 2772                         }
 2773                 }
 2774 
 2775                 /* Apart from the last SGE, if any other SGE does not end on
 2776                  * a page boundary, a hole exists in the transfer. A hole
 2777                  * leads to data corruption, so fall back to IEEE SGEs.
 2778                  */
 2779                 if (i != (segs_left - 1)) {
 2780                         if (((uint32_t)paddr + length) & page_mask) {
 2781                                 mpr_dprint(sc, MPR_ERROR, "Unaligned SGE "
 2782                                     "boundary while building NVMe PRPs, low "
 2783                                     "address: 0x%x and length: %u\n",
 2784                                     (uint32_t)paddr, length);
 2785                                 return 1;
 2786                         }
 2787                 }
 2788 
 2789                 /* Loop while the length is not zero. */
 2790                 while (length) {
 2791                         /*
 2792                          * Check if we need to put a list pointer here, i.e.,
 2793                          * if we are at (page boundary - prp_size).
 2794                          */
 2795                         page_mask_result = (uintptr_t)((uint8_t *)ptr_sgl +
 2796                             prp_size) & page_mask;
 2797                         if (!page_mask_result) {
 2798                                 /*
 2799                                  * Need to put a PRP list pointer here.
 2800                                  */
 2801                                 msg_phys = (uint64_t *)((uint8_t *)msg_phys +
 2802                                     prp_size);
 2803                                 *ptr_sgl = htole32((uintptr_t)msg_phys);
 2804                                 *(ptr_sgl+1) = htole32((uint64_t)(uintptr_t)
 2805                                     msg_phys >> 32);
 2806                                 ptr_sgl += sge_dwords;
 2807                                 num_entries++;
 2808                         }
 2809 
 2810                         /* Handle the case where the entry spans only part of a page. */
 2811                         offset = (uint32_t)paddr & page_mask;
 2812                         entry_len = PAGE_SIZE - offset;
 2813                         if (first_prp_entry) {
 2814                                 /*
 2815                                  * Put IEEE entry in first SGE in main message.
 2816                                  * (Simple element, System addr, not end of
 2817                                  * list.)
 2818                                  */
 2819                                 *ptr_first_sgl = htole32((uint32_t)paddr);
 2820                                 *(ptr_first_sgl + 1) =
 2821                                     htole32((uint32_t)((uint64_t)paddr >> 32));
 2822                                 *(ptr_first_sgl + 2) = htole32(entry_len);
 2823                                 *(ptr_first_sgl + 3) = 0;
 2824 
 2825                                 /* No longer the first PRP entry. */
 2826                                 first_prp_entry = 0;
 2827                         } else {
 2828                                 /* Put entry in list. */
 2829                                 *ptr_sgl = htole32((uint32_t)paddr);
 2830                                 *(ptr_sgl + 1) =
 2831                                     htole32((uint32_t)((uint64_t)paddr >> 32));
 2832 
 2833                                 /* Bump ptr_sgl, msg_phys, and num_entries. */
 2834                                 ptr_sgl += sge_dwords;
 2835                                 msg_phys = (uint64_t *)((uint8_t *)msg_phys +
 2836                                     prp_size);
 2837                                 num_entries++;
 2838                         }
 2839 
 2840                         /* Bump the phys address by the entry_len. */
 2841                         paddr += entry_len;
 2842 
 2843                         /* Decrement length accounting for last partial page. */
 2844                         if (entry_len > length)
 2845                                 length = 0;
 2846                         else
 2847                                 length -= entry_len;
 2848                 }
 2849         }
 2850 
 2851         /* Set chain element Length. */
 2852         main_chain_element->Length = htole32(num_entries * prp_size);
 2853 
 2854         /* Return 0, indicating we built a native SGL. */
 2855         return 0;
 2856 }
 2857 
 2858 /*
 2859  * Add a chain element as the next SGE for the specified command.
 2860  * Reset cm_sge and cm_sglsize to indicate all the available space. Chains are
 2861  * only required for IEEE commands.  Therefore there is no code for commands
 2862  * that have the MPR_CM_FLAGS_SGE_SIMPLE flag set (and those commands
 2863  * shouldn't be requesting chains).
 2864  */
 2865 static int
 2866 mpr_add_chain(struct mpr_command *cm, int segsleft)
 2867 {
 2868         struct mpr_softc *sc = cm->cm_sc;
 2869         MPI2_REQUEST_HEADER *req;
 2870         MPI25_IEEE_SGE_CHAIN64 *ieee_sgc;
 2871         struct mpr_chain *chain;
 2872         int sgc_size, current_segs, rem_segs, segs_per_frame;
 2873         uint8_t next_chain_offset = 0;
 2874 
 2875         /*
 2876          * Fail if a command is requesting a chain for SIMPLE SGE's.  For SAS3
 2877          * only IEEE commands should be requesting chains.  Return some error
 2878          * code other than 0.
 2879          */
 2880         if (cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE) {
 2881                 mpr_dprint(sc, MPR_ERROR, "A chain element cannot be added to "
 2882                     "an MPI SGL.\n");
 2883                 return(ENOBUFS);
 2884         }
 2885 
 2886         sgc_size = sizeof(MPI25_IEEE_SGE_CHAIN64);
 2887         if (cm->cm_sglsize < sgc_size)
 2888                 panic("MPR: Need SGE Error Code\n");
 2889 
 2890         chain = mpr_alloc_chain(cm->cm_sc);
 2891         if (chain == NULL)
 2892                 return (ENOBUFS);
 2893 
 2894         /*
 2895          * Note: a doubly-linked list is used to make it easier to walk for
 2896          * debugging.
 2897          */
 2898         TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link);
 2899 
 2900         /*
 2901          * Determine whether more than one additional frame is required.  If
 2902          * so, NextChainOffset must be set, and it will point at the last
 2903          * segment slot of the next frame.
 2904          */
 2905         rem_segs = 0;
 2906         if (cm->cm_sglsize < (sgc_size * segsleft)) {
 2907                 /*
 2908                  * rem_segs is the number of segments remaining after the
 2909                  * segments that will go into the current frame.  Since it is
 2910                  * known that at least one more frame is required, account for
 2911                  * the chain element.  To know if more than one more frame is
 2912                  * required, just check if there will be a remainder after using
 2913                  * the current frame (with this chain) and the next frame.  If
 2914                  * so the NextChainOffset must be the last element of the next
 2915                  * frame.
 2916                  */
 2917                 current_segs = (cm->cm_sglsize / sgc_size) - 1;
 2918                 rem_segs = segsleft - current_segs;
 2919                 segs_per_frame = sc->chain_frame_size / sgc_size;
 2920                 if (rem_segs > segs_per_frame) {
 2921                         next_chain_offset = segs_per_frame - 1;
 2922                 }
 2923         }
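              /*
               * Illustration (hypothetical values): with 16-byte IEEE SGEs, a
               * cm_sglsize of 64 leaves current_segs = 3 after reserving the
               * chain element; if segsleft = 40, rem_segs = 37, and with a
               * 256-byte chain frame (segs_per_frame = 16) more than one extra
               * frame is needed, so next_chain_offset = 15.
               */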
 2924         ieee_sgc = &((MPI25_SGE_IO_UNION *)cm->cm_sge)->IeeeChain;
 2925         ieee_sgc->Length = next_chain_offset ?
 2926             htole32((uint32_t)sc->chain_frame_size) :
 2927             htole32((uint32_t)rem_segs * (uint32_t)sgc_size);
 2928         ieee_sgc->NextChainOffset = next_chain_offset;
 2929         ieee_sgc->Flags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
 2930             MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
 2931         ieee_sgc->Address.Low = htole32(chain->chain_busaddr);
 2932         ieee_sgc->Address.High = htole32(chain->chain_busaddr >> 32);
 2933         cm->cm_sge = &((MPI25_SGE_IO_UNION *)chain->chain)->IeeeSimple;
 2934         req = (MPI2_REQUEST_HEADER *)cm->cm_req;
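              /*
               * For IEEE SGLs the request header's ChainOffset is expressed in
               * 16-byte units, hence the shift by 4 below.
               */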
 2935         req->ChainOffset = (sc->chain_frame_size - sgc_size) >> 4;
 2936 
 2937         cm->cm_sglsize = sc->chain_frame_size;
 2938         return (0);
 2939 }
 2940 
 2941 /*
 2942  * Add one scatter-gather element to the scatter-gather list for a command.
 2943  * Maintain cm_sglsize and cm_sge as the remaining size and pointer to the
 2944  * next SGE to fill in, respectively.  In Gen3, the MPI SGL does not have a
 2945  * chain, so don't consider any chain additions.
 2946  */
 2947 int
 2948 mpr_push_sge(struct mpr_command *cm, MPI2_SGE_SIMPLE64 *sge, size_t len,
 2949     int segsleft)
 2950 {
 2951         uint32_t saved_buf_len, saved_address_low, saved_address_high;
 2952         u32 sge_flags;
 2953 
 2954         /*
 2955          * case 1: >=1 more segment, no room for anything (error)
 2956          * case 2: 1 more segment and enough room for it
 2957          */
 2958 
 2959         if (cm->cm_sglsize < (segsleft * sizeof(MPI2_SGE_SIMPLE64))) {
 2960                 mpr_dprint(cm->cm_sc, MPR_ERROR,
 2961                     "%s: warning: Not enough room for MPI SGL in frame.\n",
 2962                     __func__);
 2963                 return(ENOBUFS);
 2964         }
 2965 
 2966         KASSERT(segsleft == 1,
 2967             ("segsleft cannot be more than 1 for an MPI SGL; segsleft = %d\n",
 2968             segsleft));
 2969 
 2970         /*
 2971          * There is one more segment left to add for the MPI SGL and there is
 2972          * enough room in the frame to add it.  This is the normal case because
 2973          * MPI SGLs don't have chains; otherwise something is wrong.
 2974          *
 2975          * If this is a bi-directional request, need to account for that
 2976          * here.  Save the pre-filled sge values.  These will be used
 2977          * either for the 2nd SGL or for a single direction SGL.  If
 2978          * cm_out_len is non-zero, this is a bi-directional request, so
 2979          * fill in the OUT SGL first, then the IN SGL, otherwise just
 2980          * fill in the IN SGL.  Note that at this time, when filling in
 2981          * 2 SGLs for a bi-directional request, they both use the same
 2982          * DMA buffer (same cm command).
 2983          */
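              /*
               * Resulting frame layout for the bi-directional case (sketch);
               * both SGEs carry the same DMA address saved below, and the
               * second SGE's direction follows MPR_CM_FLAGS_DATAIN:
               *
               *   [ OUT SGE: HOST_TO_IOC, EOB, length = cm_out_len ]
               *   [ IN SGE:  IOC_TO_HOST, EOB | LAST | EOL, saved length ]
               */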
 2984         saved_buf_len = sge->FlagsLength & 0x00FFFFFF;
 2985         saved_address_low = sge->Address.Low;
 2986         saved_address_high = sge->Address.High;
 2987         if (cm->cm_out_len) {
 2988                 sge->FlagsLength = cm->cm_out_len |
 2989                     ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
 2990                     MPI2_SGE_FLAGS_END_OF_BUFFER |
 2991                     MPI2_SGE_FLAGS_HOST_TO_IOC |
 2992                     MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
 2993                     MPI2_SGE_FLAGS_SHIFT);
 2994                 cm->cm_sglsize -= len;
 2995                 /* Endian Safe code */
 2996                 sge_flags = sge->FlagsLength;
 2997                 sge->FlagsLength = htole32(sge_flags);
 2998                 sge->Address.High = htole32(sge->Address.High); 
 2999                 sge->Address.Low = htole32(sge->Address.Low);
 3000                 bcopy(sge, cm->cm_sge, len);
 3001                 cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len);
 3002         }
 3003         sge->FlagsLength = saved_buf_len |
 3004             ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
 3005             MPI2_SGE_FLAGS_END_OF_BUFFER |
 3006             MPI2_SGE_FLAGS_LAST_ELEMENT |
 3007             MPI2_SGE_FLAGS_END_OF_LIST |
 3008             MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
 3009             MPI2_SGE_FLAGS_SHIFT);
 3010         if (cm->cm_flags & MPR_CM_FLAGS_DATAIN) {
 3011                 sge->FlagsLength |=
 3012                     ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) <<
 3013                     MPI2_SGE_FLAGS_SHIFT);
 3014         } else {
 3015                 sge->FlagsLength |=
 3016                     ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) <<
 3017                     MPI2_SGE_FLAGS_SHIFT);
 3018         }
 3019         sge->Address.Low = saved_address_low;
 3020         sge->Address.High = saved_address_high;
 3021 
 3022         cm->cm_sglsize -= len;
 3023         /* Endian Safe code */
 3024         sge_flags = sge->FlagsLength;
 3025         sge->FlagsLength = htole32(sge_flags);
 3026         sge->Address.High = htole32(sge->Address.High); 
 3027         sge->Address.Low = htole32(sge->Address.Low);
 3028         bcopy(sge, cm->cm_sge, len);
 3029         cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len);
 3030         return (0);
 3031 }
 3032 
 3033 /*
 3034  * Add one IEEE scatter-gather element (chain or simple) to the IEEE scatter-
 3035  * gather list for a command.  Maintain cm_sglsize and cm_sge as the
 3036  * remaining size and pointer to the next SGE to fill in, respectively.
 3037  */
 3038 int
 3039 mpr_push_ieee_sge(struct mpr_command *cm, void *sgep, int segsleft)
 3040 {
 3041         MPI2_IEEE_SGE_SIMPLE64 *sge = sgep;
 3042         int error, ieee_sge_size = sizeof(MPI25_SGE_IO_UNION);
 3043         uint32_t saved_buf_len, saved_address_low, saved_address_high;
 3044         uint32_t sge_length;
 3045 
 3046         /*
 3047          * case 1: No room for chain or segment (error).
 3048          * case 2: Two or more segments left but only room for chain.
 3049          * case 3: Last segment and room for it, so set flags.
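               *
               * Case 1 trips the panic below; case 2 is handled by calling
               * mpr_add_chain(); case 3 sets the end-of-list flags in the
               * segsleft == 1 block.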
 3050          */
 3051 
 3052         /*
 3053          * There should be room for at least one element, or there is a big
 3054          * problem.
 3055          */
 3056         if (cm->cm_sglsize < ieee_sge_size)
 3057                 panic("MPR: Need SGE Error Code\n");
 3058 
 3059         if ((segsleft >= 2) && (cm->cm_sglsize < (ieee_sge_size * 2))) {
 3060                 if ((error = mpr_add_chain(cm, segsleft)) != 0)
 3061                         return (error);
 3062         }
 3063 
 3064         if (segsleft == 1) {
 3065                 /*
 3066                  * If this is a bi-directional request, need to account for that
 3067                  * here.  Save the pre-filled sge values.  These will be used
 3068                  * either for the 2nd SGL or for a single direction SGL.  If
 3069                  * cm_out_len is non-zero, this is a bi-directional request, so
 3070                  * fill in the OUT SGL first, then the IN SGL, otherwise just
 3071                  * fill in the IN SGL.  Note that at this time, when filling in
 3072                  * 2 SGLs for a bi-directional request, they both use the same
 3073                  * DMA buffer (same cm command).
 3074                  */
 3075                 saved_buf_len = sge->Length;
 3076                 saved_address_low = sge->Address.Low;
 3077                 saved_address_high = sge->Address.High;
 3078                 if (cm->cm_out_len) {
 3079                         sge->Length = cm->cm_out_len;
 3080                         sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
 3081                             MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
 3082                         cm->cm_sglsize -= ieee_sge_size;
 3083                         /* Endian Safe code */
 3084                         sge_length = sge->Length;
 3085                         sge->Length = htole32(sge_length);
 3086                         sge->Address.High = htole32(sge->Address.High); 
 3087                         sge->Address.Low = htole32(sge->Address.Low);
 3088                         bcopy(sgep, cm->cm_sge, ieee_sge_size);
 3089                         cm->cm_sge =
 3090                             (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge +
 3091                             ieee_sge_size);
 3092                 }
 3093                 sge->Length = saved_buf_len;
 3094                 sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
 3095                     MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR |
 3096                     MPI25_IEEE_SGE_FLAGS_END_OF_LIST);
 3097                 sge->Address.Low = saved_address_low;
 3098                 sge->Address.High = saved_address_high;
 3099         }
 3100 
 3101         cm->cm_sglsize -= ieee_sge_size;
 3102         /* Endian Safe code */
 3103         sge_length = sge->Length;
 3104         sge->Length = htole32(sge_length);
 3105         sge->Address.High = htole32(sge->Address.High); 
 3106         sge->Address.Low = htole32(sge->Address.Low);
 3107         bcopy(sgep, cm->cm_sge, ieee_sge_size);
 3108         cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge +
 3109             ieee_sge_size);
 3110         return (0);
 3111 }
 3112 
 3113 /*
 3114  * Add one dma segment to the scatter-gather list for a command.
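       * mpr_data_cb() below calls this once per busdma segment, as in
       * mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i).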
 3115  */
 3116 int
 3117 mpr_add_dmaseg(struct mpr_command *cm, vm_paddr_t pa, size_t len, u_int flags,
 3118     int segsleft)
 3119 {
 3120         MPI2_SGE_SIMPLE64 sge;
 3121         MPI2_IEEE_SGE_SIMPLE64 ieee_sge;
 3122 
 3123         if (!(cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE)) {
 3124                 ieee_sge.Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
 3125                     MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
 3126                 ieee_sge.Length = len;
 3127                 mpr_from_u64(pa, &ieee_sge.Address);
 3128 
 3129                 return (mpr_push_ieee_sge(cm, &ieee_sge, segsleft));
 3130         } else {
 3131                 /*
 3132                  * This driver always uses 64-bit address elements for
 3133                  * simplicity.
 3134                  */
 3135                 flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
 3136                     MPI2_SGE_FLAGS_64_BIT_ADDRESSING;
 3137                 /* Endianness is handled by mpr_push_sge(). */
 3138                 sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT);
 3139                 mpr_from_u64(pa, &sge.Address);
 3140 
 3141                 return (mpr_push_sge(cm, &sge, sizeof sge, segsleft));
 3142         }
 3143 }
 3144 
 3145 static void
 3146 mpr_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 3147 {
 3148         struct mpr_softc *sc;
 3149         struct mpr_command *cm;
 3150         u_int i, dir, sflags;
 3151 
 3152         cm = (struct mpr_command *)arg;
 3153         sc = cm->cm_sc;
 3154 
 3155         /*
 3156          * In this case, just print out a warning and let the chip tell the
 3157          * user they did the wrong thing.
 3158          */
 3159         if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) {
 3160                 mpr_dprint(sc, MPR_ERROR, "%s: warning: busdma returned %d "
 3161                     "segments, more than the %d allowed\n", __func__, nsegs,
 3162                     cm->cm_max_segs);
 3163         }
 3164 
 3165         /*
 3166          * Set up DMA direction flags.  Bi-directional requests are also handled
 3167          * here.  In that case, both direction flags will be set.
 3168          */
 3169         sflags = 0;
 3170         if (cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) {
 3171                 /*
 3172                  * We have to add a special case for SMP passthrough, there
 3173                  * is no easy way to generically handle it.  The first
 3174                  * S/G element is used for the command (therefore the
 3175                  * direction bit needs to be set).  The second one is used
 3176                  * for the reply.  We'll leave it to the caller to make
 3177                  * sure we only have two buffers.
 3178                  */
 3179                 /*
 3180                  * Even though the busdma man page says it doesn't make
 3181                  * sense to have both direction flags, it does in this case.
 3182                  * We have one s/g element being accessed in each direction.
 3183                  */
 3184                 dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD;
 3185 
 3186                 /*
 3187                  * Set the direction flag on the first buffer in the SMP
 3188                  * passthrough request.  We'll clear it for the second one.
 3189                  */
 3190                 sflags |= MPI2_SGE_FLAGS_DIRECTION |
 3191                           MPI2_SGE_FLAGS_END_OF_BUFFER;
 3192         } else if (cm->cm_flags & MPR_CM_FLAGS_DATAOUT) {
 3193                 sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC;
 3194                 dir = BUS_DMASYNC_PREWRITE;
 3195         } else
 3196                 dir = BUS_DMASYNC_PREREAD;
 3197 
 3198         /* Check if a native SG list is needed for an NVMe PCIe device. */
 3199         if (cm->cm_targ && cm->cm_targ->is_nvme &&
 3200             mpr_check_pcie_native_sgl(sc, cm, segs, nsegs) == 0) {
 3201                 /* A native SG list was built, skip to end. */
 3202                 goto out;
 3203         }
 3204 
 3205         for (i = 0; i < nsegs; i++) {
 3206                 if ((cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) && (i != 0)) {
 3207                         sflags &= ~MPI2_SGE_FLAGS_DIRECTION;
 3208                 }
 3209                 error = mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len,
 3210                     sflags, nsegs - i);
 3211                 if (error != 0) {
 3212                         /* Resource shortage, roll back! */
 3213                         if (ratecheck(&sc->lastfail, &mpr_chainfail_interval))
 3214                                 mpr_dprint(sc, MPR_INFO, "Out of chain frames, "
 3215                                     "consider increasing hw.mpr.max_chains.\n");
 3216                         cm->cm_flags |= MPR_CM_FLAGS_CHAIN_FAILED;
 3217                         mpr_complete_command(sc, cm);
 3218                         return;
 3219                 }
 3220         }
 3221 
 3222 out:
 3223         bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir);
 3224         mpr_enqueue_request(sc, cm);
 3225 
 3226         return;
 3227 }
 3228 
 3229 static void
 3230 mpr_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize,
 3231              int error)
 3232 {
 3233         mpr_data_cb(arg, segs, nsegs, error);
 3234 }
 3235 
 3236 /*
 3237  * This is the routine to enqueue commands asynchronously.
 3238  * Note that the only error path here is from bus_dmamap_load(), which can
 3239  * return EINPROGRESS if it is waiting for resources.  Other than this, it's
 3240  * assumed that if you have a command in-hand, then you have enough credits
 3241  * to use it.
 3242  */
 3243 int
 3244 mpr_map_command(struct mpr_softc *sc, struct mpr_command *cm)
 3245 {
 3246         int error = 0;
 3247 
 3248         if (cm->cm_flags & MPR_CM_FLAGS_USE_UIO) {
 3249                 error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap,
 3250                     &cm->cm_uio, mpr_data_cb2, cm, 0);
 3251         } else if (cm->cm_flags & MPR_CM_FLAGS_USE_CCB) {
 3252                 error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap,
 3253                     cm->cm_data, mpr_data_cb, cm, 0);
 3254         } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) {
 3255                 error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap,
 3256                     cm->cm_data, cm->cm_length, mpr_data_cb, cm, 0);
 3257         } else {
 3258                 /* Add a zero-length element as needed */
 3259                 if (cm->cm_sge != NULL)
 3260                         mpr_add_dmaseg(cm, 0, 0, 0, 1);
 3261                 mpr_enqueue_request(sc, cm);
 3262         }
 3263 
 3264         return (error);
 3265 }
 3266 
 3267 /*
 3268  * This is the routine to enqueue commands synchronously.  An error of
 3269  * EINPROGRESS from mpr_map_command() is ignored since the command will
 3270  * be executed and enqueued automatically.  Other errors come from msleep().
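       * On timeout, the controller is reinitialized and ETIMEDOUT is returned;
       * if the command was freed during the reinit, *cmp is set to NULL and the
       * caller must not reuse it (see mpr_read_config_page() for the pattern).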
 3271  */
 3272 int
 3273 mpr_wait_command(struct mpr_softc *sc, struct mpr_command **cmp, int timeout,
 3274     int sleep_flag)
 3275 {
 3276         int error, rc;
 3277         struct timeval cur_time, start_time;
 3278         struct mpr_command *cm = *cmp;
 3279 
 3280         if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) 
 3281                 return (EBUSY);
 3282 
 3283         cm->cm_complete = NULL;
 3284         cm->cm_flags |= (MPR_CM_FLAGS_WAKEUP | MPR_CM_FLAGS_POLLED);
 3285         error = mpr_map_command(sc, cm);
 3286         if ((error != 0) && (error != EINPROGRESS))
 3287                 return (error);
 3288 
 3289         /* Check for context and wait for 50 msec at a time until time has
 3290          * expired or the command has finished.  If msleep can't be used,
 3291          * need to poll. */
 3292 #if __FreeBSD_version >= 1000029
 3293         if (curthread->td_no_sleeping)
 3294 #else //__FreeBSD_version < 1000029
 3295         if (curthread->td_pflags & TDP_NOSLEEPING)
 3296 #endif //__FreeBSD_version >= 1000029
 3297                 sleep_flag = NO_SLEEP;
 3298         getmicrouptime(&start_time);
 3299         if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) {
 3300                 error = msleep(cm, &sc->mpr_mtx, 0, "mprwait", timeout*hz);
 3301                 if (error == EWOULDBLOCK) {
 3302                         /*
 3303                          * Record the actual elapsed time in the case of a
 3304                          * timeout for the message below.
 3305                          */
 3306                         getmicrouptime(&cur_time);
 3307                         timevalsub(&cur_time, &start_time);
 3308                 }
 3309         } else {
 3310                 while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) {
 3311                         mpr_intr_locked(sc);
 3312                         if (sleep_flag == CAN_SLEEP)
 3313                                 pause("mprwait", hz/20);
 3314                         else
 3315                                 DELAY(50000);
 3316                 
 3317                         getmicrouptime(&cur_time);
 3318                         timevalsub(&cur_time, &start_time);
 3319                         if (cur_time.tv_sec > timeout) {
 3320                                 error = EWOULDBLOCK;
 3321                                 break;
 3322                         }
 3323                 }
 3324         }
 3325 
 3326         if (error == EWOULDBLOCK) {
 3327                 mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s, timeout=%d,"
 3328                     " elapsed=%jd\n", __func__, timeout,
 3329                     (intmax_t)cur_time.tv_sec);
 3330                 rc = mpr_reinit(sc);
 3331                 mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" :
 3332                     "failed");
 3333                 if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) {
 3334                         /*
 3335                          * Tell the caller that we freed the command in a
 3336                          * reinit.
 3337                          */
 3338                         *cmp = NULL;
 3339                 }
 3340                 error = ETIMEDOUT;
 3341         }
 3342         return (error);
 3343 }
 3344 
 3345 /*
 3346  * This is the routine to enqueue a command synchronously and poll for
 3347  * completion.  Its use should be rare.
 3348  */
 3349 int
 3350 mpr_request_polled(struct mpr_softc *sc, struct mpr_command **cmp)
 3351 {
 3352         int error, rc;
 3353         struct timeval cur_time, start_time;
 3354         struct mpr_command *cm = *cmp;
 3355 
 3356         error = 0;
 3357 
 3358         cm->cm_flags |= MPR_CM_FLAGS_POLLED;
 3359         cm->cm_complete = NULL;
 3360         mpr_map_command(sc, cm);
 3361 
 3362         getmicrouptime(&start_time);
 3363         while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) {
 3364                 mpr_intr_locked(sc);
 3365 
 3366                 if (mtx_owned(&sc->mpr_mtx))
 3367                         msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
 3368                             "mprpoll", hz/20);
 3369                 else
 3370                         pause("mprpoll", hz/20);
 3371 
 3372                 /*
 3373                  * Check for real-time timeout and fail if more than 60 seconds.
 3374                  */
 3375                 getmicrouptime(&cur_time);
 3376                 timevalsub(&cur_time, &start_time);
 3377                 if (cur_time.tv_sec > 60) {
 3378                         mpr_dprint(sc, MPR_FAULT, "polling failed\n");
 3379                         error = ETIMEDOUT;
 3380                         break;
 3381                 }
 3382         }
 3383 
 3384         if (error) {
 3385                 mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__);
 3386                 rc = mpr_reinit(sc);
 3387                 mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" :
 3388                     "failed");
 3389 
 3390                 if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) {
 3391                         /*
 3392                          * Tell the caller that we freed the command in a
 3393                          * reinit.
 3394                          */
 3395                         *cmp = NULL;
 3396                 }
 3397         }
 3398         return (error);
 3399 }
 3400 
 3401 /*
 3402  * The MPT driver had a verbose interface for config pages.  In this driver,
 3403  * reduce it to much simpler terms, similar to the Linux driver.
 3404  */
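      /*
       * Usage sketch (illustrative only; the page choice and values are
       * hypothetical): a synchronous header read supplies no buffer and
       * leaves params->callback NULL:
       *
       *	struct mpr_config_params params;
       *
       *	bzero(&params, sizeof(params));
       *	params.action = MPI2_CONFIG_ACTION_PAGE_HEADER;
       *	params.hdr.Struct.PageType = MPI2_CONFIG_PAGETYPE_IOC;
       *	params.hdr.Struct.PageNumber = 8;
       *	error = mpr_read_config_page(sc, &params);
       */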
 3405 int
 3406 mpr_read_config_page(struct mpr_softc *sc, struct mpr_config_params *params)
 3407 {
 3408         MPI2_CONFIG_REQUEST *req;
 3409         struct mpr_command *cm;
 3410         int error;
 3411 
 3412         if (sc->mpr_flags & MPR_FLAGS_BUSY) {
 3413                 return (EBUSY);
 3414         }
 3415 
 3416         cm = mpr_alloc_command(sc);
 3417         if (cm == NULL) {
 3418                 return (EBUSY);
 3419         }
 3420 
 3421         req = (MPI2_CONFIG_REQUEST *)cm->cm_req;
 3422         req->Function = MPI2_FUNCTION_CONFIG;
 3423         req->Action = params->action;
 3424         req->SGLFlags = 0;
 3425         req->ChainOffset = 0;
 3426         req->PageAddress = params->page_address;
 3427         if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) {
 3428                 MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr;
 3429 
 3430                 hdr = &params->hdr.Ext;
 3431                 req->ExtPageType = hdr->ExtPageType;
 3432                 req->ExtPageLength = hdr->ExtPageLength;
 3433                 req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED;
 3434                 req->Header.PageLength = 0; /* Must be set to zero */
 3435                 req->Header.PageNumber = hdr->PageNumber;
 3436                 req->Header.PageVersion = hdr->PageVersion;
 3437         } else {
 3438                 MPI2_CONFIG_PAGE_HEADER *hdr;
 3439 
 3440                 hdr = &params->hdr.Struct;
 3441                 req->Header.PageType = hdr->PageType;
 3442                 req->Header.PageNumber = hdr->PageNumber;
 3443                 req->Header.PageLength = hdr->PageLength;
 3444                 req->Header.PageVersion = hdr->PageVersion;
 3445         }
 3446 
 3447         cm->cm_data = params->buffer;
 3448         cm->cm_length = params->length;
 3449         if (cm->cm_data != NULL) {
 3450                 cm->cm_sge = &req->PageBufferSGE;
 3451                 cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION);
 3452                 cm->cm_flags = MPR_CM_FLAGS_SGE_SIMPLE | MPR_CM_FLAGS_DATAIN;
 3453         } else
 3454                 cm->cm_sge = NULL;
 3455         cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
 3456 
 3457         cm->cm_complete_data = params;
 3458         if (params->callback != NULL) {
 3459                 cm->cm_complete = mpr_config_complete;
 3460                 return (mpr_map_command(sc, cm));
 3461         } else {
 3462                 error = mpr_wait_command(sc, &cm, 0, CAN_SLEEP);
 3463                 if (error) {
 3464                         mpr_dprint(sc, MPR_FAULT,
 3465                             "Error %d reading config page\n", error);
 3466                         if (cm != NULL)
 3467                                 mpr_free_command(sc, cm);
 3468                         return (error);
 3469                 }
 3470                 mpr_config_complete(sc, cm);
 3471         }
 3472 
 3473         return (0);
 3474 }
 3475 
 3476 int
 3477 mpr_write_config_page(struct mpr_softc *sc, struct mpr_config_params *params)
 3478 {
 3479         return (EINVAL);
 3480 }
 3481 
 3482 static void
 3483 mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm)
 3484 {
 3485         MPI2_CONFIG_REPLY *reply;
 3486         struct mpr_config_params *params;
 3487 
 3488         MPR_FUNCTRACE(sc);
 3489         params = cm->cm_complete_data;
 3490 
 3491         if (cm->cm_data != NULL) {
 3492                 bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap,
 3493                     BUS_DMASYNC_POSTREAD);
 3494                 bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap);
 3495         }
 3496 
 3497         /*
 3498          * XXX KDM need to do more error recovery?  This results in the
 3499          * device in question not getting probed.
 3500          */
 3501         if ((cm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) {
 3502                 params->status = MPI2_IOCSTATUS_BUSY;
 3503                 goto done;
 3504         }
 3505 
 3506         reply = (MPI2_CONFIG_REPLY *)cm->cm_reply;
 3507         if (reply == NULL) {
 3508                 params->status = MPI2_IOCSTATUS_BUSY;
 3509                 goto done;
 3510         }
 3511         params->status = reply->IOCStatus;
 3512         if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) {
 3513                 params->hdr.Ext.ExtPageType = reply->ExtPageType;
 3514                 params->hdr.Ext.ExtPageLength = reply->ExtPageLength;
 3515                 params->hdr.Ext.PageType = reply->Header.PageType;
 3516                 params->hdr.Ext.PageNumber = reply->Header.PageNumber;
 3517                 params->hdr.Ext.PageVersion = reply->Header.PageVersion;
 3518         } else {
 3519                 params->hdr.Struct.PageType = reply->Header.PageType;
 3520                 params->hdr.Struct.PageNumber = reply->Header.PageNumber;
 3521                 params->hdr.Struct.PageLength = reply->Header.PageLength;
 3522                 params->hdr.Struct.PageVersion = reply->Header.PageVersion;
 3523         }
 3524 
 3525 done:
 3526         mpr_free_command(sc, cm);
 3527         if (params->callback != NULL)
 3528                 params->callback(sc, params);
 3529 
 3530         return;
 3531 }
