FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgbe/t4_sge.c

    1 /*-
    2  * Copyright (c) 2011 Chelsio Communications, Inc.
    3  * All rights reserved.
    4  * Written by: Navdeep Parhar <np@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/8.4/sys/dev/cxgbe/t4_sge.c 247670 2013-03-02 21:59:07Z np $");
   30 
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 
   34 #include <sys/types.h>
   35 #include <sys/mbuf.h>
   36 #include <sys/socket.h>
   37 #include <sys/kernel.h>
   38 #include <sys/kdb.h>
   39 #include <sys/malloc.h>
   40 #include <sys/queue.h>
   41 #include <sys/taskqueue.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/smp.h>
   44 #include <net/bpf.h>
   45 #include <net/ethernet.h>
   46 #include <net/if.h>
   47 #include <net/if_vlan_var.h>
   48 #include <netinet/in.h>
   49 #include <netinet/ip.h>
   50 #include <netinet/ip6.h>
   51 #include <netinet/tcp.h>
   52 
   53 #include "common/common.h"
   54 #include "common/t4_regs.h"
   55 #include "common/t4_regs_values.h"
   56 #include "common/t4_msg.h"
   57 
   58 struct fl_buf_info {
   59         int size;
   60         int type;
   61         uma_zone_t zone;
   62 };
   63 
   64 /* Filled up by t4_sge_modload */
   65 static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
   66 
   67 #define FL_BUF_SIZE(x)  (fl_buf_info[x].size)
   68 #define FL_BUF_TYPE(x)  (fl_buf_info[x].type)
   69 #define FL_BUF_ZONE(x)  (fl_buf_info[x].zone)
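       /*
        * Each entry describes one free-list buffer class: the buffer size
        * programmed into the chip, the matching mbuf cluster type, and the
        * UMA zone the clusters come from.
        */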
   70 
   71 /*
   72  * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
   73  * 0-7 are valid values.
   74  */
   75 static int fl_pktshift = 2;
   76 TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);
   77 
   78 /*
   79  * Pad ethernet payload up to this boundary.
   80  * -1: driver should figure out a good value.
   81  *  Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
   82  */
   83 static int fl_pad = -1;
   84 TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);
   85 
   86 /*
   87  * Status page length.
   88  * -1: driver should figure out a good value.
   89  *  64 or 128 are the only other valid values.
   90  */
   91 static int spg_len = -1;
   92 TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);
   93 
   94 /*
   95  * Congestion drops.
   96  * -1: no congestion feedback (not recommended).
   97  *  0: backpressure the channel instead of dropping packets right away.
   98  *  1: no backpressure, drop packets for the congested queue immediately.
   99  */
  100 static int cong_drop = 0;
  101 TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);
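       /*
        * All of the knobs above are boot-time loader tunables.  For example,
        * a line such as hw.cxgbe.fl_pktshift="0" in /boot/loader.conf
        * overrides the compiled-in default; t4_sge_modload() then validates
        * whatever value ends up in the variable.
        */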
  102 
  103 /* Used to track coalesced tx work request */
  104 struct txpkts {
  105         uint64_t *flitp;        /* ptr to flit where next pkt should start */
  106         uint8_t npkt;           /* # of packets in this work request */
  107         uint8_t nflits;         /* # of flits used by this work request */
  108         uint16_t plen;          /* total payload (sum of all packets) */
  109 };
  110 
  111 /* A packet's SGL.  This + m_pkthdr has all info needed for tx */
  112 struct sgl {
  113         int nsegs;              /* # of segments in the SGL, 0 means imm. tx */
  114         int nflits;             /* # of flits needed for the SGL */
  115         bus_dma_segment_t seg[TX_SGL_SEGS];
  116 };
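       /*
        * Note: a "flit" is one 64-bit (8-byte) unit of an egress descriptor;
        * work request and SGL sizes are counted in flits throughout this
        * file.
        */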
  117 
  118 static int service_iq(struct sge_iq *, int);
  119 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
  120     int *);
  121 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
  122 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
  123     int);
  124 static inline void init_fl(struct sge_fl *, int, int, char *);
  125 static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
  126     char *);
  127 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
  128     bus_addr_t *, void **);
  129 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
  130     void *);
  131 static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
  132     int, int);
  133 static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
  134 static int alloc_fwq(struct adapter *);
  135 static int free_fwq(struct adapter *);
  136 static int alloc_mgmtq(struct adapter *);
  137 static int free_mgmtq(struct adapter *);
  138 static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
  139     struct sysctl_oid *);
  140 static int free_rxq(struct port_info *, struct sge_rxq *);
  141 #ifdef TCP_OFFLOAD
  142 static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
  143     struct sysctl_oid *);
  144 static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
  145 #endif
  146 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
  147 static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
  148 #ifdef TCP_OFFLOAD
  149 static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
  150 #endif
  151 static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
  152 static int free_eq(struct adapter *, struct sge_eq *);
  153 static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
  154     struct sysctl_oid *);
  155 static int free_wrq(struct adapter *, struct sge_wrq *);
  156 static int alloc_txq(struct port_info *, struct sge_txq *, int,
  157     struct sysctl_oid *);
  158 static int free_txq(struct port_info *, struct sge_txq *);
  159 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
  160 static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
  161 static inline void iq_next(struct sge_iq *);
  162 static inline void ring_fl_db(struct adapter *, struct sge_fl *);
  163 static int refill_fl(struct adapter *, struct sge_fl *, int);
  164 static void refill_sfl(void *);
  165 static int alloc_fl_sdesc(struct sge_fl *);
  166 static void free_fl_sdesc(struct sge_fl *);
  167 static void set_fl_tag_idx(struct sge_fl *, int);
  168 static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
  169 
  170 static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
  171 static int free_pkt_sgl(struct sge_txq *, struct sgl *);
  172 static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
  173     struct sgl *);
  174 static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
  175     struct mbuf *, struct sgl *);
  176 static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
  177 static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
  178     struct txpkts *, struct mbuf *, struct sgl *);
  179 static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
  180 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
  181 static inline void ring_eq_db(struct adapter *, struct sge_eq *);
  182 static inline int reclaimable(struct sge_eq *);
  183 static int reclaim_tx_descs(struct sge_txq *, int, int);
  184 static void write_eqflush_wr(struct sge_eq *);
  185 static __be64 get_flit(bus_dma_segment_t *, int, int);
  186 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
  187     struct mbuf *);
  188 static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
  189     struct mbuf *);
  190 
  191 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
  192 
  193 #if defined(__i386__) || defined(__amd64__)
  194 extern u_int cpu_clflush_line_size;
  195 #endif
  196 
  197 /*
  198  * Called on MOD_LOAD.  Fills up fl_buf_info[] and validates/calculates the SGE
  199  * tunables.
  200  */
  201 void
  202 t4_sge_modload(void)
  203 {
  204         int i;
  205         int bufsize[FL_BUF_SIZES] = {
  206                 MCLBYTES,
  207 #if MJUMPAGESIZE != MCLBYTES
  208                 MJUMPAGESIZE,
  209 #endif
  210                 MJUM9BYTES,
  211                 MJUM16BYTES
  212         };
  213 
  214         for (i = 0; i < FL_BUF_SIZES; i++) {
  215                 FL_BUF_SIZE(i) = bufsize[i];
  216                 FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
  217                 FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
  218         }
  219 
  220         if (fl_pktshift < 0 || fl_pktshift > 7) {
  221                 printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
  222                     " using 2 instead.\n", fl_pktshift);
  223                 fl_pktshift = 2;
  224         }
  225 
  226         if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
  227                 int pad;
  228 
  229 #if defined(__i386__) || defined(__amd64__)
  230                 pad = max(cpu_clflush_line_size, 32);
  231 #else
  232                 pad = max(CACHE_LINE_SIZE, 32);
  233 #endif
  234                 pad = min(pad, 4096);
  235 
  236                 if (fl_pad != -1) {
  237                         printf("Invalid hw.cxgbe.fl_pad value (%d),"
  238                             " using %d instead.\n", fl_pad, pad);
  239                 }
  240                 fl_pad = pad;
  241         }
  242 
  243         if (spg_len != 64 && spg_len != 128) {
  244                 int len;
  245 
  246 #if defined(__i386__) || defined(__amd64__)
  247                 len = cpu_clflush_line_size > 64 ? 128 : 64;
  248 #else
  249                 len = 64;
  250 #endif
  251                 if (spg_len != -1) {
  252                         printf("Invalid hw.cxgbe.spg_len value (%d),"
  253                             " using %d instead.\n", spg_len, len);
  254                 }
  255                 spg_len = len;
  256         }
  257 
  258         if (cong_drop < -1 || cong_drop > 1) {
  259                 printf("Invalid hw.cxgbe.cong_drop value (%d),"
  260                     " using 0 instead.\n", cong_drop);
  261                 cong_drop = 0;
  262         }
  263 }
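       /*
        * For example, on a typical amd64 system with a 64-byte cache line the
        * auto-selected (-1) defaults above work out to fl_pad = 64 and
        * spg_len = 64, while fl_pktshift and cong_drop keep their compiled-in
        * defaults of 2 and 0.
        */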
  264 
  265 /**
  266  *      t4_sge_init - initialize SGE
  267  *      @sc: the adapter
  268  *
  269  *      Performs SGE initialization needed every time after a chip reset.
  270  *      We do not initialize any of the queues here, instead the driver
  271  *      top-level must request them individually.
  272  */
  273 int
  274 t4_sge_init(struct adapter *sc)
  275 {
  276         struct sge *s = &sc->sge;
  277         int i, rc = 0;
  278         uint32_t ctrl_mask, ctrl_val, hpsize, v;
  279 
  280         ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
  281             V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
  282             F_EGRSTATUSPAGESIZE;
  283         ctrl_val = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
  284             V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
  285             V_EGRSTATUSPAGESIZE(spg_len == 128);
  286 
  287         hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
  288             V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
  289             V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
  290             V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
  291             V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
  292             V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
  293             V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
  294             V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
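               /*
                * Field encodings: the ingress pad boundary is stored as
                * log2(fl_pad) - 5, i.e. fl_pad = 32 << field, and each host
                * page size field holds PAGE_SHIFT - 10, so 4KB pages
                * (PAGE_SHIFT = 12) are written as 2.
                */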
  295 
  296         if (sc->flags & MASTER_PF) {
  297                 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
  298                 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
  299 
  300                 t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val);
  301                 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize);
  302                 for (i = 0; i < FL_BUF_SIZES; i++) {
  303                         t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
  304                             FL_BUF_SIZE(i));
  305                 }
  306 
  307                 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
  308                     V_THRESHOLD_0(intr_pktcount[0]) |
  309                     V_THRESHOLD_1(intr_pktcount[1]) |
  310                     V_THRESHOLD_2(intr_pktcount[2]) |
  311                     V_THRESHOLD_3(intr_pktcount[3]));
  312 
  313                 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
  314                     V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
  315                     V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])));
  316                 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
  317                     V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
  318                     V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])));
  319                 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
  320                     V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
  321                     V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])));
  322 
  323                 if (cong_drop == 0) {
  324                         t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
  325                             F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
  326                             F_TUNNELCNGDROP3, 0);
  327                 }
  328         }
  329 
  330         v = t4_read_reg(sc, A_SGE_CONTROL);
  331         if ((v & ctrl_mask) != ctrl_val) {
  332                 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v);
  333                 rc = EINVAL;
  334         }
  335 
  336         v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
  337         if (v != hpsize) {
  338                 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v);
  339                 rc = EINVAL;
  340         }
  341 
  342         for (i = 0; i < FL_BUF_SIZES; i++) {
  343                 v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
  344                 if (v != FL_BUF_SIZE(i)) {
  345                         device_printf(sc->dev,
  346                             "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
  347                         rc = EINVAL;
  348                 }
  349         }
  350 
  351         v = t4_read_reg(sc, A_SGE_CONM_CTRL);
  352         s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1;
  353 
  354         v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
  355         sc->sge.counter_val[0] = G_THRESHOLD_0(v);
  356         sc->sge.counter_val[1] = G_THRESHOLD_1(v);
  357         sc->sge.counter_val[2] = G_THRESHOLD_2(v);
  358         sc->sge.counter_val[3] = G_THRESHOLD_3(v);
  359 
  360         v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
  361         sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc);
  362         sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc);
  363         v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
  364         sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc);
  365         sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc);
  366         v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
  367         sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
  368         sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);
  369 
  370         t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
  371         t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
  372         t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
  373         t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
  374 
  375         t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
  376 
  377         return (rc);
  378 }
  379 
  380 int
  381 t4_create_dma_tag(struct adapter *sc)
  382 {
  383         int rc;
  384 
  385         rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
  386             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
  387             BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
  388             NULL, &sc->dmat);
  389         if (rc != 0) {
  390                 device_printf(sc->dev,
  391                     "failed to create main DMA tag: %d\n", rc);
  392         }
  393 
  394         return (rc);
  395 }
  396 
  397 int
  398 t4_destroy_dma_tag(struct adapter *sc)
  399 {
  400         if (sc->dmat)
  401                 bus_dma_tag_destroy(sc->dmat);
  402 
  403         return (0);
  404 }
  405 
  406 /*
  407  * Allocate and initialize the firmware event queue and the management queue.
  408  *
  409  * Returns errno on failure.  Resources allocated up to that point may still be
  410  * allocated.  Caller is responsible for cleanup in case this function fails.
  411  */
  412 int
  413 t4_setup_adapter_queues(struct adapter *sc)
  414 {
  415         int rc;
  416 
  417         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
  418 
  419         sysctl_ctx_init(&sc->ctx);
  420         sc->flags |= ADAP_SYSCTL_CTX;
  421 
  422         /*
  423          * Firmware event queue
  424          */
  425         rc = alloc_fwq(sc);
  426         if (rc != 0)
  427                 return (rc);
  428 
  429         /*
  430          * Management queue.  This is just a control queue that uses the fwq as
  431          * its associated iq.
  432          */
  433         rc = alloc_mgmtq(sc);
  434 
  435         return (rc);
  436 }
  437 
  438 /*
  439  * Idempotent
  440  */
  441 int
  442 t4_teardown_adapter_queues(struct adapter *sc)
  443 {
  444 
  445         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
  446 
  447         /* Do this before freeing the queue */
  448         if (sc->flags & ADAP_SYSCTL_CTX) {
  449                 sysctl_ctx_free(&sc->ctx);
  450                 sc->flags &= ~ADAP_SYSCTL_CTX;
  451         }
  452 
  453         free_mgmtq(sc);
  454         free_fwq(sc);
  455 
  456         return (0);
  457 }
  458 
  459 static inline int
  460 first_vector(struct port_info *pi)
  461 {
  462         struct adapter *sc = pi->adapter;
  463         int rc = T4_EXTRA_INTR, i;
  464 
  465         if (sc->intr_count == 1)
  466                 return (0);
  467 
  468         for_each_port(sc, i) {
  469                 struct port_info *p = sc->port[i];
  470 
  471                 if (i == pi->port_id)
  472                         break;
  473 
  474 #ifdef TCP_OFFLOAD
  475                 if (sc->flags & INTR_DIRECT)
  476                         rc += p->nrxq + p->nofldrxq;
  477                 else
  478                         rc += max(p->nrxq, p->nofldrxq);
  479 #else
  480                 /*
  481                  * Not compiled with offload support and intr_count > 1.  Only
  482                  * NIC queues exist and they'd better be taking direct
  483                  * interrupts.
  484                  */
  485                 KASSERT(sc->flags & INTR_DIRECT,
  486                     ("%s: intr_count %d, !INTR_DIRECT", __func__,
  487                     sc->intr_count));
  488 
  489                 rc += p->nrxq;
  490 #endif
  491         }
  492 
  493         return (rc);
  494 }
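       /*
        * For example, in a NIC-only configuration with direct interrupts and
        * nrxq = 4 on every port, port 0 starts at vector T4_EXTRA_INTR,
        * port 1 at T4_EXTRA_INTR + 4, port 2 at T4_EXTRA_INTR + 8, and so on.
        */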
  495 
  496 /*
  497  * Given an arbitrary "index," come up with an iq that can be used by other
  498  * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
  499  * The iq returned is guaranteed to be something that takes direct interrupts.
  500  */
  501 static struct sge_iq *
  502 port_intr_iq(struct port_info *pi, int idx)
  503 {
  504         struct adapter *sc = pi->adapter;
  505         struct sge *s = &sc->sge;
  506         struct sge_iq *iq = NULL;
  507 
  508         if (sc->intr_count == 1)
  509                 return (&sc->sge.fwq);
  510 
  511 #ifdef TCP_OFFLOAD
  512         if (sc->flags & INTR_DIRECT) {
  513                 idx %= pi->nrxq + pi->nofldrxq;
  514                 
  515                 if (idx >= pi->nrxq) {
  516                         idx -= pi->nrxq;
  517                         iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
  518                 } else
  519                         iq = &s->rxq[pi->first_rxq + idx].iq;
  520 
  521         } else {
  522                 idx %= max(pi->nrxq, pi->nofldrxq);
  523 
  524                 if (pi->nrxq >= pi->nofldrxq)
  525                         iq = &s->rxq[pi->first_rxq + idx].iq;
  526                 else
  527                         iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
  528         }
  529 #else
  530         /*
  531          * Not compiled with offload support and intr_count > 1.  Only NIC
  532          * queues exist and they'd better be taking direct interrupts.
  533          */
  534         KASSERT(sc->flags & INTR_DIRECT,
  535             ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));
  536 
  537         idx %= pi->nrxq;
  538         iq = &s->rxq[pi->first_rxq + idx].iq;
  539 #endif
  540 
  541         KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
  542         return (iq);
  543 }
  544 
  545 static inline int
  546 mtu_to_bufsize(int mtu)
  547 {
  548         int bufsize;
  549 
  550         /* large enough for a frame even when VLAN extraction is disabled */
  551         bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
  552         bufsize = roundup(bufsize + fl_pktshift, fl_pad);
  553 
  554         return (bufsize);
  555 }
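       /*
        * For example, with the default fl_pktshift of 2 and an fl_pad of 64,
        * a standard 1500 byte MTU works out to 14 + 4 + 1500 + 2 = 1520
        * bytes, rounded up to a 1536 byte freelist buffer.
        */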
  556 
  557 int
  558 t4_setup_port_queues(struct port_info *pi)
  559 {
  560         int rc = 0, i, j, intr_idx, iqid;
  561         struct sge_rxq *rxq;
  562         struct sge_txq *txq;
  563         struct sge_wrq *ctrlq;
  564 #ifdef TCP_OFFLOAD
  565         struct sge_ofld_rxq *ofld_rxq;
  566         struct sge_wrq *ofld_txq;
  567         struct sysctl_oid *oid2 = NULL;
  568 #endif
  569         char name[16];
  570         struct adapter *sc = pi->adapter;
  571         struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
  572         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
  573         int bufsize = mtu_to_bufsize(pi->ifp->if_mtu);
  574 
  575         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
  576             NULL, "rx queues");
  577 
  578 #ifdef TCP_OFFLOAD
  579         if (is_offload(sc)) {
  580                 oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
  581                     CTLFLAG_RD, NULL,
  582                     "rx queues for offloaded TCP connections");
  583         }
  584 #endif
  585 
  586         /* Interrupt vector to start from (when using multiple vectors) */
  587         intr_idx = first_vector(pi);
  588 
  589         /*
  590          * First pass over all rx queues (NIC and TOE):
  591          * a) initialize iq and fl
  592          * b) allocate queue iff it will take direct interrupts.
  593          */
  594         for_each_rxq(pi, i, rxq) {
  595 
  596                 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
  597                     RX_IQ_ESIZE);
  598 
  599                 snprintf(name, sizeof(name), "%s rxq%d-fl",
  600                     device_get_nameunit(pi->dev), i);
  601                 init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);
  602 
  603                 if (sc->flags & INTR_DIRECT
  604 #ifdef TCP_OFFLOAD
  605                     || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
  606 #endif
  607                    ) {
  608                         rxq->iq.flags |= IQ_INTR;
  609                         rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
  610                         if (rc != 0)
  611                                 goto done;
  612                         intr_idx++;
  613                 }
  614         }
  615 
  616 #ifdef TCP_OFFLOAD
  617         for_each_ofld_rxq(pi, i, ofld_rxq) {
  618 
  619                 init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
  620                     pi->qsize_rxq, RX_IQ_ESIZE);
  621 
  622                 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
  623                     device_get_nameunit(pi->dev), i);
  624                 init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, OFLD_BUF_SIZE, name);
  625 
  626                 if (sc->flags & INTR_DIRECT ||
  627                     (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
  628                         ofld_rxq->iq.flags |= IQ_INTR;
  629                         rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
  630                         if (rc != 0)
  631                                 goto done;
  632                         intr_idx++;
  633                 }
  634         }
  635 #endif
  636 
  637         /*
  638          * Second pass over all rx queues (NIC and TOE).  The queues forwarding
  639          * their interrupts are allocated now.
  640          */
  641         j = 0;
  642         for_each_rxq(pi, i, rxq) {
  643                 if (rxq->iq.flags & IQ_INTR)
  644                         continue;
  645 
  646                 intr_idx = port_intr_iq(pi, j)->abs_id;
  647 
  648                 rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
  649                 if (rc != 0)
  650                         goto done;
  651                 j++;
  652         }
  653 
  654 #ifdef TCP_OFFLOAD
  655         for_each_ofld_rxq(pi, i, ofld_rxq) {
  656                 if (ofld_rxq->iq.flags & IQ_INTR)
  657                         continue;
  658 
  659                 intr_idx = port_intr_iq(pi, j)->abs_id;
  660 
  661                 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
  662                 if (rc != 0)
  663                         goto done;
  664                 j++;
  665         }
  666 #endif
  667 
  668         /*
  669          * Now the tx queues.  Only one pass needed.
  670          */
  671         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
  672             NULL, "tx queues");
  673         j = 0;
  674         for_each_txq(pi, i, txq) {
  675                 uint16_t iqid;
  676 
  677                 iqid = port_intr_iq(pi, j)->cntxt_id;
  678 
  679                 snprintf(name, sizeof(name), "%s txq%d",
  680                     device_get_nameunit(pi->dev), i);
  681                 init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
  682                     name);
  683 
  684                 rc = alloc_txq(pi, txq, i, oid);
  685                 if (rc != 0)
  686                         goto done;
  687                 j++;
  688         }
  689 
  690 #ifdef TCP_OFFLOAD
  691         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
  692             CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
  693         for_each_ofld_txq(pi, i, ofld_txq) {
  694                 uint16_t iqid;
  695 
  696                 iqid = port_intr_iq(pi, j)->cntxt_id;
  697 
  698                 snprintf(name, sizeof(name), "%s ofld_txq%d",
  699                     device_get_nameunit(pi->dev), i);
  700                 init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
  701                     iqid, name);
  702 
  703                 snprintf(name, sizeof(name), "%d", i);
  704                 oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
  705                     name, CTLFLAG_RD, NULL, "offload tx queue");
  706 
  707                 rc = alloc_wrq(sc, pi, ofld_txq, oid2);
  708                 if (rc != 0)
  709                         goto done;
  710                 j++;
  711         }
  712 #endif
  713 
  714         /*
  715          * Finally, the control queue.
  716          */
  717         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
  718             NULL, "ctrl queue");
  719         ctrlq = &sc->sge.ctrlq[pi->port_id];
  720         iqid = port_intr_iq(pi, 0)->cntxt_id;
  721         snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
  722         init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
  723         rc = alloc_wrq(sc, pi, ctrlq, oid);
  724 
  725 done:
  726         if (rc)
  727                 t4_teardown_port_queues(pi);
  728 
  729         return (rc);
  730 }
  731 
  732 /*
  733  * Idempotent
  734  */
  735 int
  736 t4_teardown_port_queues(struct port_info *pi)
  737 {
  738         int i;
  739         struct adapter *sc = pi->adapter;
  740         struct sge_rxq *rxq;
  741         struct sge_txq *txq;
  742 #ifdef TCP_OFFLOAD
  743         struct sge_ofld_rxq *ofld_rxq;
  744         struct sge_wrq *ofld_txq;
  745 #endif
  746 
  747         /* Do this before freeing the queues */
  748         if (pi->flags & PORT_SYSCTL_CTX) {
  749                 sysctl_ctx_free(&pi->ctx);
  750                 pi->flags &= ~PORT_SYSCTL_CTX;
  751         }
  752 
  753         /*
  754          * Take down all the tx queues first, as they reference the rx queues
  755          * (for egress updates, etc.).
  756          */
  757 
  758         free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
  759 
  760         for_each_txq(pi, i, txq) {
  761                 free_txq(pi, txq);
  762         }
  763 
  764 #ifdef TCP_OFFLOAD
  765         for_each_ofld_txq(pi, i, ofld_txq) {
  766                 free_wrq(sc, ofld_txq);
  767         }
  768 #endif
  769 
  770         /*
  771          * Then take down the rx queues that forward their interrupts, as they
  772          * reference other rx queues.
  773          */
  774 
  775         for_each_rxq(pi, i, rxq) {
  776                 if ((rxq->iq.flags & IQ_INTR) == 0)
  777                         free_rxq(pi, rxq);
  778         }
  779 
  780 #ifdef TCP_OFFLOAD
  781         for_each_ofld_rxq(pi, i, ofld_rxq) {
  782                 if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
  783                         free_ofld_rxq(pi, ofld_rxq);
  784         }
  785 #endif
  786 
  787         /*
  788          * Then take down the rx queues that take direct interrupts.
  789          */
  790 
  791         for_each_rxq(pi, i, rxq) {
  792                 if (rxq->iq.flags & IQ_INTR)
  793                         free_rxq(pi, rxq);
  794         }
  795 
  796 #ifdef TCP_OFFLOAD
  797         for_each_ofld_rxq(pi, i, ofld_rxq) {
  798                 if (ofld_rxq->iq.flags & IQ_INTR)
  799                         free_ofld_rxq(pi, ofld_rxq);
  800         }
  801 #endif
  802 
  803         return (0);
  804 }
  805 
  806 /*
  807  * Deals with errors and the firmware event queue.  All data rx queues forward
  808  * their interrupt to the firmware event queue.
  809  */
  810 void
  811 t4_intr_all(void *arg)
  812 {
  813         struct adapter *sc = arg;
  814         struct sge_iq *fwq = &sc->sge.fwq;
  815 
  816         t4_intr_err(arg);
  817         if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
  818                 service_iq(fwq, 0);
  819                 atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
  820         }
  821 }
  822 
  823 /* Deals with error interrupts */
  824 void
  825 t4_intr_err(void *arg)
  826 {
  827         struct adapter *sc = arg;
  828 
  829         t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
  830         t4_slow_intr_handler(sc);
  831 }
  832 
  833 void
  834 t4_intr_evt(void *arg)
  835 {
  836         struct sge_iq *iq = arg;
  837 
  838         if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
  839                 service_iq(iq, 0);
  840                 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
  841         }
  842 }
  843 
  844 void
  845 t4_intr(void *arg)
  846 {
  847         struct sge_iq *iq = arg;
  848 
  849         if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
  850                 service_iq(iq, 0);
  851                 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
  852         }
  853 }
  854 
  855 /*
  856  * Deals with anything and everything on the given ingress queue.
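        * A non-zero budget caps the number of descriptors processed in this
        * call.  If the cap is reached while responses are still pending, the
        * routine returns EINPROGRESS (and the caller keeps the queue busy and
        * retries later); otherwise it runs to completion and returns 0.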
  857  */
  858 static int
  859 service_iq(struct sge_iq *iq, int budget)
  860 {
  861         struct sge_iq *q;
  862         struct sge_rxq *rxq = iq_to_rxq(iq);    /* Use iff iq is part of rxq */
  863         struct sge_fl *fl = &rxq->fl;           /* Use iff IQ_HAS_FL */
  864         struct adapter *sc = iq->adapter;
  865         struct rsp_ctrl *ctrl;
  866         const struct rss_header *rss;
  867         int ndescs = 0, limit, fl_bufs_used = 0;
  868         int rsp_type;
  869         uint32_t lq;
  870         struct mbuf *m0;
  871         STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
  872 
  873         limit = budget ? budget : iq->qsize / 8;
  874 
  875         KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
  876 
  877         /*
  878          * We always come back and check the descriptor ring for new indirect
  879          * interrupts and other responses after running a single handler.
  880          */
  881         for (;;) {
  882                 while (is_new_response(iq, &ctrl)) {
  883 
  884                         rmb();
  885 
  886                         m0 = NULL;
  887                         rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
  888                         lq = be32toh(ctrl->pldbuflen_qid);
  889                         rss = (const void *)iq->cdesc;
  890 
  891                         switch (rsp_type) {
  892                         case X_RSPD_TYPE_FLBUF:
  893 
  894                                 KASSERT(iq->flags & IQ_HAS_FL,
  895                                     ("%s: data for an iq (%p) with no freelist",
  896                                     __func__, iq));
  897 
  898                                 m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
  899 #ifdef T4_PKT_TIMESTAMP
  900                                 /*
  901                                  * 60 bit timestamp for the payload is
  902                                  * *(uint64_t *)m0->m_pktdat.  Note that it is
  903                                  * in the leading free-space in the mbuf.  The
  904                                  * kernel can clobber it during a pullup,
  905                                  * m_copymdata, etc.  You need to make sure that
  906                                  * the mbuf reaches you unmolested if you care
  907                                  * about the timestamp.
  908                                  */
  909                                 *(uint64_t *)m0->m_pktdat =
  910                                     be64toh(ctrl->u.last_flit) &
  911                                     0xfffffffffffffff;
  912 #endif
  913 
  914                                 /* fall through */
  915 
  916                         case X_RSPD_TYPE_CPL:
  917                                 KASSERT(rss->opcode < NUM_CPL_CMDS,
  918                                     ("%s: bad opcode %02x.", __func__,
  919                                     rss->opcode));
  920                                 sc->cpl_handler[rss->opcode](iq, rss, m0);
  921                                 break;
  922 
  923                         case X_RSPD_TYPE_INTR:
  924 
  925                                 /*
  926                                  * Interrupts should be forwarded only to queues
  927                                  * that are not forwarding their interrupts.
  928                                  * This means service_iq can recurse but only 1
  929                                  * level deep.
  930                                  */
  931                                 KASSERT(budget == 0,
  932                                     ("%s: budget %u, rsp_type %u", __func__,
  933                                     budget, rsp_type));
  934 
  935                                 q = sc->sge.iqmap[lq - sc->sge.iq_start];
  936                                 if (atomic_cmpset_int(&q->state, IQS_IDLE,
  937                                     IQS_BUSY)) {
  938                                         if (service_iq(q, q->qsize / 8) == 0) {
  939                                                 atomic_cmpset_int(&q->state,
  940                                                     IQS_BUSY, IQS_IDLE);
  941                                         } else {
  942                                                 STAILQ_INSERT_TAIL(&iql, q,
  943                                                     link);
  944                                         }
  945                                 }
  946                                 break;
  947 
  948                         default:
  949                                 sc->an_handler(iq, ctrl);
  950                                 break;
  951                         }
  952 
  953                         iq_next(iq);
  954                         if (++ndescs == limit) {
  955                                 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
  956                                     V_CIDXINC(ndescs) |
  957                                     V_INGRESSQID(iq->cntxt_id) |
  958                                     V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
  959                                 ndescs = 0;
  960 
  961                                 if (fl_bufs_used > 0) {
  962                                         FL_LOCK(fl);
  963                                         fl->needed += fl_bufs_used;
  964                                         refill_fl(sc, fl, fl->cap / 8);
  965                                         FL_UNLOCK(fl);
  966                                         fl_bufs_used = 0;
  967                                 }
  968 
  969                                 if (budget)
  970                                         return (EINPROGRESS);
  971                         }
  972                 }
  973 
  974                 if (STAILQ_EMPTY(&iql))
  975                         break;
  976 
  977                 /*
  978                  * Process the head only, and send it to the back of the list if
  979                  * it's still not done.
  980                  */
  981                 q = STAILQ_FIRST(&iql);
  982                 STAILQ_REMOVE_HEAD(&iql, link);
  983                 if (service_iq(q, q->qsize / 8) == 0)
  984                         atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
  985                 else
  986                         STAILQ_INSERT_TAIL(&iql, q, link);
  987         }
  988 
  989 #if defined(INET) || defined(INET6)
  990         if (iq->flags & IQ_LRO_ENABLED) {
  991                 struct lro_ctrl *lro = &rxq->lro;
  992                 struct lro_entry *l;
  993 
  994                 while (!SLIST_EMPTY(&lro->lro_active)) {
  995                         l = SLIST_FIRST(&lro->lro_active);
  996                         SLIST_REMOVE_HEAD(&lro->lro_active, next);
  997                         tcp_lro_flush(lro, l);
  998                 }
  999         }
 1000 #endif
 1001 
 1002         t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
 1003             V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
 1004 
 1005         if (iq->flags & IQ_HAS_FL) {
 1006                 int starved;
 1007 
 1008                 FL_LOCK(fl);
 1009                 fl->needed += fl_bufs_used;
 1010                 starved = refill_fl(sc, fl, fl->cap / 4);
 1011                 FL_UNLOCK(fl);
 1012                 if (__predict_false(starved != 0))
 1013                         add_fl_to_sfl(sc, fl);
 1014         }
 1015 
 1016         return (0);
 1017 }
 1018 
 1019 static struct mbuf *
 1020 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
 1021     int *fl_bufs_used)
 1022 {
 1023         struct mbuf *m0, *m;
 1024         struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
 1025         unsigned int nbuf, len;
 1026 
 1027         /*
 1028          * No assertion for the fl lock because we don't need it.  This routine
 1029          * is called only from the rx interrupt handler and it only updates
 1030          * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
 1031          * updated in the rx interrupt handler or the starvation helper routine.
 1032          * That's why code that manipulates fl->pidx/fl->needed needs the fl
 1033          * lock but this routine does not).
 1034          */
 1035 
 1036         if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
 1037                 panic("%s: cannot handle packed frames", __func__);
 1038         len = G_RSPD_LEN(len_newbuf);
 1039 
 1040         m0 = sd->m;
 1041         sd->m = NULL;   /* consumed */
 1042 
 1043         bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
 1044         m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
 1045 #ifdef T4_PKT_TIMESTAMP
 1046         /* Leave room for a timestamp */
 1047         m0->m_data += 8;
 1048 #endif
 1049 
 1050         if (len < RX_COPY_THRESHOLD) {
 1051                 /* copy data to mbuf, buffer will be recycled */
 1052                 bcopy(sd->cl, mtod(m0, caddr_t), len);
 1053                 m0->m_len = len;
 1054         } else {
 1055                 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
 1056                 m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
 1057                 sd->cl = NULL;  /* consumed */
 1058                 m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
 1059         }
 1060         m0->m_pkthdr.len = len;
 1061 
 1062         sd++;
 1063         if (__predict_false(++fl->cidx == fl->cap)) {
 1064                 sd = fl->sdesc;
 1065                 fl->cidx = 0;
 1066         }
 1067 
 1068         m = m0;
 1069         len -= m->m_len;
 1070         nbuf = 1;       /* # of fl buffers used */
 1071 
 1072         while (len > 0) {
 1073                 m->m_next = sd->m;
 1074                 sd->m = NULL;   /* consumed */
 1075                 m = m->m_next;
 1076 
 1077                 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
 1078                     BUS_DMASYNC_POSTREAD);
 1079 
 1080                 m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
 1081                 if (len <= MLEN) {
 1082                         bcopy(sd->cl, mtod(m, caddr_t), len);
 1083                         m->m_len = len;
 1084                 } else {
 1085                         bus_dmamap_unload(fl->tag[sd->tag_idx],
 1086                             sd->map);
 1087                         m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
 1088                         sd->cl = NULL;  /* consumed */
 1089                         m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
 1090                 }
 1091 
 1092                 sd++;
 1093                 if (__predict_false(++fl->cidx == fl->cap)) {
 1094                         sd = fl->sdesc;
 1095                         fl->cidx = 0;
 1096                 }
 1097 
 1098                 len -= m->m_len;
 1099                 nbuf++;
 1100         }
 1101 
 1102         (*fl_bufs_used) += nbuf;
 1103 
 1104         return (m0);
 1105 }
 1106 
 1107 static int
 1108 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
 1109 {
 1110         struct sge_rxq *rxq = iq_to_rxq(iq);
 1111         struct ifnet *ifp = rxq->ifp;
 1112         const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
 1113 #if defined(INET) || defined(INET6)
 1114         struct lro_ctrl *lro = &rxq->lro;
 1115 #endif
 1116 
 1117         KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
 1118             rss->opcode));
 1119 
 1120         m0->m_pkthdr.len -= fl_pktshift;
 1121         m0->m_len -= fl_pktshift;
 1122         m0->m_data += fl_pktshift;
 1123 
 1124         m0->m_pkthdr.rcvif = ifp;
 1125         m0->m_flags |= M_FLOWID;
 1126         m0->m_pkthdr.flowid = rss->hash_val;
 1127 
 1128         if (cpl->csum_calc && !cpl->err_vec) {
 1129                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 1130                     cpl->l2info & htobe32(F_RXF_IP)) {
 1131                         m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 1132                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 1133                         rxq->rxcsum++;
 1134                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 1135                     cpl->l2info & htobe32(F_RXF_IP6)) {
 1136                         m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 1137                             CSUM_PSEUDO_HDR);
 1138                         rxq->rxcsum++;
 1139                 }
 1140 
 1141                 if (__predict_false(cpl->ip_frag))
 1142                         m0->m_pkthdr.csum_data = be16toh(cpl->csum);
 1143                 else
 1144                         m0->m_pkthdr.csum_data = 0xffff;
 1145         }
 1146 
 1147         if (cpl->vlan_ex) {
 1148                 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
 1149                 m0->m_flags |= M_VLANTAG;
 1150                 rxq->vlan_extraction++;
 1151         }
 1152 
 1153 #if defined(INET) || defined(INET6)
 1154         if (cpl->l2info & htobe32(F_RXF_LRO) &&
 1155             iq->flags & IQ_LRO_ENABLED &&
 1156             tcp_lro_rx(lro, m0, 0) == 0) {
 1157                 /* queued for LRO */
 1158         } else
 1159 #endif
 1160         ifp->if_input(ifp, m0);
 1161 
 1162         return (0);
 1163 }
 1164 
 1165 /*
 1166  * Doesn't fail.  Holds on to work requests it can't send right away.
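        * May also be called with wr == NULL just to push out work requests
        * that were queued earlier but could not be sent at the time.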
 1167  */
 1168 void
 1169 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
 1170 {
 1171         struct sge_eq *eq = &wrq->eq;
 1172         int can_reclaim;
 1173         caddr_t dst;
 1174 
 1175         TXQ_LOCK_ASSERT_OWNED(wrq);
 1176 #ifdef TCP_OFFLOAD
 1177         KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
 1178             (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
 1179             ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
 1180 #else
 1181         KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
 1182             ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
 1183 #endif
 1184 
 1185         if (__predict_true(wr != NULL))
 1186                 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
 1187 
 1188         can_reclaim = reclaimable(eq);
 1189         if (__predict_false(eq->flags & EQ_STALLED)) {
 1190                 if (can_reclaim < tx_resume_threshold(eq))
 1191                         return;
 1192                 eq->flags &= ~EQ_STALLED;
 1193                 eq->unstalled++;
 1194         }
 1195         eq->cidx += can_reclaim;
 1196         eq->avail += can_reclaim;
 1197         if (__predict_false(eq->cidx >= eq->cap))
 1198                 eq->cidx -= eq->cap;
 1199 
 1200         while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
 1201                 int ndesc;
 1202 
 1203                 if (__predict_false(wr->wr_len < 0 ||
 1204                     wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
 1205 
 1206 #ifdef INVARIANTS
 1207                         panic("%s: work request with length %d", __func__,
 1208                             wr->wr_len);
 1209 #endif
 1210 #ifdef KDB
 1211                         kdb_backtrace();
 1212 #endif
 1213                         log(LOG_ERR, "%s: %s work request with length %d",
 1214                             device_get_nameunit(sc->dev), __func__, wr->wr_len);
 1215                         STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
 1216                         free_wrqe(wr);
 1217                         continue;
 1218                 }
 1219 
 1220                 ndesc = howmany(wr->wr_len, EQ_ESIZE);
 1221                 if (eq->avail < ndesc) {
 1222                         wrq->no_desc++;
 1223                         break;
 1224                 }
 1225 
 1226                 dst = (void *)&eq->desc[eq->pidx];
 1227                 copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
 1228 
 1229                 eq->pidx += ndesc;
 1230                 eq->avail -= ndesc;
 1231                 if (__predict_false(eq->pidx >= eq->cap))
 1232                         eq->pidx -= eq->cap;
 1233 
 1234                 eq->pending += ndesc;
 1235                 if (eq->pending > 16)
 1236                         ring_eq_db(sc, eq);
 1237 
 1238                 wrq->tx_wrs++;
 1239                 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
 1240                 free_wrqe(wr);
 1241 
 1242                 if (eq->avail < 8) {
 1243                         can_reclaim = reclaimable(eq);
 1244                         eq->cidx += can_reclaim;
 1245                         eq->avail += can_reclaim;
 1246                         if (__predict_false(eq->cidx >= eq->cap))
 1247                                 eq->cidx -= eq->cap;
 1248                 }
 1249         }
 1250 
 1251         if (eq->pending)
 1252                 ring_eq_db(sc, eq);
 1253 
 1254         if (wr != NULL) {
 1255                 eq->flags |= EQ_STALLED;
 1256                 if (callout_pending(&eq->tx_callout) == 0)
 1257                         callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
 1258         }
 1259 }
 1260 
 1261 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
 1262 #define TXPKTS_PKT_HDR ((\
 1263     sizeof(struct ulp_txpkt) + \
 1264     sizeof(struct ulptx_idata) + \
 1265     sizeof(struct cpl_tx_pkt_core) \
 1266     ) / 8)
 1267 
 1268 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */
 1269 #define TXPKTS_WR_HDR (\
 1270     sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
 1271     TXPKTS_PKT_HDR)
 1272 
 1273 /* Header of a tx WR, before SGL of first packet (in flits) */
 1274 #define TXPKT_WR_HDR ((\
 1275     sizeof(struct fw_eth_tx_pkt_wr) + \
 1276     sizeof(struct cpl_tx_pkt_core) \
 1277     ) / 8 )
 1278 
 1279 /* Header of a tx LSO WR, before SGL of first packet (in flits) */
 1280 #define TXPKT_LSO_WR_HDR ((\
 1281     sizeof(struct fw_eth_tx_pkt_wr) + \
 1282     sizeof(struct cpl_tx_pkt_lso_core) + \
 1283     sizeof(struct cpl_tx_pkt_core) \
 1284     ) / 8 )
 1285 
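       /*
        * NIC tx fast path.  Transmits the given mbuf (and anything else
        * queued on the ifnet's buf_ring), coalescing frames into a single
        * TXPKTS work request when more packets are waiting and ringing the
        * doorbell in batches.  If it runs short of descriptors (or DMA maps)
        * the untransmitted mbuf is parked in txq->m so that a later credit
        * flush and reclaim can resume transmission.
        */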
 1286 int
 1287 t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
 1288 {
 1289         struct port_info *pi = (void *)ifp->if_softc;
 1290         struct adapter *sc = pi->adapter;
 1291         struct sge_eq *eq = &txq->eq;
 1292         struct buf_ring *br = txq->br;
 1293         struct mbuf *next;
 1294         int rc, coalescing, can_reclaim;
 1295         struct txpkts txpkts;
 1296         struct sgl sgl;
 1297 
 1298         TXQ_LOCK_ASSERT_OWNED(txq);
 1299         KASSERT(m, ("%s: called with nothing to do.", __func__));
 1300         KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
 1301             ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
 1302 
 1303         prefetch(&eq->desc[eq->pidx]);
 1304         prefetch(&txq->sdesc[eq->pidx]);
 1305 
 1306         txpkts.npkt = 0;/* indicates there's nothing in txpkts */
 1307         coalescing = 0;
 1308 
 1309         can_reclaim = reclaimable(eq);
 1310         if (__predict_false(eq->flags & EQ_STALLED)) {
 1311                 if (can_reclaim < tx_resume_threshold(eq)) {
 1312                         txq->m = m;
 1313                         return (0);
 1314                 }
 1315                 eq->flags &= ~EQ_STALLED;
 1316                 eq->unstalled++;
 1317         }
 1318 
 1319         if (__predict_false(eq->flags & EQ_DOOMED)) {
 1320                 m_freem(m);
 1321                 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
 1322                         m_freem(m);
 1323                 return (ENETDOWN);
 1324         }
 1325 
 1326         if (eq->avail < 8 && can_reclaim)
 1327                 reclaim_tx_descs(txq, can_reclaim, 32);
 1328 
 1329         for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
 1330 
 1331                 if (eq->avail < 8)
 1332                         break;
 1333 
 1334                 next = m->m_nextpkt;
 1335                 m->m_nextpkt = NULL;
 1336 
 1337                 if (next || buf_ring_peek(br))
 1338                         coalescing = 1;
 1339 
 1340                 rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
 1341                 if (rc != 0) {
 1342                         if (rc == ENOMEM) {
 1343 
 1344                                 /* Short of resources, suspend tx */
 1345 
 1346                                 m->m_nextpkt = next;
 1347                                 break;
 1348                         }
 1349 
 1350                         /*
 1351                          * Unrecoverable error for this packet, throw it away
 1352                          * and move on to the next.  get_pkt_sgl may already
 1353                          * have freed m (it will be NULL in that case and the
 1354                          * m_freem here is still safe).
 1355                          */
 1356 
 1357                         m_freem(m);
 1358                         continue;
 1359                 }
 1360 
 1361                 if (coalescing &&
 1362                     add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
 1363 
 1364                         /* Successfully absorbed into txpkts */
 1365 
 1366                         write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
 1367                         goto doorbell;
 1368                 }
 1369 
 1370                 /*
 1371                  * We weren't coalescing to begin with, or current frame could
 1372                  * not be coalesced (add_to_txpkts flushes txpkts if a frame
 1373                  * given to it can't be coalesced).  Either way there should be
 1374                  * nothing in txpkts.
 1375                  */
 1376                 KASSERT(txpkts.npkt == 0,
 1377                     ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
 1378 
 1379                 /* We're sending out individual packets now */
 1380                 coalescing = 0;
 1381 
 1382                 if (eq->avail < 8)
 1383                         reclaim_tx_descs(txq, 0, 8);
 1384                 rc = write_txpkt_wr(pi, txq, m, &sgl);
 1385                 if (rc != 0) {
 1386 
 1387                         /* Short of hardware descriptors, suspend tx */
 1388 
 1389                         /*
 1390                          * This is an unlikely but expensive failure.  We've
 1391                          * done all the hard work (DMA mappings etc.) and now we
 1392                          * can't send out the packet.  What's worse, we have to
 1393                          * spend even more time freeing up everything in sgl.
 1394                          */
 1395                         txq->no_desc++;
 1396                         free_pkt_sgl(txq, &sgl);
 1397 
 1398                         m->m_nextpkt = next;
 1399                         break;
 1400                 }
 1401 
 1402                 ETHER_BPF_MTAP(ifp, m);
 1403                 if (sgl.nsegs == 0)
 1404                         m_freem(m);
 1405 doorbell:
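                      /*
                       * Doorbell writes and tx descriptor reclamation are
                       * batched here: the doorbell is rung only once 64
                       * descriptors are pending, and reclamation is attempted
                       * only once at least 32 descriptors are reclaimable.
                       */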
 1406                 if (eq->pending >= 64)
 1407                         ring_eq_db(sc, eq);
 1408 
 1409                 can_reclaim = reclaimable(eq);
 1410                 if (can_reclaim >= 32)
 1411                         reclaim_tx_descs(txq, can_reclaim, 64);
 1412         }
 1413 
 1414         if (txpkts.npkt > 0)
 1415                 write_txpkts_wr(txq, &txpkts);
 1416 
 1417         /*
 1418          * m not NULL means there was an error but we haven't thrown it away.
 1419          * This can happen when we're short of tx descriptors (no_desc) or maybe
 1420          * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
 1421          * will get things going again.
 1422          */
 1423         if (m && !(eq->flags & EQ_CRFLUSHED)) {
 1424                 struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
 1425 
 1426                 /*
 1427                  * If EQ_CRFLUSHED is not set then we know we have at least one
 1428                  * available descriptor because any WR that reduces eq->avail to
 1429                  * 0 also sets EQ_CRFLUSHED.
 1430                  */
 1431                 KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
 1432 
 1433                 txsd->desc_used = 1;
 1434                 txsd->credits = 0;
 1435                 write_eqflush_wr(eq);
 1436         }
 1437         txq->m = m;
 1438 
 1439         if (eq->pending)
 1440                 ring_eq_db(sc, eq);
 1441 
 1442         reclaim_tx_descs(txq, 0, 128);
 1443 
 1444         if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
 1445                 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
 1446 
 1447         return (0);
 1448 }
 1449 
 1450 void
 1451 t4_update_fl_bufsize(struct ifnet *ifp)
 1452 {
 1453         struct port_info *pi = ifp->if_softc;
 1454         struct sge_rxq *rxq;
 1455         struct sge_fl *fl;
 1456         int i, bufsize = mtu_to_bufsize(ifp->if_mtu);
 1457 
 1458         for_each_rxq(pi, i, rxq) {
 1459                 fl = &rxq->fl;
 1460 
 1461                 FL_LOCK(fl);
 1462                 set_fl_tag_idx(fl, bufsize);
 1463                 FL_UNLOCK(fl);
 1464         }
 1465 }
 1466 
 1467 int
 1468 can_resume_tx(struct sge_eq *eq)
 1469 {
 1470         return (reclaimable(eq) >= tx_resume_threshold(eq));
 1471 }
 1472 
 1473 static inline void
 1474 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
 1475     int qsize, int esize)
 1476 {
 1477         KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
 1478             ("%s: bad tmr_idx %d", __func__, tmr_idx));
 1479         KASSERT(pktc_idx < SGE_NCOUNTERS,       /* -ve is ok, means don't use */
 1480             ("%s: bad pktc_idx %d", __func__, pktc_idx));
 1481 
 1482         iq->flags = 0;
 1483         iq->adapter = sc;
 1484         iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
 1485         iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
 1486         if (pktc_idx >= 0) {
 1487                 iq->intr_params |= F_QINTR_CNT_EN;
 1488                 iq->intr_pktc_idx = pktc_idx;
 1489         }
 1490         iq->qsize = roundup(qsize, 16);         /* See FW_IQ_CMD/iqsize */
 1491         iq->esize = max(esize, 16);             /* See FW_IQ_CMD/iqesize */
 1492 }
 1493 
 1494 static inline void
 1495 init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
 1496 {
 1497         fl->qsize = qsize;
 1498         strlcpy(fl->lockname, name, sizeof(fl->lockname));
 1499         set_fl_tag_idx(fl, bufsize);
 1500 }
 1501 
 1502 static inline void
 1503 init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
 1504     uint16_t iqid, char *name)
 1505 {
 1506         KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
 1507         KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
 1508 
 1509         eq->flags = eqtype & EQ_TYPEMASK;
 1510         eq->tx_chan = tx_chan;
 1511         eq->iqid = iqid;
 1512         eq->qsize = qsize;
 1513         strlcpy(eq->lockname, name, sizeof(eq->lockname));
 1514 
 1515         TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
 1516         callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
 1517 }
 1518 
 1519 static int
 1520 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
 1521     bus_dmamap_t *map, bus_addr_t *pa, void **va)
 1522 {
 1523         int rc;
 1524 
 1525         rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
 1526             BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
 1527         if (rc != 0) {
 1528                 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
 1529                 goto done;
 1530         }
 1531 
 1532         rc = bus_dmamem_alloc(*tag, va,
 1533             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
 1534         if (rc != 0) {
 1535                 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
 1536                 goto done;
 1537         }
 1538 
 1539         rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
 1540         if (rc != 0) {
 1541                 device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
 1542                 goto done;
 1543         }
 1544 done:
 1545         if (rc)
 1546                 free_ring(sc, *tag, *map, *pa, *va);
 1547 
 1548         return (rc);
 1549 }
 1550 
 1551 static int
 1552 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
 1553     bus_addr_t pa, void *va)
 1554 {
 1555         if (pa)
 1556                 bus_dmamap_unload(tag, map);
 1557         if (va)
 1558                 bus_dmamem_free(tag, va, map);
 1559         if (tag)
 1560                 bus_dma_tag_destroy(tag);
 1561 
 1562         return (0);
 1563 }
 1564 
 1565 /*
 1566  * Allocates the ring for an ingress queue and an optional freelist.  If the
 1567  * freelist is specified it will be allocated and then associated with the
 1568  * ingress queue.
 1569  *
 1570  * Returns errno on failure.  Resources allocated up to the point of failure are
 1571  * not released here; the caller is responsible for cleanup if this function fails.
 1572  *
 1573  * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
 1574  * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
 1575  * the abs_id of the ingress queue to which its interrupts should be forwarded.
 1576  */
 1577 static int
 1578 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
 1579     int intr_idx, int cong)
 1580 {
 1581         int rc, i, cntxt_id;
 1582         size_t len;
 1583         struct fw_iq_cmd c;
 1584         struct adapter *sc = iq->adapter;
 1585         __be32 v = 0;
 1586 
 1587         len = iq->qsize * iq->esize;
 1588         rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
 1589             (void **)&iq->desc);
 1590         if (rc != 0)
 1591                 return (rc);
 1592 
 1593         bzero(&c, sizeof(c));
 1594         c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
 1595             F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
 1596             V_FW_IQ_CMD_VFN(0));
 1597 
 1598         c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
 1599             FW_LEN16(c));
 1600 
 1601         /* Special handling for firmware event queue */
 1602         if (iq == &sc->sge.fwq)
 1603                 v |= F_FW_IQ_CMD_IQASYNCH;
 1604 
 1605         if (iq->flags & IQ_INTR) {
 1606                 KASSERT(intr_idx < sc->intr_count,
 1607                     ("%s: invalid direct intr_idx %d", __func__, intr_idx));
 1608         } else
 1609                 v |= F_FW_IQ_CMD_IQANDST;
 1610         v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
 1611 
 1612         c.type_to_iqandstindex = htobe32(v |
 1613             V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
 1614             V_FW_IQ_CMD_VIID(pi->viid) |
 1615             V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
 1616         c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
 1617             F_FW_IQ_CMD_IQGTSMODE |
 1618             V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
 1619             V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
 1620         c.iqsize = htobe16(iq->qsize);
 1621         c.iqaddr = htobe64(iq->ba);
 1622         if (cong >= 0)
 1623                 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
 1624 
 1625         if (fl) {
 1626                 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
 1627 
 1628                 for (i = 0; i < FL_BUF_SIZES; i++) {
 1629 
 1630                         /*
 1631                          * A freelist buffer must be 16 byte aligned as the SGE
 1632                          * uses the low 4 bits of the bus addr to figure out the
 1633                          * buffer size.
 1634                          */
 1635                         rc = bus_dma_tag_create(sc->dmat, 16, 0,
 1636                             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 1637                             FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
 1638                             NULL, NULL, &fl->tag[i]);
 1639                         if (rc != 0) {
 1640                                 device_printf(sc->dev,
 1641                                     "failed to create fl DMA tag[%d]: %d\n",
 1642                                     i, rc);
 1643                                 return (rc);
 1644                         }
 1645                 }
 1646                 len = fl->qsize * RX_FL_ESIZE;
 1647                 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
 1648                     &fl->ba, (void **)&fl->desc);
 1649                 if (rc)
 1650                         return (rc);
 1651 
 1652                 /* Allocate space for one software descriptor per buffer. */
 1653                 fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
 1654                 FL_LOCK(fl);
 1655                 rc = alloc_fl_sdesc(fl);
 1656                 FL_UNLOCK(fl);
 1657                 if (rc != 0) {
 1658                         device_printf(sc->dev,
 1659                             "failed to setup fl software descriptors: %d\n",
 1660                             rc);
 1661                         return (rc);
 1662                 }
 1663                 fl->needed = fl->cap;
 1664                 fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);
 1665 
 1666                 c.iqns_to_fl0congen |=
 1667                     htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
 1668                         F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
 1669                         F_FW_IQ_CMD_FL0PADEN);
 1670                 if (cong >= 0) {
 1671                         c.iqns_to_fl0congen |=
 1672                                 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
 1673                                     F_FW_IQ_CMD_FL0CONGCIF |
 1674                                     F_FW_IQ_CMD_FL0CONGEN);
 1675                 }
 1676                 c.fl0dcaen_to_fl0cidxfthresh =
 1677                     htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
 1678                         V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
 1679                 c.fl0size = htobe16(fl->qsize);
 1680                 c.fl0addr = htobe64(fl->ba);
 1681         }
 1682 
 1683         rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
 1684         if (rc != 0) {
 1685                 device_printf(sc->dev,
 1686                     "failed to create ingress queue: %d\n", rc);
 1687                 return (rc);
 1688         }
 1689 
 1690         iq->cdesc = iq->desc;
 1691         iq->cidx = 0;
 1692         iq->gen = 1;
 1693         iq->intr_next = iq->intr_params;
 1694         iq->cntxt_id = be16toh(c.iqid);
 1695         iq->abs_id = be16toh(c.physiqid);
 1696         iq->flags |= IQ_ALLOCATED;
 1697 
 1698         cntxt_id = iq->cntxt_id - sc->sge.iq_start;
 1699         if (cntxt_id >= sc->sge.niq) {
 1700                 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
 1701                     cntxt_id, sc->sge.niq - 1);
 1702         }
 1703         sc->sge.iqmap[cntxt_id] = iq;
 1704 
 1705         if (fl) {
 1706                 fl->cntxt_id = be16toh(c.fl0id);
 1707                 fl->pidx = fl->cidx = 0;
 1708 
 1709                 cntxt_id = fl->cntxt_id - sc->sge.eq_start;
 1710                 if (cntxt_id >= sc->sge.neq) {
 1711                         panic("%s: fl->cntxt_id (%d) more than the max (%d)",
 1712                             __func__, cntxt_id, sc->sge.neq - 1);
 1713                 }
 1714                 sc->sge.eqmap[cntxt_id] = (void *)fl;
 1715 
 1716                 FL_LOCK(fl);
 1717                 /* Enough to make sure the SGE doesn't think it's starved */
 1718                 refill_fl(sc, fl, fl->lowat);
 1719                 FL_UNLOCK(fl);
 1720 
 1721                 iq->flags |= IQ_HAS_FL;
 1722         }
 1723 
 1724         /* Enable IQ interrupts */
 1725         atomic_store_rel_int(&iq->state, IQS_IDLE);
 1726         t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
 1727             V_INGRESSQID(iq->cntxt_id));
 1728 
 1729         return (0);
 1730 }
 1731 
 1732 static int
 1733 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
 1734 {
 1735         int i, rc;
 1736         struct adapter *sc = iq->adapter;
 1737         device_t dev;
 1738 
 1739         if (sc == NULL)
 1740                 return (0);     /* nothing to do */
 1741 
 1742         dev = pi ? pi->dev : sc->dev;
 1743 
 1744         if (iq->flags & IQ_ALLOCATED) {
 1745                 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
 1746                     FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
 1747                     fl ? fl->cntxt_id : 0xffff, 0xffff);
 1748                 if (rc != 0) {
 1749                         device_printf(dev,
 1750                             "failed to free queue %p: %d\n", iq, rc);
 1751                         return (rc);
 1752                 }
 1753                 iq->flags &= ~IQ_ALLOCATED;
 1754         }
 1755 
 1756         free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
 1757 
 1758         bzero(iq, sizeof(*iq));
 1759 
 1760         if (fl) {
 1761                 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
 1762                     fl->desc);
 1763 
 1764                 if (fl->sdesc) {
 1765                         FL_LOCK(fl);
 1766                         free_fl_sdesc(fl);
 1767                         FL_UNLOCK(fl);
 1768                 }
 1769 
 1770                 if (mtx_initialized(&fl->fl_lock))
 1771                         mtx_destroy(&fl->fl_lock);
 1772 
 1773                 for (i = 0; i < FL_BUF_SIZES; i++) {
 1774                         if (fl->tag[i])
 1775                                 bus_dma_tag_destroy(fl->tag[i]);
 1776                 }
 1777 
 1778                 bzero(fl, sizeof(*fl));
 1779         }
 1780 
 1781         return (0);
 1782 }
 1783 
 1784 static int
 1785 alloc_fwq(struct adapter *sc)
 1786 {
 1787         int rc, intr_idx;
 1788         struct sge_iq *fwq = &sc->sge.fwq;
 1789         struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
 1790         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
 1791 
 1792         init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
 1793         fwq->flags |= IQ_INTR;  /* always */
 1794         intr_idx = sc->intr_count > 1 ? 1 : 0;
 1795         rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
 1796         if (rc != 0) {
 1797                 device_printf(sc->dev,
 1798                     "failed to create firmware event queue: %d\n", rc);
 1799                 return (rc);
 1800         }
 1801 
 1802         oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
 1803             NULL, "firmware event queue");
 1804         children = SYSCTL_CHILDREN(oid);
 1805 
 1806         SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
 1807             CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
 1808             "absolute id of the queue");
 1809         SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
 1810             CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
 1811             "SGE context id of the queue");
 1812         SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
 1813             CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
 1814             "consumer index");
 1815 
 1816         return (0);
 1817 }
 1818 
 1819 static int
 1820 free_fwq(struct adapter *sc)
 1821 {
 1822         return free_iq_fl(NULL, &sc->sge.fwq, NULL);
 1823 }
 1824 
 1825 static int
 1826 alloc_mgmtq(struct adapter *sc)
 1827 {
 1828         int rc;
 1829         struct sge_wrq *mgmtq = &sc->sge.mgmtq;
 1830         char name[16];
 1831         struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
 1832         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
 1833 
 1834         oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
 1835             NULL, "management queue");
 1836 
 1837         snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
 1838         init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
 1839             sc->sge.fwq.cntxt_id, name);
 1840         rc = alloc_wrq(sc, NULL, mgmtq, oid);
 1841         if (rc != 0) {
 1842                 device_printf(sc->dev,
 1843                     "failed to create management queue: %d\n", rc);
 1844                 return (rc);
 1845         }
 1846 
 1847         return (0);
 1848 }
 1849 
 1850 static int
 1851 free_mgmtq(struct adapter *sc)
 1852 {
 1853 
 1854         return free_wrq(sc, &sc->sge.mgmtq);
 1855 }
 1856 
 1857 static inline int
 1858 tnl_cong(struct port_info *pi)
 1859 {
 1860 
 1861         if (cong_drop == -1)
 1862                 return (-1);
 1863         else if (cong_drop == 1)
 1864                 return (0);
 1865         else
 1866                 return (1 << pi->tx_chan);
 1867 }
 1868 
 1869 static int
 1870 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
 1871     struct sysctl_oid *oid)
 1872 {
 1873         int rc;
 1874         struct sysctl_oid_list *children;
 1875         char name[16];
 1876 
 1877         rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
 1878         if (rc != 0)
 1879                 return (rc);
 1880 
 1881         FL_LOCK(&rxq->fl);
 1882         refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
 1883         FL_UNLOCK(&rxq->fl);
 1884 
 1885 #if defined(INET) || defined(INET6)
 1886         rc = tcp_lro_init(&rxq->lro);
 1887         if (rc != 0)
 1888                 return (rc);
 1889         rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
 1890 
 1891         if (pi->ifp->if_capenable & IFCAP_LRO)
 1892                 rxq->iq.flags |= IQ_LRO_ENABLED;
 1893 #endif
 1894         rxq->ifp = pi->ifp;
 1895 
 1896         children = SYSCTL_CHILDREN(oid);
 1897 
 1898         snprintf(name, sizeof(name), "%d", idx);
 1899         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
 1900             NULL, "rx queue");
 1901         children = SYSCTL_CHILDREN(oid);
 1902 
 1903         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
 1904             CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
 1905             "absolute id of the queue");
 1906         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
 1907             CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
 1908             "SGE context id of the queue");
 1909         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
 1910             CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
 1911             "consumer index");
 1912 #if defined(INET) || defined(INET6)
 1913         SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
 1914             &rxq->lro.lro_queued, 0, NULL);
 1915         SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
 1916             &rxq->lro.lro_flushed, 0, NULL);
 1917 #endif
 1918         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
 1919             &rxq->rxcsum, "# of times hardware assisted with checksum");
 1920         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
 1921             CTLFLAG_RD, &rxq->vlan_extraction,
 1922             "# of times hardware extracted 802.1Q tag");
 1923 
 1924         children = SYSCTL_CHILDREN(oid);
 1925         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
 1926             NULL, "freelist");
 1927         children = SYSCTL_CHILDREN(oid);
 1928 
 1929         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
 1930             CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
 1931             "SGE context id of the queue");
 1932         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
 1933             &rxq->fl.cidx, 0, "consumer index");
 1934         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
 1935             &rxq->fl.pidx, 0, "producer index");
 1936 
 1937         return (rc);
 1938 }
 1939 
 1940 static int
 1941 free_rxq(struct port_info *pi, struct sge_rxq *rxq)
 1942 {
 1943         int rc;
 1944 
 1945 #if defined(INET) || defined(INET6)
 1946         if (rxq->lro.ifp) {
 1947                 tcp_lro_free(&rxq->lro);
 1948                 rxq->lro.ifp = NULL;
 1949         }
 1950 #endif
 1951 
 1952         rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
 1953         if (rc == 0)
 1954                 bzero(rxq, sizeof(*rxq));
 1955 
 1956         return (rc);
 1957 }
 1958 
 1959 #ifdef TCP_OFFLOAD
 1960 static int
 1961 alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
 1962     int intr_idx, int idx, struct sysctl_oid *oid)
 1963 {
 1964         int rc;
 1965         struct sysctl_oid_list *children;
 1966         char name[16];
 1967 
 1968         rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
 1969             1 << pi->tx_chan);
 1970         if (rc != 0)
 1971                 return (rc);
 1972 
 1973         children = SYSCTL_CHILDREN(oid);
 1974 
 1975         snprintf(name, sizeof(name), "%d", idx);
 1976         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
 1977             NULL, "rx queue");
 1978         children = SYSCTL_CHILDREN(oid);
 1979 
 1980         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
 1981             CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
 1982             "I", "absolute id of the queue");
 1983         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
 1984             CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
 1985             "I", "SGE context id of the queue");
 1986         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
 1987             CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
 1988             "consumer index");
 1989 
 1990         children = SYSCTL_CHILDREN(oid);
 1991         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
 1992             NULL, "freelist");
 1993         children = SYSCTL_CHILDREN(oid);
 1994 
 1995         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
 1996             CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
 1997             "I", "SGE context id of the queue");
 1998         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
 1999             &ofld_rxq->fl.cidx, 0, "consumer index");
 2000         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
 2001             &ofld_rxq->fl.pidx, 0, "producer index");
 2002 
 2003         return (rc);
 2004 }
 2005 
 2006 static int
 2007 free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
 2008 {
 2009         int rc;
 2010 
 2011         rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
 2012         if (rc == 0)
 2013                 bzero(ofld_rxq, sizeof(*ofld_rxq));
 2014 
 2015         return (rc);
 2016 }
 2017 #endif
 2018 
 2019 static int
 2020 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
 2021 {
 2022         int rc, cntxt_id;
 2023         struct fw_eq_ctrl_cmd c;
 2024 
 2025         bzero(&c, sizeof(c));
 2026 
 2027         c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
 2028             F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
 2029             V_FW_EQ_CTRL_CMD_VFN(0));
 2030         c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
 2031             F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
 2032         c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
 2033         c.physeqid_pkd = htobe32(0);
 2034         c.fetchszm_to_iqid =
 2035             htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
 2036                 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
 2037                 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
 2038         c.dcaen_to_eqsize =
 2039             htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
 2040                 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
 2041                 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
 2042                 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
 2043         c.eqaddr = htobe64(eq->ba);
 2044 
 2045         rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
 2046         if (rc != 0) {
 2047                 device_printf(sc->dev,
 2048                     "failed to create control queue %d: %d\n", eq->tx_chan, rc);
 2049                 return (rc);
 2050         }
 2051         eq->flags |= EQ_ALLOCATED;
 2052 
 2053         eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
 2054         cntxt_id = eq->cntxt_id - sc->sge.eq_start;
 2055         if (cntxt_id >= sc->sge.neq)
 2056             panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
 2057                 cntxt_id, sc->sge.neq - 1);
 2058         sc->sge.eqmap[cntxt_id] = eq;
 2059 
 2060         return (rc);
 2061 }
 2062 
 2063 static int
 2064 eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
 2065 {
 2066         int rc, cntxt_id;
 2067         struct fw_eq_eth_cmd c;
 2068 
 2069         bzero(&c, sizeof(c));
 2070 
 2071         c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
 2072             F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
 2073             V_FW_EQ_ETH_CMD_VFN(0));
 2074         c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
 2075             F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
 2076         c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
 2077         c.fetchszm_to_iqid =
 2078             htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
 2079                 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
 2080                 V_FW_EQ_ETH_CMD_IQID(eq->iqid));
 2081         c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
 2082                       V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
 2083                       V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
 2084                       V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
 2085         c.eqaddr = htobe64(eq->ba);
 2086 
 2087         rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
 2088         if (rc != 0) {
 2089                 device_printf(pi->dev,
 2090                     "failed to create Ethernet egress queue: %d\n", rc);
 2091                 return (rc);
 2092         }
 2093         eq->flags |= EQ_ALLOCATED;
 2094 
 2095         eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
 2096         cntxt_id = eq->cntxt_id - sc->sge.eq_start;
 2097         if (cntxt_id >= sc->sge.neq)
 2098             panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
 2099                 cntxt_id, sc->sge.neq - 1);
 2100         sc->sge.eqmap[cntxt_id] = eq;
 2101 
 2102         return (rc);
 2103 }
 2104 
 2105 #ifdef TCP_OFFLOAD
 2106 static int
 2107 ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
 2108 {
 2109         int rc, cntxt_id;
 2110         struct fw_eq_ofld_cmd c;
 2111 
 2112         bzero(&c, sizeof(c));
 2113 
 2114         c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
 2115             F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
 2116             V_FW_EQ_OFLD_CMD_VFN(0));
 2117         c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
 2118             F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
 2119         c.fetchszm_to_iqid =
 2120                 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
 2121                     V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
 2122                     F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
 2123         c.dcaen_to_eqsize =
 2124             htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
 2125                 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
 2126                 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
 2127                 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
 2128         c.eqaddr = htobe64(eq->ba);
 2129 
 2130         rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
 2131         if (rc != 0) {
 2132                 device_printf(pi->dev,
 2133                     "failed to create egress queue for TCP offload: %d\n", rc);
 2134                 return (rc);
 2135         }
 2136         eq->flags |= EQ_ALLOCATED;
 2137 
 2138         eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
 2139         cntxt_id = eq->cntxt_id - sc->sge.eq_start;
 2140         if (cntxt_id >= sc->sge.neq)
 2141             panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
 2142                 cntxt_id, sc->sge.neq - 1);
 2143         sc->sge.eqmap[cntxt_id] = eq;
 2144 
 2145         return (rc);
 2146 }
 2147 #endif
 2148 
 2149 static int
 2150 alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
 2151 {
 2152         int rc;
 2153         size_t len;
 2154 
 2155         mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
 2156 
 2157         len = eq->qsize * EQ_ESIZE;
 2158         rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
 2159             &eq->ba, (void **)&eq->desc);
 2160         if (rc)
 2161                 return (rc);
 2162 
 2163         eq->cap = eq->qsize - spg_len / EQ_ESIZE;
 2164         eq->spg = (void *)&eq->desc[eq->cap];
 2165         eq->avail = eq->cap - 1;        /* one less to avoid cidx = pidx */
 2166         eq->pidx = eq->cidx = 0;
 2167 
 2168         switch (eq->flags & EQ_TYPEMASK) {
 2169         case EQ_CTRL:
 2170                 rc = ctrl_eq_alloc(sc, eq);
 2171                 break;
 2172 
 2173         case EQ_ETH:
 2174                 rc = eth_eq_alloc(sc, pi, eq);
 2175                 break;
 2176 
 2177 #ifdef TCP_OFFLOAD
 2178         case EQ_OFLD:
 2179                 rc = ofld_eq_alloc(sc, pi, eq);
 2180                 break;
 2181 #endif
 2182 
 2183         default:
 2184                 panic("%s: invalid eq type %d.", __func__,
 2185                     eq->flags & EQ_TYPEMASK);
 2186         }
 2187         if (rc != 0) {
 2188                 device_printf(sc->dev,
 2189                     "failed to allocate egress queue (%d): %d\n",
 2190                     eq->flags & EQ_TYPEMASK, rc);
 2191         }
 2192 
 2193         eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
 2194 
 2195         return (rc);
 2196 }
 2197 
 2198 static int
 2199 free_eq(struct adapter *sc, struct sge_eq *eq)
 2200 {
 2201         int rc;
 2202 
 2203         if (eq->flags & EQ_ALLOCATED) {
 2204                 switch (eq->flags & EQ_TYPEMASK) {
 2205                 case EQ_CTRL:
 2206                         rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
 2207                             eq->cntxt_id);
 2208                         break;
 2209 
 2210                 case EQ_ETH:
 2211                         rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
 2212                             eq->cntxt_id);
 2213                         break;
 2214 
 2215 #ifdef TCP_OFFLOAD
 2216                 case EQ_OFLD:
 2217                         rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
 2218                             eq->cntxt_id);
 2219                         break;
 2220 #endif
 2221 
 2222                 default:
 2223                         panic("%s: invalid eq type %d.", __func__,
 2224                             eq->flags & EQ_TYPEMASK);
 2225                 }
 2226                 if (rc != 0) {
 2227                         device_printf(sc->dev,
 2228                             "failed to free egress queue (%d): %d\n",
 2229                             eq->flags & EQ_TYPEMASK, rc);
 2230                         return (rc);
 2231                 }
 2232                 eq->flags &= ~EQ_ALLOCATED;
 2233         }
 2234 
 2235         free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
 2236 
 2237         if (mtx_initialized(&eq->eq_lock))
 2238                 mtx_destroy(&eq->eq_lock);
 2239 
 2240         bzero(eq, sizeof(*eq));
 2241         return (0);
 2242 }
 2243 
 2244 static int
 2245 alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
 2246     struct sysctl_oid *oid)
 2247 {
 2248         int rc;
 2249         struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
 2250         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
 2251 
 2252         rc = alloc_eq(sc, pi, &wrq->eq);
 2253         if (rc)
 2254                 return (rc);
 2255 
 2256         wrq->adapter = sc;
 2257         STAILQ_INIT(&wrq->wr_list);
 2258 
 2259         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
 2260             &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
 2261         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
 2262             CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
 2263             "consumer index");
 2264         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
 2265             CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
 2266             "producer index");
 2267         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
 2268             &wrq->tx_wrs, "# of work requests");
 2269         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
 2270             &wrq->no_desc, 0,
 2271             "# of times queue ran out of hardware descriptors");
 2272         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
 2273             &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
 2274 
 2275 
 2276         return (rc);
 2277 }
 2278 
 2279 static int
 2280 free_wrq(struct adapter *sc, struct sge_wrq *wrq)
 2281 {
 2282         int rc;
 2283 
 2284         rc = free_eq(sc, &wrq->eq);
 2285         if (rc)
 2286                 return (rc);
 2287 
 2288         bzero(wrq, sizeof(*wrq));
 2289         return (0);
 2290 }
 2291 
 2292 static int
 2293 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
 2294     struct sysctl_oid *oid)
 2295 {
 2296         int rc;
 2297         struct adapter *sc = pi->adapter;
 2298         struct sge_eq *eq = &txq->eq;
 2299         char name[16];
 2300         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
 2301 
 2302         rc = alloc_eq(sc, pi, eq);
 2303         if (rc)
 2304                 return (rc);
 2305 
 2306         txq->ifp = pi->ifp;
 2307 
 2308         txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
 2309             M_ZERO | M_WAITOK);
 2310         txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
 2311 
 2312         rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
 2313             BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
 2314             BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
 2315         if (rc != 0) {
 2316                 device_printf(sc->dev,
 2317                     "failed to create tx DMA tag: %d\n", rc);
 2318                 return (rc);
 2319         }
 2320 
 2321         /*
 2322          * We can stuff ~10 frames into an 8-descriptor txpkts WR (8 descriptors
 2323          * is the SGE limit for any WR).  txq->no_dmamap events shouldn't occur if
 2324          * the set of maps is sized for the worst case.
 2325          */
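              /*
               * In other words, a full ring of eq->qsize descriptors can carry
               * at most about eq->qsize * 10 / 8 frames, so that many DMA maps
               * covers the worst case.
               */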
 2326         rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
 2327             M_WAITOK);
 2328         if (rc != 0) {
 2329                 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
 2330                 return (rc);
 2331         }
 2332 
 2333         snprintf(name, sizeof(name), "%d", idx);
 2334         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
 2335             NULL, "tx queue");
 2336         children = SYSCTL_CHILDREN(oid);
 2337 
 2338         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
 2339             &eq->cntxt_id, 0, "SGE context id of the queue");
 2340         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
 2341             CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
 2342             "consumer index");
 2343         SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
 2344             CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
 2345             "producer index");
 2346 
 2347         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
 2348             &txq->txcsum, "# of times hardware assisted with checksum");
 2349         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
 2350             CTLFLAG_RD, &txq->vlan_insertion,
 2351             "# of times hardware inserted 802.1Q tag");
 2352         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
 2353             &txq->tso_wrs, "# of TSO work requests");
 2354         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
 2355             &txq->imm_wrs, "# of work requests with immediate data");
 2356         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
 2357             &txq->sgl_wrs, "# of work requests with direct SGL");
 2358         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
 2359             &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
 2360         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
 2361             &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
 2362         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
 2363             &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
 2364 
 2365         SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
 2366             &txq->br->br_drops, "# of drops in the buf_ring for this queue");
 2367         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
 2368             &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
 2369         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
 2370             &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
 2371         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
 2372             &eq->egr_update, 0, "egress update notifications from the SGE");
 2373         SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
 2374             &eq->unstalled, 0, "# of times txq recovered after stall");
 2375 
 2376         return (rc);
 2377 }
 2378 
 2379 static int
 2380 free_txq(struct port_info *pi, struct sge_txq *txq)
 2381 {
 2382         int rc;
 2383         struct adapter *sc = pi->adapter;
 2384         struct sge_eq *eq = &txq->eq;
 2385 
 2386         rc = free_eq(sc, eq);
 2387         if (rc)
 2388                 return (rc);
 2389 
 2390         free(txq->sdesc, M_CXGBE);
 2391 
 2392         if (txq->txmaps.maps)
 2393                 t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
 2394 
 2395         buf_ring_free(txq->br, M_CXGBE);
 2396 
 2397         if (txq->tx_tag)
 2398                 bus_dma_tag_destroy(txq->tx_tag);
 2399 
 2400         bzero(txq, sizeof(*txq));
 2401         return (0);
 2402 }
 2403 
 2404 static void
 2405 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 2406 {
 2407         bus_addr_t *ba = arg;
 2408 
 2409         KASSERT(nseg == 1,
 2410             ("%s meant for single segment mappings only.", __func__));
 2411 
 2412         *ba = error ? 0 : segs->ds_addr;
 2413 }
 2414 
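      /*
       * The response ring uses a generation bit to tell new entries from stale
       * ones: is_new_response() compares the gen field in an entry's rsp_ctrl
       * with iq->gen, and iq_next() flips iq->gen whenever the consumer index
       * wraps around the ring.
       */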
 2415 static inline bool
 2416 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
 2417 {
 2418         *ctrl = (void *)((uintptr_t)iq->cdesc +
 2419             (iq->esize - sizeof(struct rsp_ctrl)));
 2420 
 2421         return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
 2422 }
 2423 
 2424 static inline void
 2425 iq_next(struct sge_iq *iq)
 2426 {
 2427         iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
 2428         if (__predict_false(++iq->cidx == iq->qsize - 1)) {
 2429                 iq->cidx = 0;
 2430                 iq->gen ^= 1;
 2431                 iq->cdesc = iq->desc;
 2432         }
 2433 }
 2434 
 2435 #define FL_HW_IDX(x) ((x) >> 3)
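      /*
       * The freelist is handed to the hardware in units of 8 descriptors:
       * FL_HW_IDX() converts a software index to that granularity, and the
       * PIDX count written to the doorbell below is in the same units (hence
       * pending / 8 and pending -= ndesc * 8).
       */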
 2436 static inline void
 2437 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
 2438 {
 2439         int ndesc = fl->pending / 8;
 2440 
 2441         if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
 2442                 ndesc--;        /* hold back one credit */
 2443 
 2444         if (ndesc <= 0)
 2445                 return;         /* nothing to do */
 2446 
 2447         wmb();
 2448 
 2449         t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
 2450             V_QID(fl->cntxt_id) | V_PIDX(ndesc));
 2451         fl->pending -= ndesc * 8;
 2452 }
 2453 
 2454 /*
 2455  * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
 2456  *
 2457  * Returns non-zero to indicate that it should be added to the list of starving
 2458  * freelists.
 2459  */
 2460 static int
 2461 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
 2462 {
 2463         __be64 *d = &fl->desc[fl->pidx];
 2464         struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
 2465         bus_dma_tag_t tag;
 2466         bus_addr_t pa;
 2467         caddr_t cl;
 2468         int rc;
 2469 
 2470         FL_LOCK_ASSERT_OWNED(fl);
 2471 
 2472         if (nbufs > fl->needed)
 2473                 nbufs = fl->needed;
 2474 
 2475         while (nbufs--) {
 2476 
 2477                 if (sd->cl != NULL) {
 2478 
 2479                         /*
 2480                          * This happens when a frame small enough to fit
 2481                          * entirely in an mbuf was received in cl last time.
 2482                          * We held on to cl and can reuse it now.  Note that
 2483                          * we reuse a cluster of the old size if fl->tag_idx is
 2484                          * no longer the same as sd->tag_idx.
 2485                          */
 2486 
 2487                         KASSERT(*d == sd->ba_tag,
 2488                             ("%s: recycling problem at pidx %d",
 2489                             __func__, fl->pidx));
 2490 
 2491                         d++;
 2492                         goto recycled;
 2493                 }
 2494 
 2495 
 2496                 if (fl->tag_idx != sd->tag_idx) {
 2497                         bus_dmamap_t map;
 2498                         bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
 2499                         bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
 2500 
 2501                         /*
 2502                          * An MTU change can get us here.  Discard the old map
 2503                          * which was created with the old tag, but only if
 2504                          * we're able to get a new one.
 2505                          */
 2506                         rc = bus_dmamap_create(newtag, 0, &map);
 2507                         if (rc == 0) {
 2508                                 bus_dmamap_destroy(oldtag, sd->map);
 2509                                 sd->map = map;
 2510                                 sd->tag_idx = fl->tag_idx;
 2511                         }
 2512                 }
 2513 
 2514                 tag = fl->tag[sd->tag_idx];
 2515 
 2516                 cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
 2517                 if (cl == NULL)
 2518                         break;
 2519 
 2520                 rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
 2521                     oneseg_dma_callback, &pa, 0);
 2522                 if (rc != 0 || pa == 0) {
 2523                         fl->dmamap_failed++;
 2524                         uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
 2525                         break;
 2526                 }
 2527 
 2528                 sd->cl = cl;
 2529                 *d++ = htobe64(pa | sd->tag_idx);
 2530 
 2531 #ifdef INVARIANTS
 2532                 sd->ba_tag = htobe64(pa | sd->tag_idx);
 2533 #endif
 2534 
 2535 recycled:
 2536                 /* sd->m is never recycled, should always be NULL */
 2537                 KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
 2538 
 2539                 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
 2540                 if (sd->m == NULL)
 2541                         break;
 2542 
 2543                 fl->pending++;
 2544                 fl->needed--;
 2545                 sd++;
 2546                 if (++fl->pidx == fl->cap) {
 2547                         fl->pidx = 0;
 2548                         sd = fl->sdesc;
 2549                         d = fl->desc;
 2550                 }
 2551         }
 2552 
 2553         if (fl->pending >= 8)
 2554                 ring_fl_db(sc, fl);
 2555 
 2556         return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
 2557 }
 2558 
 2559 /*
 2560  * Attempt to refill all starving freelists.
 2561  */
 2562 static void
 2563 refill_sfl(void *arg)
 2564 {
 2565         struct adapter *sc = arg;
 2566         struct sge_fl *fl, *fl_temp;
 2567 
 2568         mtx_lock(&sc->sfl_lock);
 2569         TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
 2570                 FL_LOCK(fl);
 2571                 refill_fl(sc, fl, 64);
 2572                 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
 2573                         TAILQ_REMOVE(&sc->sfl, fl, link);
 2574                         fl->flags &= ~FL_STARVING;
 2575                 }
 2576                 FL_UNLOCK(fl);
 2577         }
 2578 
 2579         if (!TAILQ_EMPTY(&sc->sfl))
 2580                 callout_schedule(&sc->sfl_callout, hz / 5);
 2581         mtx_unlock(&sc->sfl_lock);
 2582 }
 2583 
 2584 static int
 2585 alloc_fl_sdesc(struct sge_fl *fl)
 2586 {
 2587         struct fl_sdesc *sd;
 2588         bus_dma_tag_t tag;
 2589         int i, rc;
 2590 
 2591         FL_LOCK_ASSERT_OWNED(fl);
 2592 
 2593         fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
 2594             M_ZERO | M_WAITOK);
 2595 
 2596         tag = fl->tag[fl->tag_idx];
 2597         sd = fl->sdesc;
 2598         for (i = 0; i < fl->cap; i++, sd++) {
 2599 
 2600                 sd->tag_idx = fl->tag_idx;
 2601                 rc = bus_dmamap_create(tag, 0, &sd->map);
 2602                 if (rc != 0)
 2603                         goto failed;
 2604         }
 2605 
 2606         return (0);
 2607 failed:
 2608         while (--i >= 0) {
 2609                 sd--;
 2610                 bus_dmamap_destroy(tag, sd->map);
 2611                 if (sd->m) {
 2612                         m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
 2613                         m_free(sd->m);
 2614                         sd->m = NULL;
 2615                 }
 2616         }
 2617         KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
 2618 
 2619         free(fl->sdesc, M_CXGBE);
 2620         fl->sdesc = NULL;
 2621 
 2622         return (rc);
 2623 }
 2624 
 2625 static void
 2626 free_fl_sdesc(struct sge_fl *fl)
 2627 {
 2628         struct fl_sdesc *sd;
 2629         int i;
 2630 
 2631         FL_LOCK_ASSERT_OWNED(fl);
 2632 
 2633         sd = fl->sdesc;
 2634         for (i = 0; i < fl->cap; i++, sd++) {
 2635 
 2636                 if (sd->m) {
 2637                         m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
 2638                         m_free(sd->m);
 2639                         sd->m = NULL;
 2640                 }
 2641 
 2642                 if (sd->cl) {
 2643                         bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
 2644                         uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
 2645                         sd->cl = NULL;
 2646                 }
 2647 
 2648                 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
 2649         }
 2650 
 2651         free(fl->sdesc, M_CXGBE);
 2652         fl->sdesc = NULL;
 2653 }
 2654 
 2655 int
 2656 t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
 2657     int flags)
 2658 {
 2659         struct tx_map *txm;
 2660         int i, rc;
 2661 
 2662         txmaps->map_total = txmaps->map_avail = count;
 2663         txmaps->map_cidx = txmaps->map_pidx = 0;
 2664 
 2665         txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
 2666             M_ZERO | flags);
 2667 
 2668         txm = txmaps->maps;
 2669         for (i = 0; i < count; i++, txm++) {
 2670                 rc = bus_dmamap_create(tx_tag, 0, &txm->map);
 2671                 if (rc != 0)
 2672                         goto failed;
 2673         }
 2674 
 2675         return (0);
 2676 failed:
 2677         while (--i >= 0) {
 2678                 txm--;
 2679                 bus_dmamap_destroy(tx_tag, txm->map);
 2680         }
 2681         KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
 2682 
 2683         free(txmaps->maps, M_CXGBE);
 2684         txmaps->maps = NULL;
 2685 
 2686         return (rc);
 2687 }
 2688 
 2689 void
 2690 t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
 2691 {
 2692         struct tx_map *txm;
 2693         int i;
 2694 
 2695         txm = txmaps->maps;
 2696         for (i = 0; i < txmaps->map_total; i++, txm++) {
 2697 
 2698                 if (txm->m) {
 2699                         bus_dmamap_unload(tx_tag, txm->map);
 2700                         m_freem(txm->m);
 2701                         txm->m = NULL;
 2702                 }
 2703 
 2704                 bus_dmamap_destroy(tx_tag, txm->map);
 2705         }
 2706 
 2707         free(txmaps->maps, M_CXGBE);
 2708         txmaps->maps = NULL;
 2709 }
 2710 
 2711 /*
 2712  * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
 2713  * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
 2714  * of immediate data.
 2715  */
 2716 #define IMM_LEN ( \
 2717       2 * EQ_ESIZE \
 2718     - sizeof(struct fw_eth_tx_pkt_wr) \
 2719     - sizeof(struct cpl_tx_pkt_core))
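      /*
       * A sketch of the arithmetic, assuming the usual 64-byte egress
       * descriptor (EQ_ESIZE): 2 * 64 = 128 bytes, less the 96 bytes of
       * immediate data quoted above, leaves 32 bytes for the fw_eth_tx_pkt_wr
       * and cpl_tx_pkt_core headers combined.
       */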
 2720 
 2721 /*
 2722  * Returns non-zero on failure, no need to cleanup anything in that case.
 2723  *
 2724  * Note 1: We always try to defrag the mbuf if required and return EFBIG only
 2725  * if the resulting chain still won't fit in a tx descriptor.
 2726  *
 2727  * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
 2728  * does not have the TCP header in it.
 2729  */
 2730 static int
 2731 get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
 2732     int sgl_only)
 2733 {
 2734         struct mbuf *m = *fp;
 2735         struct tx_maps *txmaps;
 2736         struct tx_map *txm;
 2737         int rc, defragged = 0, n;
 2738 
 2739         TXQ_LOCK_ASSERT_OWNED(txq);
 2740 
 2741         if (m->m_pkthdr.tso_segsz)
 2742                 sgl_only = 1;   /* Do not allow immediate data with LSO */
 2743 
 2744 start:  sgl->nsegs = 0;
 2745 
 2746         if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
 2747                 return (0);     /* nsegs = 0 tells caller to use imm. tx */
 2748 
 2749         txmaps = &txq->txmaps;
 2750         if (txmaps->map_avail == 0) {
 2751                 txq->no_dmamap++;
 2752                 return (ENOMEM);
 2753         }
 2754         txm = &txmaps->maps[txmaps->map_pidx];
 2755 
 2756         if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
 2757                 *fp = m_pullup(m, 50);
 2758                 m = *fp;
 2759                 if (m == NULL)
 2760                         return (ENOBUFS);
 2761         }
 2762 
 2763         rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
 2764             &sgl->nsegs, BUS_DMA_NOWAIT);
 2765         if (rc == EFBIG && defragged == 0) {
 2766                 m = m_defrag(m, M_DONTWAIT);
 2767                 if (m == NULL)
 2768                         return (EFBIG);
 2769 
 2770                 defragged = 1;
 2771                 *fp = m;
 2772                 goto start;
 2773         }
 2774         if (rc != 0)
 2775                 return (rc);
 2776 
 2777         txm->m = m;
 2778         txmaps->map_avail--;
 2779         if (++txmaps->map_pidx == txmaps->map_total)
 2780                 txmaps->map_pidx = 0;
 2781 
 2782         KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
 2783             ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
 2784 
 2785         /*
 2786          * Store the # of flits required to hold this frame's SGL in nflits.  An
 2787          * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
 2788          * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
 2789          * then len1 must be set to 0.
 2790          */
 2791         n = sgl->nsegs - 1;
 2792         sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
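              /*
               * Worked examples of the formula above: nsegs = 1 gives n = 0
               * and nflits = 2 (just the header tuple); nsegs = 2 gives 4 (a
               * partial trailing tuple); nsegs = 3 gives 5 (one full trailing
               * tuple).
               */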
 2793 
 2794         return (0);
 2795 }
 2796 
 2797 
 2798 /*
 2799  * Releases all the txq resources used up in the specified sgl.
 2800  */
 2801 static int
 2802 free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
 2803 {
 2804         struct tx_maps *txmaps;
 2805         struct tx_map *txm;
 2806 
 2807         TXQ_LOCK_ASSERT_OWNED(txq);
 2808 
 2809         if (sgl->nsegs == 0)
 2810                 return (0);     /* didn't use any map */
 2811 
 2812         txmaps = &txq->txmaps;
 2813 
 2814         /* 1 pkt uses exactly 1 map, back it out */
 2815 
 2816         txmaps->map_avail++;
 2817         if (txmaps->map_pidx > 0)
 2818                 txmaps->map_pidx--;
 2819         else
 2820                 txmaps->map_pidx = txmaps->map_total - 1;
 2821 
 2822         txm = &txmaps->maps[txmaps->map_pidx];
 2823         bus_dmamap_unload(txq->tx_tag, txm->map);
 2824         txm->m = NULL;
 2825 
 2826         return (0);
 2827 }
 2828 
 2829 static int
 2830 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
 2831     struct sgl *sgl)
 2832 {
 2833         struct sge_eq *eq = &txq->eq;
 2834         struct fw_eth_tx_pkt_wr *wr;
 2835         struct cpl_tx_pkt_core *cpl;
 2836         uint32_t ctrl;  /* used in many unrelated places */
 2837         uint64_t ctrl1;
 2838         int nflits, ndesc, pktlen;
 2839         struct tx_sdesc *txsd;
 2840         caddr_t dst;
 2841 
 2842         TXQ_LOCK_ASSERT_OWNED(txq);
 2843 
 2844         pktlen = m->m_pkthdr.len;
 2845 
 2846         /*
 2847          * Do we have enough flits to send this frame out?
 2848          */
 2849         ctrl = sizeof(struct cpl_tx_pkt_core);
 2850         if (m->m_pkthdr.tso_segsz) {
 2851                 nflits = TXPKT_LSO_WR_HDR;
 2852                 ctrl += sizeof(struct cpl_tx_pkt_lso_core);
 2853         } else
 2854                 nflits = TXPKT_WR_HDR;
 2855         if (sgl->nsegs > 0)
 2856                 nflits += sgl->nflits;
 2857         else {
 2858                 nflits += howmany(pktlen, 8);
 2859                 ctrl += pktlen;
 2860         }
 2861         ndesc = howmany(nflits, 8);
 2862         if (ndesc > eq->avail)
 2863                 return (ENOMEM);
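        /*
         * Illustrative sizing, following from the howmany() calls here: a
         * flit is 8 bytes and a tx descriptor holds 8 flits (64 bytes), so
         * e.g. nflits = 11 needs ndesc = 2 descriptors.  For a frame sent as
         * immediate data the payload itself is rounded up to whole flits
         * (howmany(pktlen, 8)) and counted toward the total.
         */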
 2864 
 2865         /* Firmware work request header */
 2866         wr = (void *)&eq->desc[eq->pidx];
 2867         wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
 2868             V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
 2869         ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
 2870         if (eq->avail == ndesc) {
 2871                 if (!(eq->flags & EQ_CRFLUSHED)) {
 2872                         ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
 2873                         eq->flags |= EQ_CRFLUSHED;
 2874                 }
 2875                 eq->flags |= EQ_STALLED;
 2876         }
 2877 
 2878         wr->equiq_to_len16 = htobe32(ctrl);
 2879         wr->r3 = 0;
 2880 
 2881         if (m->m_pkthdr.tso_segsz) {
 2882                 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
 2883                 struct ether_header *eh;
 2884                 void *l3hdr;
 2885 #if defined(INET) || defined(INET6)
 2886                 struct tcphdr *tcp;
 2887 #endif
 2888                 uint16_t eh_type;
 2889 
 2890                 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
 2891                     F_LSO_LAST_SLICE;
 2892 
 2893                 eh = mtod(m, struct ether_header *);
 2894                 eh_type = ntohs(eh->ether_type);
 2895                 if (eh_type == ETHERTYPE_VLAN) {
 2896                         struct ether_vlan_header *evh = (void *)eh;
 2897 
 2898                         ctrl |= V_LSO_ETHHDR_LEN(1);
 2899                         l3hdr = evh + 1;
 2900                         eh_type = ntohs(evh->evl_proto);
 2901                 } else
 2902                         l3hdr = eh + 1;
 2903 
 2904                 switch (eh_type) {
 2905 #ifdef INET6
 2906                 case ETHERTYPE_IPV6:
 2907                 {
 2908                         struct ip6_hdr *ip6 = l3hdr;
 2909 
 2910                         /*
 2911                          * XXX-BZ For now we do not pretend to support
 2912                          * IPv6 extension headers.
 2913                          */
 2914                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
 2915                             "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
 2916                         tcp = (struct tcphdr *)(ip6 + 1);
 2917                         ctrl |= F_LSO_IPV6;
 2918                         ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
 2919                             V_LSO_TCPHDR_LEN(tcp->th_off);
 2920                         break;
 2921                 }
 2922 #endif
 2923 #ifdef INET
 2924                 case ETHERTYPE_IP:
 2925                 {
 2926                         struct ip *ip = l3hdr;
 2927 
 2928                         tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
 2929                         ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
 2930                             V_LSO_TCPHDR_LEN(tcp->th_off);
 2931                         break;
 2932                 }
 2933 #endif
 2934                 default:
 2935                         panic("%s: CSUM_TSO but no supported IP version "
 2936                             "(0x%04x)", __func__, eh_type);
 2937                 }
 2938 
 2939                 lso->lso_ctrl = htobe32(ctrl);
 2940                 lso->ipid_ofst = htobe16(0);
 2941                 lso->mss = htobe16(m->m_pkthdr.tso_segsz);
 2942                 lso->seqno_offset = htobe32(0);
 2943                 lso->len = htobe32(pktlen);
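                /*
                 * Note on the header-length fields above: they are in 4-byte
                 * words.  A standard 20-byte IPv4 header gives ip_hl = 5, a
                 * 40-byte IPv6 header gives sizeof(*ip6) >> 2 = 10, and a
                 * 20-byte TCP header with no options gives th_off = 5.
                 */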
 2944 
 2945                 cpl = (void *)(lso + 1);
 2946 
 2947                 txq->tso_wrs++;
 2948         } else
 2949                 cpl = (void *)(wr + 1);
 2950 
 2951         /* Checksum offload */
 2952         ctrl1 = 0;
 2953         if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
 2954                 ctrl1 |= F_TXPKT_IPCSUM_DIS;
 2955         if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
 2956             CSUM_TCP_IPV6 | CSUM_TSO)))
 2957                 ctrl1 |= F_TXPKT_L4CSUM_DIS;
 2958         if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
 2959             CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
 2960                 txq->txcsum++;  /* some hardware assistance provided */
 2961 
 2962         /* VLAN tag insertion */
 2963         if (m->m_flags & M_VLANTAG) {
 2964                 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
 2965                 txq->vlan_insertion++;
 2966         }
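        /*
         * Example of how ctrl1 ends up: a TCP/IPv4 frame with CSUM_IP and
         * CSUM_TCP set leaves both *_CSUM_DIS bits clear, so the hardware
         * fills in both checksums; a frame with no offload flags at all sets
         * F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS and the checksums are left
         * untouched.
         */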
 2967 
 2968         /* CPL header */
 2969         cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
 2970             V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
 2971         cpl->pack = 0;
 2972         cpl->len = htobe16(pktlen);
 2973         cpl->ctrl1 = htobe64(ctrl1);
 2974 
 2975         /* Software descriptor */
 2976         txsd = &txq->sdesc[eq->pidx];
 2977         txsd->desc_used = ndesc;
 2978 
 2979         eq->pending += ndesc;
 2980         eq->avail -= ndesc;
 2981         eq->pidx += ndesc;
 2982         if (eq->pidx >= eq->cap)
 2983                 eq->pidx -= eq->cap;
 2984 
 2985         /* SGL */
 2986         dst = (void *)(cpl + 1);
 2987         if (sgl->nsegs > 0) {
 2988                 txsd->credits = 1;
 2989                 txq->sgl_wrs++;
 2990                 write_sgl_to_txd(eq, sgl, &dst);
 2991         } else {
 2992                 txsd->credits = 0;
 2993                 txq->imm_wrs++;
 2994                 for (; m; m = m->m_next) {
 2995                         copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
 2996 #ifdef INVARIANTS
 2997                         pktlen -= m->m_len;
 2998 #endif
 2999                 }
 3000 #ifdef INVARIANTS
 3001                 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
 3002 #endif
 3003 
 3004         }
 3005 
 3006         txq->txpkt_wrs++;
 3007         return (0);
 3008 }
 3009 
 3010 /*
 3011  * Returns 0 to indicate that m has been accepted into a coalesced tx work
 3012  * request.  It has either been folded into txpkts or txpkts was flushed and m
 3013  * has started a new coalesced work request (as the first frame in a fresh
 3014  * txpkts).
 3015  *
 3016  * Returns non-zero to indicate a failure; the caller is then responsible for
 3017  * transmitting m.  If there was anything in txpkts, it has already been flushed.
 3018  */
 3019 static int
 3020 add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
 3021     struct mbuf *m, struct sgl *sgl)
 3022 {
 3023         struct sge_eq *eq = &txq->eq;
 3024         int can_coalesce;
 3025         struct tx_sdesc *txsd;
 3026         int flits;
 3027 
 3028         TXQ_LOCK_ASSERT_OWNED(txq);
 3029 
 3030         KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
 3031 
 3032         if (txpkts->npkt > 0) {
 3033                 flits = TXPKTS_PKT_HDR + sgl->nflits;
 3034                 can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
 3035                     txpkts->nflits + flits <= TX_WR_FLITS &&
 3036                     txpkts->nflits + flits <= eq->avail * 8 &&
 3037                     txpkts->plen + m->m_pkthdr.len < 65536;
 3038 
 3039                 if (can_coalesce) {
 3040                         txpkts->npkt++;
 3041                         txpkts->nflits += flits;
 3042                         txpkts->plen += m->m_pkthdr.len;
 3043 
 3044                         txsd = &txq->sdesc[eq->pidx];
 3045                         txsd->credits++;
 3046 
 3047                         return (0);
 3048                 }
 3049 
 3050                 /*
 3051                  * Couldn't coalesce m into txpkts.  The first order of business
 3052                  * is to send txpkts on its way.  Then we'll revisit m.
 3053                  */
 3054                 write_txpkts_wr(txq, txpkts);
 3055         }
 3056 
 3057         /*
 3058          * Check if we can start a new coalesced tx work request with m as
 3059          * the first packet in it.
 3060          */
 3061 
 3062         KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
 3063 
 3064         flits = TXPKTS_WR_HDR + sgl->nflits;
 3065         can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
 3066             flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
 3067 
 3068         if (can_coalesce == 0)
 3069                 return (EINVAL);
 3070 
 3071         /*
 3072          * Start a fresh coalesced tx WR with m as the first frame in it.
 3073          */
 3074         txpkts->npkt = 1;
 3075         txpkts->nflits = flits;
 3076         txpkts->flitp = &eq->desc[eq->pidx].flit[2];
 3077         txpkts->plen = m->m_pkthdr.len;
 3078 
 3079         txsd = &txq->sdesc[eq->pidx];
 3080         txsd->credits = 1;
 3081 
 3082         return (0);
 3083 }
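
/*
 * A minimal sketch (not part of the driver) of how a caller can act on the
 * return convention documented above.  The surrounding setup and the error
 * handling shown here are hypothetical; the real transmit path lives
 * elsewhere in this file.
 */
#if 0
        if (add_to_txpkts(pi, txq, &txpkts, m, &sgl) != 0) {
                /* m was not coalesced; any pending txpkts has been flushed. */
                rc = write_txpkt_wr(pi, txq, m, &sgl);
                if (rc == ENOMEM) {
                        /* Out of hardware descriptors; requeue m and stop. */
                }
        }
#endif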
 3084 
 3085 /*
 3086  * Note that write_txpkts_wr can never run out of hardware descriptors (but
 3087  * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
 3088  * coalescing only if sufficient hardware descriptors are available.
 3089  */
 3090 static void
 3091 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
 3092 {
 3093         struct sge_eq *eq = &txq->eq;
 3094         struct fw_eth_tx_pkts_wr *wr;
 3095         struct tx_sdesc *txsd;
 3096         uint32_t ctrl;
 3097         int ndesc;
 3098 
 3099         TXQ_LOCK_ASSERT_OWNED(txq);
 3100 
 3101         ndesc = howmany(txpkts->nflits, 8);
 3102 
 3103         wr = (void *)&eq->desc[eq->pidx];
 3104         wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
 3105         ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
 3106         if (eq->avail == ndesc) {
 3107                 if (!(eq->flags & EQ_CRFLUSHED)) {
 3108                         ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
 3109                         eq->flags |= EQ_CRFLUSHED;
 3110                 }
 3111                 eq->flags |= EQ_STALLED;
 3112         }
 3113         wr->equiq_to_len16 = htobe32(ctrl);
 3114         wr->plen = htobe16(txpkts->plen);
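        /*
         * plen is a 16-bit field, which is why add_to_txpkts refuses to let a
         * coalesced work request grow to 65536 bytes or more.
         */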
 3115         wr->npkt = txpkts->npkt;
 3116         wr->r3 = wr->type = 0;
 3117 
 3118         /* Everything else already written */
 3119 
 3120         txsd = &txq->sdesc[eq->pidx];
 3121         txsd->desc_used = ndesc;
 3122 
 3123         KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
 3124 
 3125         eq->pending += ndesc;
 3126         eq->avail -= ndesc;
 3127         eq->pidx += ndesc;
 3128         if (eq->pidx >= eq->cap)
 3129                 eq->pidx -= eq->cap;
 3130 
 3131         txq->txpkts_pkts += txpkts->npkt;
 3132         txq->txpkts_wrs++;
 3133         txpkts->npkt = 0;       /* emptied */
 3134 }
 3135 
 3136 static inline void
 3137 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
 3138     struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
 3139 {
 3140         struct ulp_txpkt *ulpmc;
 3141         struct ulptx_idata *ulpsc;
 3142         struct cpl_tx_pkt_core *cpl;
 3143         struct sge_eq *eq = &txq->eq;
 3144         uintptr_t flitp, start, end;
 3145         uint64_t ctrl;
 3146         caddr_t dst;
 3147 
 3148         KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
 3149 
 3150         start = (uintptr_t)eq->desc;
 3151         end = (uintptr_t)eq->spg;
 3152 
 3153         /* Checksum offload */
 3154         ctrl = 0;
 3155         if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
 3156                 ctrl |= F_TXPKT_IPCSUM_DIS;
 3157         if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
 3158             CSUM_TCP_IPV6 | CSUM_TSO)))
 3159                 ctrl |= F_TXPKT_L4CSUM_DIS;
 3160         if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
 3161             CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
 3162                 txq->txcsum++;  /* some hardware assistance provided */
 3163 
 3164         /* VLAN tag insertion */
 3165         if (m->m_flags & M_VLANTAG) {
 3166                 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
 3167                 txq->vlan_insertion++;
 3168         }
 3169 
 3170         /*
 3171          * The previous packet's SGL must have ended at a 16 byte boundary (this
 3172          * is required by the firmware/hardware).  It follows that flitp cannot
 3173          * wrap around between the ULPTX master command and ULPTX subcommand (8
 3174  * bytes each), and that it cannot wrap around in the middle of the
 3175          * cpl_tx_pkt_core either.
 3176          */
 3177         flitp = (uintptr_t)txpkts->flitp;
 3178         KASSERT((flitp & 0xf) == 0,
 3179             ("%s: last SGL did not end at 16 byte boundary: %p",
 3180             __func__, txpkts->flitp));
 3181 
 3182         /* ULP master command */
 3183         ulpmc = (void *)flitp;
 3184         ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
 3185             V_ULP_TXPKT_FID(eq->iqid));
 3186         ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
 3187             sizeof(*cpl) + 8 * sgl->nflits, 16));
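        /*
         * Illustrative arithmetic for the length above, assuming the usual
         * 8-byte ulp_txpkt and ulptx_idata and 16-byte cpl_tx_pkt_core: a
         * single-segment SGL has nflits = 2, so the length works out to
         * howmany(8 + 8 + 16 + 16, 16) = 3 units of 16 bytes.
         */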
 3188 
 3189         /* ULP subcommand */
 3190         ulpsc = (void *)(ulpmc + 1);
 3191         ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
 3192             F_ULP_TX_SC_MORE);
 3193         ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
 3194 
 3195         flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
 3196         if (flitp == end)
 3197                 flitp = start;
 3198 
 3199         /* CPL_TX_PKT */
 3200         cpl = (void *)flitp;
 3201         cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
 3202             V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
 3203         cpl->pack = 0;
 3204         cpl->len = htobe16(m->m_pkthdr.len);
 3205         cpl->ctrl1 = htobe64(ctrl);
 3206 
 3207         flitp += sizeof(*cpl);
 3208         if (flitp == end)
 3209                 flitp = start;
 3210 
 3211         /* SGL for this frame */
 3212         dst = (caddr_t)flitp;
 3213         txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
 3214         txpkts->flitp = (void *)dst;
 3215 
 3216         KASSERT(((uintptr_t)dst & 0xf) == 0,
 3217             ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
 3218 }
 3219 
 3220 /*
 3221  * If the SGL ends at an address that is not 16-byte aligned, this function
 3222  * appends a zero-filled flit and returns 1; otherwise it returns 0.
 3223  */
 3224 static int
 3225 write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
 3226 {
 3227         __be64 *flitp, *end;
 3228         struct ulptx_sgl *usgl;
 3229         bus_dma_segment_t *seg;
 3230         int i, padded;
 3231 
 3232         KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
 3233             ("%s: bad SGL - nsegs=%d, nflits=%d",
 3234             __func__, sgl->nsegs, sgl->nflits));
 3235 
 3236         KASSERT(((uintptr_t)(*to) & 0xf) == 0,
 3237             ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
 3238 
 3239         flitp = (__be64 *)(*to);
 3240         end = flitp + sgl->nflits;
 3241         seg = &sgl->seg[0];
 3242         usgl = (void *)flitp;
 3243 
 3244         /*
 3245          * We start at a 16 byte boundary somewhere inside the tx descriptor
 3246          * ring, so we're at least 16 bytes away from the status page.  There is
 3247          * no chance of a wrap around in the middle of usgl (which is 16 bytes).
 3248          */
 3249 
 3250         usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 3251             V_ULPTX_NSGE(sgl->nsegs));
 3252         usgl->len0 = htobe32(seg->ds_len);
 3253         usgl->addr0 = htobe64(seg->ds_addr);
 3254         seg++;
 3255 
 3256         if ((uintptr_t)end <= (uintptr_t)eq->spg) {
 3257 
 3258                 /* Won't wrap around at all */
 3259 
 3260                 for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
 3261                         usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
 3262                         usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
 3263                 }
 3264                 if (i & 1)
 3265                         usgl->sge[i / 2].len[1] = htobe32(0);
 3266         } else {
 3267 
 3268                 /* Will wrap somewhere in the rest of the SGL */
 3269 
 3270                 /* 2 flits already written, write the rest flit by flit */
 3271                 flitp = (void *)(usgl + 1);
 3272                 for (i = 0; i < sgl->nflits - 2; i++) {
 3273                         if ((uintptr_t)flitp == (uintptr_t)eq->spg)
 3274                                 flitp = (void *)eq->desc;
 3275                         *flitp++ = get_flit(seg, sgl->nsegs - 1, i);
 3276                 }
 3277                 end = flitp;
 3278         }
 3279 
 3280         if ((uintptr_t)end & 0xf) {
 3281                 *(uint64_t *)end = 0;
 3282                 end++;
 3283                 padded = 1;
 3284         } else
 3285                 padded = 0;
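        /*
         * Since each flit is 8 bytes and the SGL starts 16-byte aligned,
         * padding is needed exactly when sgl->nflits is odd.  For example,
         * nsegs = 3 gives nflits = 5 and one zero flit of padding (return 1),
         * while nsegs = 2 gives nflits = 4 and no padding (return 0).
         */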
 3286 
 3287         if ((uintptr_t)end == (uintptr_t)eq->spg)
 3288                 *to = (void *)eq->desc;
 3289         else
 3290                 *to = (void *)end;
 3291 
 3292         return (padded);
 3293 }
 3294 
 3295 static inline void
 3296 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
 3297 {
 3298         if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
 3299                 bcopy(from, *to, len);
 3300                 (*to) += len;
 3301         } else {
 3302                 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
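                /*
                 * Worked example: if only 40 bytes remain before the status
                 * page and len is 100, the first bcopy writes those 40 bytes,
                 * and the remaining 60 bytes land at the start of the
                 * descriptor ring (eq->desc).
                 */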
 3303 
 3304                 bcopy(from, *to, portion);
 3305                 from += portion;
 3306                 portion = len - portion;        /* remaining */
 3307                 bcopy(from, (void *)eq->desc, portion);
 3308                 (*to) = (caddr_t)eq->desc + portion;
 3309         }
 3310 }
 3311 
 3312 static inline void
 3313 ring_eq_db(struct adapter *sc, struct sge_eq *eq)
 3314 {
 3315         wmb();
 3316         t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
 3317             V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
 3318         eq->pending = 0;
 3319 }
 3320 
 3321 static inline int
 3322 reclaimable(struct sge_eq *eq)
 3323 {
 3324         unsigned int cidx;
 3325 
 3326         cidx = eq->spg->cidx;   /* stable snapshot */
 3327         cidx = be16toh(cidx);
 3328 
 3329         if (cidx >= eq->cidx)
 3330                 return (cidx - eq->cidx);
 3331         else
 3332                 return (cidx + eq->cap - eq->cidx);
 3333 }
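
/*
 * Worked example for reclaimable(): the status page holds the hardware's
 * consumer index.  With cap = 1024, a driver cidx of 1000, and a hardware
 * cidx of 8, the index has wrapped and 8 + 1024 - 1000 = 32 descriptors are
 * ready to be reclaimed.
 */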
 3334 
 3335 /*
 3336  * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 3337  * many as possible but stop when there are around "n" mbufs to free.
 3338  *
 3339  * The actual number reclaimed is provided as the return value.
 3340  */
 3341 static int
 3342 reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
 3343 {
 3344         struct tx_sdesc *txsd;
 3345         struct tx_maps *txmaps;
 3346         struct tx_map *txm;
 3347         unsigned int reclaimed, maps;
 3348         struct sge_eq *eq = &txq->eq;
 3349 
 3350         TXQ_LOCK_ASSERT_OWNED(txq);
 3351 
 3352         if (can_reclaim == 0)
 3353                 can_reclaim = reclaimable(eq);
 3354 
 3355         maps = reclaimed = 0;
 3356         while (can_reclaim && maps < n) {
 3357                 int ndesc;
 3358 
 3359                 txsd = &txq->sdesc[eq->cidx];
 3360                 ndesc = txsd->desc_used;
 3361 
 3362                 /* Firmware doesn't return "partial" credits. */
 3363                 KASSERT(can_reclaim >= ndesc,
 3364                     ("%s: unexpected number of credits: %d, %d",
 3365                     __func__, can_reclaim, ndesc));
 3366 
 3367                 maps += txsd->credits;
 3368 
 3369                 reclaimed += ndesc;
 3370                 can_reclaim -= ndesc;
 3371 
 3372                 eq->cidx += ndesc;
 3373                 if (__predict_false(eq->cidx >= eq->cap))
 3374                         eq->cidx -= eq->cap;
 3375         }
 3376 
 3377         txmaps = &txq->txmaps;
 3378         txm = &txmaps->maps[txmaps->map_cidx];
 3379         if (maps)
 3380                 prefetch(txm->m);
 3381 
 3382         eq->avail += reclaimed;
 3383         KASSERT(eq->avail < eq->cap,    /* avail tops out at (cap - 1) */
 3384             ("%s: too many descriptors available", __func__));
 3385 
 3386         txmaps->map_avail += maps;
 3387         KASSERT(txmaps->map_avail <= txmaps->map_total,
 3388             ("%s: too many maps available", __func__));
 3389 
 3390         while (maps--) {
 3391                 struct tx_map *next;
 3392 
 3393                 next = txm + 1;
 3394                 if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
 3395                         next = txmaps->maps;
 3396                 prefetch(next->m);
 3397 
 3398                 bus_dmamap_unload(txq->tx_tag, txm->map);
 3399                 m_freem(txm->m);
 3400                 txm->m = NULL;
 3401 
 3402                 txm = next;
 3403                 if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
 3404                         txmaps->map_cidx = 0;
 3405         }
 3406 
 3407         return (reclaimed);
 3408 }
 3409 
 3410 static void
 3411 write_eqflush_wr(struct sge_eq *eq)
 3412 {
 3413         struct fw_eq_flush_wr *wr;
 3414 
 3415         EQ_LOCK_ASSERT_OWNED(eq);
 3416         KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
 3417         KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
 3418 
 3419         wr = (void *)&eq->desc[eq->pidx];
 3420         bzero(wr, sizeof(*wr));
 3421         wr->opcode = FW_EQ_FLUSH_WR;
 3422         wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
 3423             F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
 3424 
 3425         eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
 3426         eq->pending++;
 3427         eq->avail--;
 3428         if (++eq->pidx == eq->cap)
 3429                 eq->pidx = 0; 
 3430 }
 3431 
 3432 static __be64
 3433 get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
 3434 {
 3435         int i = (idx / 3) * 2;
 3436 
 3437         switch (idx % 3) {
 3438         case 0: {
 3439                 __be64 rc;
 3440 
 3441                 rc = htobe32(sgl[i].ds_len);
 3442                 if (i + 1 < nsegs)
 3443                         rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
 3444 
 3445                 return (rc);
 3446         }
 3447         case 1:
 3448                 return htobe64(sgl[i].ds_addr);
 3449         case 2:
 3450                 return htobe64(sgl[i + 1].ds_addr);
 3451         }
 3452 
 3453         return (0);
 3454 }
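
/*
 * Index mapping used by get_flit(), shown by example: flits are generated in
 * groups of three covering two segments each, so idx = 3 packs the lengths of
 * sgl[2] and sgl[3] into one flit, idx = 4 is the address of sgl[2], and
 * idx = 5 is the address of sgl[3] (i = (idx / 3) * 2 = 2 in all three cases).
 */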
 3455 
 3456 static void
 3457 set_fl_tag_idx(struct sge_fl *fl, int bufsize)
 3458 {
 3459         int i;
 3460 
 3461         for (i = 0; i < FL_BUF_SIZES - 1; i++) {
 3462                 if (FL_BUF_SIZE(i) >= bufsize)
 3463                         break;
 3464         }
 3465 
 3466         fl->tag_idx = i;
 3467 }
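
/*
 * Example for set_fl_tag_idx(), assuming the usual cluster sizes configured
 * by t4_sge_modload (e.g. 2048, 4096, 9216, 16384): a 3000-byte bufsize
 * selects the 4096-byte index, and a request larger than every configured
 * size falls back to the last (largest) index.
 */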
 3468 
 3469 static void
 3470 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
 3471 {
 3472         mtx_lock(&sc->sfl_lock);
 3473         FL_LOCK(fl);
 3474         if ((fl->flags & FL_DOOMED) == 0) {
 3475                 fl->flags |= FL_STARVING;
 3476                 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
 3477                 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
 3478         }
 3479         FL_UNLOCK(fl);
 3480         mtx_unlock(&sc->sfl_lock);
 3481 }
 3482 
 3483 static int
 3484 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
 3485     struct mbuf *m)
 3486 {
 3487         const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
 3488         unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
 3489         struct adapter *sc = iq->adapter;
 3490         struct sge *s = &sc->sge;
 3491         struct sge_eq *eq;
 3492 
 3493         KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
 3494             rss->opcode));
 3495 
 3496         eq = s->eqmap[qid - s->eq_start];
 3497         EQ_LOCK(eq);
 3498         KASSERT(eq->flags & EQ_CRFLUSHED,
 3499             ("%s: unsolicited egress update", __func__));
 3500         eq->flags &= ~EQ_CRFLUSHED;
 3501         eq->egr_update++;
 3502 
 3503         if (__predict_false(eq->flags & EQ_DOOMED))
 3504                 wakeup_one(eq);
 3505         else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
 3506                 taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
 3507         EQ_UNLOCK(eq);
 3508 
 3509         return (0);
 3510 }
 3511 
 3512 /* handle_fw_msg works for both fw4_msg and fw6_msg because the data offsets match: */
 3513 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
 3514     offsetof(struct cpl_fw6_msg, data));
 3515 
 3516 static int
 3517 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 3518 {
 3519         struct adapter *sc = iq->adapter;
 3520         const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
 3521 
 3522         KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
 3523             rss->opcode));
 3524 
 3525         if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
 3526                 const struct rss_header *rss2;
 3527 
 3528                 rss2 = (const struct rss_header *)&cpl->data[0];
 3529                 return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
 3530         }
 3531 
 3532         return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
 3533 }
 3534 
 3535 static int
 3536 sysctl_uint16(SYSCTL_HANDLER_ARGS)
 3537 {
 3538         uint16_t *id = arg1;
 3539         int i = *id;
 3540 
 3541         return sysctl_handle_int(oidp, &i, 0, req);
 3542 }
