FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3 
    4 Copyright (c) 2007-2009, Chelsio Inc.
    5 All rights reserved.
    6 
    7 Redistribution and use in source and binary forms, with or without
    8 modification, are permitted provided that the following conditions are met:
    9 
   10  1. Redistributions of source code must retain the above copyright notice,
   11     this list of conditions and the following disclaimer.
   12 
   13  2. Neither the name of the Chelsio Corporation nor the names of its
   14     contributors may be used to endorse or promote products derived from
   15     this software without specific prior written permission.
   16  
   17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   27 POSSIBILITY OF SUCH DAMAGE.
   28 
   29 ***************************************************************************/
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD: releng/12.0/sys/dev/cxgb/cxgb_sge.c 333288 2018-05-06 00:48:43Z markj $");
   33 
   34 #include "opt_inet6.h"
   35 #include "opt_inet.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/module.h>
   41 #include <sys/bus.h>
   42 #include <sys/conf.h>
   43 #include <machine/bus.h>
   44 #include <machine/resource.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/if.h>
   60 #include <net/if_var.h>
   61 #include <net/bpf.h>    
   62 #include <net/ethernet.h>
   63 #include <net/if_vlan_var.h>
   64 
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in.h>
   67 #include <netinet/ip.h>
   68 #include <netinet/ip6.h>
   69 #include <netinet/tcp.h>
   70 
   71 #include <dev/pci/pcireg.h>
   72 #include <dev/pci/pcivar.h>
   73 
   74 #include <vm/vm.h>
   75 #include <vm/pmap.h>
   76 
   77 #include <cxgb_include.h>
   78 #include <sys/mvec.h>
   79 
   80 int     txq_fills = 0;
   81 int     multiq_tx_enable = 1;
   82 
   83 #ifdef TCP_OFFLOAD
   84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
   85 #endif
   86 
   87 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   90     "size of per-queue mbuf ring");
   91 
   92 static int cxgb_tx_coalesce_force = 0;
   93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
   94     &cxgb_tx_coalesce_force, 0,
   95     "coalesce small packets into a single work request regardless of ring state");
   96 
   97 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   98 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   99 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
  100 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
  101 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
  102 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
  103 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
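       /*
        * Illustrative arithmetic (not part of the driver): assuming
        * TX_ETH_Q_SIZE is 1024, the shift-based thresholds above work out to
        * START_DEFAULT = 512, START_MAX = 896, STOP_DEFAULT = 256,
        * STOP_MIN = 32, RECLAIM_DEFAULT = 32, RECLAIM_MAX = 256 and
        * RECLAIM_MIN = 16.  Note that several of the macro bodies are
        * unparenthesized, so they are only safe in contexts where the
        * surrounding expression cannot change the intended precedence.
        */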
  104 
  105 
  106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
  108     &cxgb_tx_coalesce_enable_start, 0,
  109     "coalesce enable threshold");
  110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
  112     &cxgb_tx_coalesce_enable_stop, 0,
  113     "coalesce disable threshold");
  114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
  116     &cxgb_tx_reclaim_threshold, 0,
  117     "tx cleaning minimum threshold");
  118 
  119 /*
  120  * XXX don't re-enable this until TOE stops assuming
  121  * we have an m_ext
  122  */
  123 static int recycle_enable = 0;
  124 
  125 extern int cxgb_use_16k_clusters;
  126 extern int nmbjumbop;
  127 extern int nmbjumbo9;
  128 extern int nmbjumbo16;
  129 
  130 #define USE_GTS 0
  131 
  132 #define SGE_RX_SM_BUF_SIZE      1536
  133 #define SGE_RX_DROP_THRES       16
  134 #define SGE_RX_COPY_THRES       128
  135 
  136 /*
  137  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  138  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  139  */
  140 #define TX_RECLAIM_PERIOD       (hz >> 1)
  141 
  142 /* 
  143  * Values for sge_txq.flags
  144  */
  145 enum {
  146         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  147         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  148 };
  149 
  150 struct tx_desc {
  151         uint64_t        flit[TX_DESC_FLITS];
  152 } __packed;
  153 
  154 struct rx_desc {
  155         uint32_t        addr_lo;
  156         uint32_t        len_gen;
  157         uint32_t        gen2;
  158         uint32_t        addr_hi;
  159 } __packed;
  160 
  161 struct rsp_desc {               /* response queue descriptor */
  162         struct rss_header       rss_hdr;
  163         uint32_t                flags;
  164         uint32_t                len_cq;
  165         uint8_t                 imm_data[47];
  166         uint8_t                 intr_gen;
  167 } __packed;
  168 
  169 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  170 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  171 #define RX_SW_DESC_INUSE        (1 << 3)
  172 #define TX_SW_DESC_MAPPED       (1 << 4)
  173 
  174 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  175 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  176 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  177 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  178 
  179 struct tx_sw_desc {                /* SW state per Tx descriptor */
  180         struct mbuf     *m;
  181         bus_dmamap_t    map;
  182         int             flags;
  183 };
  184 
  185 struct rx_sw_desc {                /* SW state per Rx descriptor */
  186         caddr_t         rxsd_cl;
  187         struct mbuf     *m;
  188         bus_dmamap_t    map;
  189         int             flags;
  190 };
  191 
  192 struct txq_state {
  193         unsigned int    compl;
  194         unsigned int    gen;
  195         unsigned int    pidx;
  196 };
  197 
  198 struct refill_fl_cb_arg {
  199         int               error;
  200         bus_dma_segment_t seg;
  201         int               nseg;
  202 };
  203 
  204 
  205 /*
  206  * Maps a number of flits to the number of Tx descriptors that can hold them.
  207  * The formula is
  208  *
  209  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  210  *
  211  * HW allows up to 4 descriptors to be combined into a WR.
  212  */
  213 static uint8_t flit_desc_map[] = {
  214         0,
  215 #if SGE_NUM_GENBITS == 1
  216         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  217         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  218         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  219         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  220 #elif SGE_NUM_GENBITS == 2
  221         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  222         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  223         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  224         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  225 #else
  226 # error "SGE_NUM_GENBITS must be 1 or 2"
  227 #endif
  228 };
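       /*
        * Illustrative sketch (not part of the driver): the table above is a
        * precomputed form of the formula quoted in the comment.  Assuming
        * WR_FLITS is the per-WR flit budget used to build the table, an
        * equivalent direct computation would be:
        *
        *      static unsigned int
        *      flits_to_desc_formula(unsigned int flits)
        *      {
        *              if (flits == 0)
        *                      return (0);
        *              return (flits <= 2 ? 1 : 1 + (flits - 2) / (WR_FLITS - 1));
        *      }
        *
        * The lookup table trades a few bytes of memory for avoiding the
        * division on the transmit fast path; flits_to_desc() below simply
        * indexes into it.
        */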
  229 
  230 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  231 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  232 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  233 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  234 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  236         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  237 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  239         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  240 #define TXQ_RING_DEQUEUE(qs) \
  241         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  242 
  243 int cxgb_debug = 0;
  244 
  245 static void sge_timer_cb(void *arg);
  246 static void sge_timer_reclaim(void *arg, int ncount);
  247 static void sge_txq_reclaim_handler(void *arg, int ncount);
  248 static void cxgb_start_locked(struct sge_qset *qs);
  249 
  250 /*
   251  * XXX need to cope with bursty scheduling by looking at a wider
   252  * window than we do now when determining the need for coalescing
  253  *
  254  */
  255 static __inline uint64_t
  256 check_pkt_coalesce(struct sge_qset *qs) 
  257 { 
  258         struct adapter *sc; 
  259         struct sge_txq *txq; 
  260         uint8_t *fill;
  261 
  262         if (__predict_false(cxgb_tx_coalesce_force))
  263                 return (1);
  264         txq = &qs->txq[TXQ_ETH]; 
  265         sc = qs->port->adapter; 
  266         fill = &sc->tunq_fill[qs->idx];
  267 
  268         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  269                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  270         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
   271                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
   272         /*
   273          * Once the hardware transmit queue fills past the start threshold
   274          * we mark it as coalescing; we drop back out of coalescing when it
   275          * drains below the stop threshold and there are no packets enqueued.
   276          * This provides us with some degree of hysteresis.
   277          */
  278         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  279             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  280                 *fill = 0; 
  281         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  282                 *fill = 1; 
  283 
  284         return (sc->tunq_coalesce);
  285 } 
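       /*
        * Descriptive note: the per-queue-set flag in sc->tunq_fill[] carries
        * the hysteresis state described above, while the value returned is
        * sc->tunq_coalesce; cxgb_dequeue() below treats a non-zero return as
        * "coalesce", so small packets are batched into a single work request
        * only while the queue looks busy (or when the tx_coalesce_force
        * tunable overrides the heuristic).
        */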
  286 
  287 #ifdef __LP64__
  288 static void
  289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  290 {
  291         uint64_t wr_hilo;
  292 #if _BYTE_ORDER == _LITTLE_ENDIAN
  293         wr_hilo = wr_hi;
  294         wr_hilo |= (((uint64_t)wr_lo)<<32);
  295 #else
  296         wr_hilo = wr_lo;
  297         wr_hilo |= (((uint64_t)wr_hi)<<32);
  298 #endif  
  299         wrp->wrh_hilo = wr_hilo;
  300 }
  301 #else
  302 static void
  303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  304 {
  305 
  306         wrp->wrh_hi = wr_hi;
  307         wmb();
  308         wrp->wrh_lo = wr_lo;
  309 }
  310 #endif
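       /*
        * Illustration (the rationale is an assumption): on LP64 the two
        * 32-bit header words are combined into a single 64-bit store,
        * presumably so the hardware never observes a half-written header;
        * on 32-bit platforms the same ordering comes from storing wrh_hi
        * first and issuing wmb() before wrh_lo.  For example, wr_hi =
        * 0x11111111 and wr_lo = 0x22222222 yield wr_hilo =
        * 0x2222222211111111 on a little-endian machine, i.e. wr_hi lands in
        * the low 32 bits.
        */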
  311 
  312 struct coalesce_info {
  313         int count;
  314         int nbytes;
  315 };
  316 
  317 static int
  318 coalesce_check(struct mbuf *m, void *arg)
  319 {
  320         struct coalesce_info *ci = arg;
  321         int *count = &ci->count;
  322         int *nbytes = &ci->nbytes;
  323 
  324         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  325                 (*count < 7) && (m->m_next == NULL))) {
  326                 *count += 1;
  327                 *nbytes += m->m_len;
  328                 return (1);
  329         }
  330         return (0);
  331 }
  332 
  333 static struct mbuf *
  334 cxgb_dequeue(struct sge_qset *qs)
  335 {
  336         struct mbuf *m, *m_head, *m_tail;
  337         struct coalesce_info ci;
  338 
  339         
  340         if (check_pkt_coalesce(qs) == 0) 
  341                 return TXQ_RING_DEQUEUE(qs);
  342 
  343         m_head = m_tail = NULL;
  344         ci.count = ci.nbytes = 0;
  345         do {
  346                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  347                 if (m_head == NULL) {
  348                         m_tail = m_head = m;
  349                 } else if (m != NULL) {
  350                         m_tail->m_nextpkt = m;
  351                         m_tail = m;
  352                 }
  353         } while (m != NULL);
  354         if (ci.count > 7)
   355                 panic("trying to coalesce %d packets into one WR", ci.count);
  356         return (m_head);
  357 }
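       /*
        * Descriptive note: when coalescing, cxgb_dequeue() links the
        * dequeued packets through m_nextpkt.  coalesce_check() above bounds
        * the batch: the first packet is always taken, and further packets
        * are added only if they are single-mbuf (m_next == NULL), keep the
        * running byte total at or below 10500 and keep the packet count at
        * or below 7.  t3_encap() later emits such a chain as one batched
        * tunnel-TX work request.
        */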
  358         
  359 /**
  360  *      reclaim_completed_tx - reclaims completed Tx descriptors
   361  *      @qs: the queue set containing the Tx queue
   362  *      @reclaim_min: minimum reclaimable count; @queue: index of the Tx queue
  363  *
  364  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  365  *      and frees the associated buffers if possible.  Called with the Tx
  366  *      queue's lock held.
  367  */
  368 static __inline int
  369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  370 {
  371         struct sge_txq *q = &qs->txq[queue];
  372         int reclaim = desc_reclaimable(q);
  373 
  374         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  375             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  376                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  377 
  378         if (reclaim < reclaim_min)
  379                 return (0);
  380 
  381         mtx_assert(&qs->lock, MA_OWNED);
  382         if (reclaim > 0) {
  383                 t3_free_tx_desc(qs, reclaim, queue);
  384                 q->cleaned += reclaim;
  385                 q->in_use -= reclaim;
  386         }
  387         if (isset(&qs->txq_stopped, TXQ_ETH))
  388                 clrbit(&qs->txq_stopped, TXQ_ETH);
  389 
  390         return (reclaim);
  391 }
  392 
  393 #ifdef NETDUMP
  394 int
  395 cxgb_netdump_poll_tx(struct sge_qset *qs)
  396 {
  397 
  398         return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
  399 }
  400 #endif
  401 
  402 /**
  403  *      should_restart_tx - are there enough resources to restart a Tx queue?
  404  *      @q: the Tx queue
  405  *
  406  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  407  */
  408 static __inline int
  409 should_restart_tx(const struct sge_txq *q)
  410 {
  411         unsigned int r = q->processed - q->cleaned;
  412 
  413         return q->in_use - r < (q->size >> 1);
  414 }
  415 
  416 /**
  417  *      t3_sge_init - initialize SGE
  418  *      @adap: the adapter
  419  *      @p: the SGE parameters
  420  *
  421  *      Performs SGE initialization needed every time after a chip reset.
  422  *      We do not initialize any of the queue sets here, instead the driver
  423  *      top-level must request those individually.  We also do not enable DMA
  424  *      here, that should be done after the queues have been set up.
  425  */
  426 void
  427 t3_sge_init(adapter_t *adap, struct sge_params *p)
  428 {
  429         u_int ctrl, ups;
  430 
  431         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  432 
  433         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  434                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  435                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  436                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  437 #if SGE_NUM_GENBITS == 1
  438         ctrl |= F_EGRGENCTRL;
  439 #endif
  440         if (adap->params.rev > 0) {
  441                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  442                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  443         }
  444         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  445         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  446                      V_LORCQDRBTHRSH(512));
  447         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  448         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  449                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  450         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  451                      adap->params.rev < T3_REV_C ? 1000 : 500);
  452         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  453         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  454         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  455         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  456         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  457 }
  458 
  459 
  460 /**
  461  *      sgl_len - calculates the size of an SGL of the given capacity
  462  *      @n: the number of SGL entries
  463  *
  464  *      Calculates the number of flits needed for a scatter/gather list that
  465  *      can hold the given number of entries.
  466  */
  467 static __inline unsigned int
  468 sgl_len(unsigned int n)
  469 {
  470         return ((3 * n) / 2 + (n & 1));
  471 }
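       /*
        * Worked example: SGL entries are stored two per sg_ent (see
        * make_sgl() below), a full pair taking 3 flits (one flit for the two
        * 32-bit lengths plus two flits for the 64-bit addresses) and a
        * trailing odd entry taking 2.  Hence sgl_len(4) == 6 and
        * sgl_len(5) == 3*5/2 + 1 == 8.
        */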
  472 
  473 /**
  474  *      get_imm_packet - return the next ingress packet buffer from a response
  475  *      @resp: the response descriptor containing the packet data
  476  *
  477  *      Return a packet containing the immediate data of the given response.
  478  */
  479 static int
  480 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  481 {
  482 
  483         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  484                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  485                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  486         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  487                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  488                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  489         } else
  490                 m->m_len = IMMED_PKT_SIZE;
  491         m->m_ext.ext_buf = NULL;
  492         m->m_ext.ext_type = 0;
  493         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  494         return (0);     
  495 }
  496 
  497 static __inline u_int
  498 flits_to_desc(u_int n)
  499 {
  500         return (flit_desc_map[n]);
  501 }
  502 
  503 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  504                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  505                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  506                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  507                     F_HIRCQPARITYERROR)
  508 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  509 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  510                       F_RSPQDISABLED)
  511 
  512 /**
  513  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  514  *      @adapter: the adapter
  515  *
  516  *      Interrupt handler for SGE asynchronous (non-data) events.
  517  */
  518 void
  519 t3_sge_err_intr_handler(adapter_t *adapter)
  520 {
  521         unsigned int v, status;
  522 
  523         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  524         if (status & SGE_PARERR)
  525                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  526                          status & SGE_PARERR);
  527         if (status & SGE_FRAMINGERR)
  528                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  529                          status & SGE_FRAMINGERR);
  530         if (status & F_RSPQCREDITOVERFOW)
  531                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  532 
  533         if (status & F_RSPQDISABLED) {
  534                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  535 
  536                 CH_ALERT(adapter,
  537                          "packet delivered to disabled response queue (0x%x)\n",
  538                          (v >> S_RSPQ0DISABLED) & 0xff);
  539         }
  540 
  541         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  542         if (status & SGE_FATALERR)
  543                 t3_fatal_err(adapter);
  544 }
  545 
  546 void
  547 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  548 {
  549         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  550 
  551         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  552         nqsets *= adap->params.nports;
  553 
  554         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  555 
  556         while (!powerof2(fl_q_size))
  557                 fl_q_size--;
  558 
  559         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  560             is_offload(adap);
  561 
  562 #if __FreeBSD_version >= 700111
  563         if (use_16k) {
  564                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  565                 jumbo_buf_size = MJUM16BYTES;
  566         } else {
  567                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  568                 jumbo_buf_size = MJUM9BYTES;
  569         }
  570 #else
  571         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  572         jumbo_buf_size = MJUMPAGESIZE;
  573 #endif
  574         while (!powerof2(jumbo_q_size))
  575                 jumbo_q_size--;
  576 
  577         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  578                 device_printf(adap->dev,
  579                     "Insufficient clusters and/or jumbo buffers.\n");
  580 
  581         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  582 
  583         for (i = 0; i < SGE_QSETS; ++i) {
  584                 struct qset_params *q = p->qset + i;
  585 
  586                 if (adap->params.nports > 2) {
  587                         q->coalesce_usecs = 50;
  588                 } else {
  589 #ifdef INVARIANTS                       
  590                         q->coalesce_usecs = 10;
  591 #else
  592                         q->coalesce_usecs = 5;
  593 #endif                  
  594                 }
  595                 q->polling = 0;
  596                 q->rspq_size = RSPQ_Q_SIZE;
  597                 q->fl_size = fl_q_size;
  598                 q->jumbo_size = jumbo_q_size;
  599                 q->jumbo_buf_size = jumbo_buf_size;
  600                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  601                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  602                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  603                 q->cong_thres = 0;
  604         }
  605 }
  606 
  607 int
  608 t3_sge_alloc(adapter_t *sc)
  609 {
  610 
  611         /* The parent tag. */
  612         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  613                                 1, 0,                   /* algnmnt, boundary */
  614                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  615                                 BUS_SPACE_MAXADDR,      /* highaddr */
  616                                 NULL, NULL,             /* filter, filterarg */
  617                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  618                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  619                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  620                                 0,                      /* flags */
  621                                 NULL, NULL,             /* lock, lockarg */
  622                                 &sc->parent_dmat)) {
  623                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  624                 return (ENOMEM);
  625         }
  626 
  627         /*
  628          * DMA tag for normal sized RX frames
  629          */
  630         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  631                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  632                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  633                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  634                 return (ENOMEM);
  635         }
  636 
  637         /* 
  638          * DMA tag for jumbo sized RX frames.
  639          */
  640         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  641                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  642                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  643                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  644                 return (ENOMEM);
  645         }
  646 
  647         /* 
  648          * DMA tag for TX frames.
  649          */
  650         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  651                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  652                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  653                 NULL, NULL, &sc->tx_dmat)) {
  654                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  655                 return (ENOMEM);
  656         }
  657 
  658         return (0);
  659 }
  660 
  661 int
  662 t3_sge_free(struct adapter * sc)
  663 {
  664 
  665         if (sc->tx_dmat != NULL)
  666                 bus_dma_tag_destroy(sc->tx_dmat);
  667 
  668         if (sc->rx_jumbo_dmat != NULL)
  669                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  670 
  671         if (sc->rx_dmat != NULL)
  672                 bus_dma_tag_destroy(sc->rx_dmat);
  673 
  674         if (sc->parent_dmat != NULL)
  675                 bus_dma_tag_destroy(sc->parent_dmat);
  676 
  677         return (0);
  678 }
  679 
  680 void
  681 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  682 {
  683 
  684         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  685         qs->rspq.polling = 0 /* p->polling */;
  686 }
  687 
  688 #if !defined(__i386__) && !defined(__amd64__)
  689 static void
  690 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  691 {
  692         struct refill_fl_cb_arg *cb_arg = arg;
  693         
  694         cb_arg->error = error;
  695         cb_arg->seg = segs[0];
  696         cb_arg->nseg = nseg;
  697 
  698 }
  699 #endif
  700 /**
  701  *      refill_fl - refill an SGE free-buffer list
  702  *      @sc: the controller softc
  703  *      @q: the free-list to refill
  704  *      @n: the number of new buffers to allocate
  705  *
  706  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
   707  *      The caller must ensure that @n does not exceed the queue's capacity.
  708  */
  709 static void
  710 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  711 {
  712         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  713         struct rx_desc *d = &q->desc[q->pidx];
  714         struct refill_fl_cb_arg cb_arg;
  715         struct mbuf *m;
  716         caddr_t cl;
  717         int err;
  718         
  719         cb_arg.error = 0;
  720         while (n--) {
  721                 /*
  722                  * We allocate an uninitialized mbuf + cluster, mbuf is
  723                  * initialized after rx.
  724                  */
  725                 if (q->zone == zone_pack) {
  726                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  727                                 break;
  728                         cl = m->m_ext.ext_buf;                  
  729                 } else {
  730                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  731                                 break;
  732                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  733                                 uma_zfree(q->zone, cl);
  734                                 break;
  735                         }
  736                 }
  737                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  738                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  739                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  740                                 uma_zfree(q->zone, cl);
  741                                 goto done;
  742                         }
  743                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  744                 }
  745 #if !defined(__i386__) && !defined(__amd64__)
  746                 err = bus_dmamap_load(q->entry_tag, sd->map,
  747                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  748                 
  749                 if (err != 0 || cb_arg.error) {
  750                         if (q->zone != zone_pack)
  751                                 uma_zfree(q->zone, cl);
  752                         m_free(m);
  753                         goto done;
  754                 }
  755 #else
  756                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  757 #endif          
  758                 sd->flags |= RX_SW_DESC_INUSE;
  759                 sd->rxsd_cl = cl;
  760                 sd->m = m;
  761                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  762                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  763                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  764                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  765 
  766                 d++;
  767                 sd++;
  768 
  769                 if (++q->pidx == q->size) {
  770                         q->pidx = 0;
  771                         q->gen ^= 1;
  772                         sd = q->sdesc;
  773                         d = q->desc;
  774                 }
  775                 q->credits++;
  776                 q->db_pending++;
  777         }
  778 
  779 done:
  780         if (q->db_pending >= 32) {
  781                 q->db_pending = 0;
  782                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  783         }
  784 }
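       /*
        * Descriptive note: newly added free-list buffers are made visible to
        * the hardware lazily; q->db_pending counts buffers queued since the
        * last doorbell and the SG_KDOORBELL write above flushes them in
        * batches of 32, keeping register traffic down while the list is
        * topped up a few buffers at a time.
        */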
  785 
  786 
  787 /**
  788  *      free_rx_bufs - free the Rx buffers on an SGE free list
   789  *      @sc: the controller softc
  790  *      @q: the SGE free list to clean up
  791  *
  792  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  793  *      this queue should be stopped before calling this function.
  794  */
  795 static void
  796 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  797 {
  798         u_int cidx = q->cidx;
  799 
  800         while (q->credits--) {
  801                 struct rx_sw_desc *d = &q->sdesc[cidx];
  802 
  803                 if (d->flags & RX_SW_DESC_INUSE) {
  804                         bus_dmamap_unload(q->entry_tag, d->map);
  805                         bus_dmamap_destroy(q->entry_tag, d->map);
  806                         if (q->zone == zone_pack) {
  807                                 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
  808                                 uma_zfree(zone_pack, d->m);
  809                         } else {
  810                                 m_init(d->m, M_NOWAIT, MT_DATA, 0);
  811                                 uma_zfree(zone_mbuf, d->m);
  812                                 uma_zfree(q->zone, d->rxsd_cl);
  813                         }                       
  814                 }
  815                 
  816                 d->rxsd_cl = NULL;
  817                 d->m = NULL;
  818                 if (++cidx == q->size)
  819                         cidx = 0;
  820         }
  821 }
  822 
  823 static __inline void
  824 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  825 {
  826         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  827 }
  828 
  829 static __inline void
  830 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  831 {
  832         uint32_t reclaimable = fl->size - fl->credits;
  833 
  834         if (reclaimable > 0)
  835                 refill_fl(adap, fl, min(max, reclaimable));
  836 }
  837 
  838 /**
  839  *      recycle_rx_buf - recycle a receive buffer
  840  *      @adapter: the adapter
  841  *      @q: the SGE free list
  842  *      @idx: index of buffer to recycle
  843  *
  844  *      Recycles the specified buffer on the given free list by adding it at
  845  *      the next available slot on the list.
  846  */
  847 static void
  848 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  849 {
  850         struct rx_desc *from = &q->desc[idx];
  851         struct rx_desc *to   = &q->desc[q->pidx];
  852 
  853         q->sdesc[q->pidx] = q->sdesc[idx];
  854         to->addr_lo = from->addr_lo;        // already big endian
  855         to->addr_hi = from->addr_hi;        // likewise
  856         wmb();  /* necessary ? */
  857         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  858         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  859         q->credits++;
  860 
  861         if (++q->pidx == q->size) {
  862                 q->pidx = 0;
  863                 q->gen ^= 1;
  864         }
  865         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  866 }
  867 
  868 static void
  869 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  870 {
  871         uint32_t *addr;
  872 
  873         addr = arg;
  874         *addr = segs[0].ds_addr;
  875 }
  876 
  877 static int
  878 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  879     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  880     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  881 {
  882         size_t len = nelem * elem_size;
  883         void *s = NULL;
  884         void *p = NULL;
  885         int err;
  886 
  887         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  888                                       BUS_SPACE_MAXADDR_32BIT,
  889                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  890                                       len, 0, NULL, NULL, tag)) != 0) {
  891                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  892                 return (ENOMEM);
  893         }
  894 
  895         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  896                                     map)) != 0) {
  897                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  898                 return (ENOMEM);
  899         }
  900 
  901         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  902         bzero(p, len);
  903         *(void **)desc = p;
  904 
  905         if (sw_size) {
  906                 len = nelem * sw_size;
  907                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  908                 *(void **)sdesc = s;
  909         }
  910         if (parent_entry_tag == NULL)
  911                 return (0);
  912             
  913         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  914                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  915                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  916                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  917                                       NULL, NULL, entry_tag)) != 0) {
  918                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  919                 return (ENOMEM);
  920         }
  921         return (0);
  922 }
  923 
  924 static void
  925 sge_slow_intr_handler(void *arg, int ncount)
  926 {
  927         adapter_t *sc = arg;
  928 
  929         t3_slow_intr_handler(sc);
  930         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  931         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  932 }
  933 
  934 /**
   935  *      sge_timer_cb - perform periodic maintenance of the SGE queues
   936  *      @arg: the adapter
   937  *
   938  *      Runs periodically from a timer to perform maintenance of the SGE
   939  *      queues.  It performs the following tasks:
  940  *
  941  *      a) Cleans up any completed Tx descriptors that may still be pending.
  942  *      Normal descriptor cleanup happens when new packets are added to a Tx
  943  *      queue so this timer is relatively infrequent and does any cleanup only
  944  *      if the Tx queue has not seen any new packets in a while.  We make a
  945  *      best effort attempt to reclaim descriptors, in that we don't wait
  946  *      around if we cannot get a queue's lock (which most likely is because
  947  *      someone else is queueing new packets and so will also handle the clean
  948  *      up).  Since control queues use immediate data exclusively we don't
  949  *      bother cleaning them up here.
  950  *
  951  *      b) Replenishes Rx queues that have run out due to memory shortage.
  952  *      Normally new Rx buffers are added when existing ones are consumed but
  953  *      when out of memory a queue can become empty.  We try to add only a few
  954  *      buffers here, the queue will be replenished fully as these new buffers
  955  *      are used up if memory shortage has subsided.
  956  *      
  957  *      c) Return coalesced response queue credits in case a response queue is
  958  *      starved.
  959  *
  960  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  961  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  962  */
  963 static void
  964 sge_timer_cb(void *arg)
  965 {
  966         adapter_t *sc = arg;
  967         if ((sc->flags & USING_MSIX) == 0) {
  968                 
  969                 struct port_info *pi;
  970                 struct sge_qset *qs;
  971                 struct sge_txq  *txq;
  972                 int i, j;
  973                 int reclaim_ofl, refill_rx;
  974 
  975                 if (sc->open_device_map == 0) 
  976                         return;
  977 
  978                 for (i = 0; i < sc->params.nports; i++) {
  979                         pi = &sc->port[i];
  980                         for (j = 0; j < pi->nqsets; j++) {
  981                                 qs = &sc->sge.qs[pi->first_qset + j];
  982                                 txq = &qs->txq[0];
  983                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  984                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  985                                     (qs->fl[1].credits < qs->fl[1].size));
  986                                 if (reclaim_ofl || refill_rx) {
  987                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  988                                         break;
  989                                 }
  990                         }
  991                 }
  992         }
  993         
  994         if (sc->params.nports > 2) {
  995                 int i;
  996 
  997                 for_each_port(sc, i) {
  998                         struct port_info *pi = &sc->port[i];
  999 
 1000                         t3_write_reg(sc, A_SG_KDOORBELL, 
 1001                                      F_SELEGRCNTX | 
 1002                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1003                 }
 1004         }       
 1005         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1006             sc->open_device_map != 0)
 1007                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1008 }
 1009 
 1010 /*
 1011  * This is meant to be a catch-all function to keep sge state private
 1012  * to sge.c
 1013  *
 1014  */
 1015 int
 1016 t3_sge_init_adapter(adapter_t *sc)
 1017 {
 1018         callout_init(&sc->sge_timer_ch, 1);
 1019         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1020         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1021         return (0);
 1022 }
 1023 
 1024 int
 1025 t3_sge_reset_adapter(adapter_t *sc)
 1026 {
 1027         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1028         return (0);
 1029 }
 1030 
 1031 int
 1032 t3_sge_init_port(struct port_info *pi)
 1033 {
 1034         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1035         return (0);
 1036 }
 1037 
 1038 /**
 1039  *      refill_rspq - replenish an SGE response queue
 1040  *      @adapter: the adapter
 1041  *      @q: the response queue to replenish
 1042  *      @credits: how many new responses to make available
 1043  *
 1044  *      Replenishes a response queue by making the supplied number of responses
 1045  *      available to HW.
 1046  */
 1047 static __inline void
 1048 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1049 {
 1050 
 1051         /* mbufs are allocated on demand when a rspq entry is processed. */
 1052         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1053                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1054 }
 1055 
 1056 static void
 1057 sge_txq_reclaim_handler(void *arg, int ncount)
 1058 {
 1059         struct sge_qset *qs = arg;
 1060         int i;
 1061 
 1062         for (i = 0; i < 3; i++)
 1063                 reclaim_completed_tx(qs, 16, i);
 1064 }
 1065 
 1066 static void
 1067 sge_timer_reclaim(void *arg, int ncount)
 1068 {
 1069         struct port_info *pi = arg;
 1070         int i, nqsets = pi->nqsets;
 1071         adapter_t *sc = pi->adapter;
 1072         struct sge_qset *qs;
 1073         struct mtx *lock;
 1074         
 1075         KASSERT((sc->flags & USING_MSIX) == 0,
 1076             ("can't call timer reclaim for msi-x"));
 1077 
 1078         for (i = 0; i < nqsets; i++) {
 1079                 qs = &sc->sge.qs[pi->first_qset + i];
 1080 
 1081                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1082                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1083                             &sc->sge.qs[0].rspq.lock;
 1084 
 1085                 if (mtx_trylock(lock)) {
 1086                         /* XXX currently assume that we are *NOT* polling */
 1087                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1088 
 1089                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1090                                 __refill_fl(sc, &qs->fl[0]);
 1091                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1092                                 __refill_fl(sc, &qs->fl[1]);
 1093                         
 1094                         if (status & (1 << qs->rspq.cntxt_id)) {
 1095                                 if (qs->rspq.credits) {
 1096                                         refill_rspq(sc, &qs->rspq, 1);
 1097                                         qs->rspq.credits--;
 1098                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1099                                             1 << qs->rspq.cntxt_id);
 1100                                 }
 1101                         }
 1102                         mtx_unlock(lock);
 1103                 }
 1104         }
 1105 }
 1106 
 1107 /**
 1108  *      init_qset_cntxt - initialize an SGE queue set context info
 1109  *      @qs: the queue set
 1110  *      @id: the queue set id
 1111  *
 1112  *      Initializes the TIDs and context ids for the queues of a queue set.
 1113  */
 1114 static void
 1115 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1116 {
 1117 
 1118         qs->rspq.cntxt_id = id;
 1119         qs->fl[0].cntxt_id = 2 * id;
 1120         qs->fl[1].cntxt_id = 2 * id + 1;
 1121         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1122         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1123         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1124         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1125         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1126 
 1127         /* XXX: a sane limit is needed instead of INT_MAX */
 1128         mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
 1129         mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
 1130         mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
 1131 }
 1132 
 1133 
 1134 static void
 1135 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1136 {
 1137         txq->in_use += ndesc;
 1138         /*
  1139          * XXX we don't handle stopping of the queue here;
  1140          * presumably the start path handles this when we bump against the end
 1141          */
 1142         txqs->gen = txq->gen;
 1143         txq->unacked += ndesc;
 1144         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1145         txq->unacked &= 31;
 1146         txqs->pidx = txq->pidx;
 1147         txq->pidx += ndesc;
 1148 #ifdef INVARIANTS
 1149         if (((txqs->pidx > txq->cidx) &&
 1150                 (txq->pidx < txqs->pidx) &&
 1151                 (txq->pidx >= txq->cidx)) ||
 1152             ((txqs->pidx < txq->cidx) &&
 1153                 (txq->pidx >= txq-> cidx)) ||
 1154             ((txqs->pidx < txq->cidx) &&
 1155                 (txq->cidx < txqs->pidx)))
 1156                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1157                     txqs->pidx, txq->pidx, txq->cidx);
 1158 #endif
 1159         if (txq->pidx >= txq->size) {
 1160                 txq->pidx -= txq->size;
 1161                 txq->gen ^= 1;
 1162         }
 1163 
 1164 }
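       /*
        * Descriptive note: the unacked/compl arithmetic above asks the
        * hardware for a work-request completion roughly every 32
        * descriptors.  unacked counts descriptors since the last completion
        * request; when the running total crosses 32 the (unacked & 32) bit
        * is shifted up into bit S_WR_COMPL of the WR header and unacked is
        * then reduced modulo 32.  For example, unacked == 30 and ndesc == 4
        * yields a completion request and leaves unacked at 2.
        */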
 1165 
 1166 /**
 1167  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1168  *      @m: the packet mbufs
 1169  *      @nsegs: the number of segments 
 1170  *
 1171  *      Returns the number of Tx descriptors needed for the given Ethernet
 1172  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1173  */
 1174 static __inline unsigned int
 1175 calc_tx_descs(const struct mbuf *m, int nsegs)
 1176 {
 1177         unsigned int flits;
 1178 
 1179         if (m->m_pkthdr.len <= PIO_LEN)
 1180                 return 1;
 1181 
 1182         flits = sgl_len(nsegs) + 2;
 1183         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1184                 flits++;
 1185 
 1186         return flits_to_desc(flits);
 1187 }
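       /*
        * Worked example (the immediate-data rationale is an assumption): a
        * packet no larger than PIO_LEN needs a single descriptor, presumably
        * because it is copied into the descriptor as immediate data.
        * Otherwise a 4-segment non-TSO packet needs sgl_len(4) + 2 = 8
        * flits, which maps to one descriptor, while a 10-segment TSO packet
        * needs sgl_len(10) + 3 = 18 flits and spills into a second
        * descriptor.
        */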
 1188 
 1189 /**
 1190  *      make_sgl - populate a scatter/gather list for a packet
 1191  *      @sgp: the SGL to populate
 1192  *      @segs: the packet dma segments
 1193  *      @nsegs: the number of segments
 1194  *
  1195  *      Generates a scatter/gather list for the buffers that make up a packet.
  1196  *      The caller is responsible for sizing the SGL appropriately; sgl_len()
  1197  *      gives the number of flits needed for a given number of entries.
 1198  */
 1199 static __inline void
 1200 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1201 {
 1202         int i, idx;
 1203         
 1204         for (idx = 0, i = 0; i < nsegs; i++) {
 1205                 /*
 1206                  * firmware doesn't like empty segments
 1207                  */
 1208                 if (segs[i].ds_len == 0)
 1209                         continue;
 1210                 if (i && idx == 0) 
 1211                         ++sgp;
 1212                 
 1213                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1214                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1215                 idx ^= 1;
 1216         }
 1217         
 1218         if (idx) {
 1219                 sgp->len[idx] = 0;
 1220                 sgp->addr[idx] = 0;
 1221         }
 1222 }
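       /*
        * Descriptive note: entries are packed two per struct sg_ent, with
        * idx toggling between the two length/address slots and sgp advancing
        * once a pair is full.  Zero-length DMA segments are skipped (the
        * firmware does not accept them), and a dangling odd entry is
        * terminated with a zero length/address pair, presumably to mark the
        * end of the list for the hardware.
        */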
 1223         
 1224 /**
 1225  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1226  *      @adap: the adapter
 1227  *      @q: the Tx queue
 1228  *
  1229  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
  1230  *      where the HW may go to sleep just after we check; in that case the
  1231  *      interrupt handler will detect the outstanding TX packet and ring the
  1232  *      doorbell for us.
  1233  *
  1234  *      When GTS is disabled we ring only when @mustring is set or 32 descriptors have accumulated.
 1235  */
 1236 static __inline void
 1237 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1238 {
 1239 #if USE_GTS
 1240         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1241         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1242                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1243 #ifdef T3_TRACE
 1244                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1245                           q->cntxt_id);
 1246 #endif
 1247                 t3_write_reg(adap, A_SG_KDOORBELL,
 1248                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1249         }
 1250 #else
 1251         if (mustring || ++q->db_pending >= 32) {
 1252                 wmb();            /* write descriptors before telling HW */
 1253                 t3_write_reg(adap, A_SG_KDOORBELL,
 1254                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1255                 q->db_pending = 0;
 1256         }
 1257 #endif
 1258 }
 1259 
 1260 static __inline void
 1261 wr_gen2(struct tx_desc *d, unsigned int gen)
 1262 {
 1263 #if SGE_NUM_GENBITS == 2
 1264         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1265 #endif
 1266 }
 1267 
 1268 /**
 1269  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1270  *      @ndesc: number of Tx descriptors spanned by the SGL
 1271  *      @txd: first Tx descriptor to be written
 1272  *      @txqs: txq state (generation and producer index)
 1273  *      @txq: the SGE Tx queue
 1274  *      @sgl: the SGL
 1275  *      @flits: number of flits to the start of the SGL in the first descriptor
 1276  *      @sgl_flits: the SGL size in flits
 1277  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1278  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1279  *
 1280  *      Write a work request header and an associated SGL.  If the SGL is
 1281  *      small enough to fit into one Tx descriptor it has already been written
 1282  *      and we just need to write the WR header.  Otherwise we distribute the
 1283  *      SGL across the number of descriptors it spans.
 1284  */
 1285 static void
 1286 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1287     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1288     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1289 {
 1290 
 1291         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1292         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1293         
 1294         if (__predict_true(ndesc == 1)) {
 1295                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1296                     V_WR_SGLSFLT(flits)) | wr_hi,
 1297                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1298                     wr_lo);
 1299 
 1300                 wr_gen2(txd, txqs->gen);
 1301                 
 1302         } else {
 1303                 unsigned int ogen = txqs->gen;
 1304                 const uint64_t *fp = (const uint64_t *)sgl;
 1305                 struct work_request_hdr *wp = wrp;
 1306                 
 1307                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1308                     V_WR_SGLSFLT(flits)) | wr_hi;
 1309                 
 1310                 while (sgl_flits) {
 1311                         unsigned int avail = WR_FLITS - flits;
 1312 
 1313                         if (avail > sgl_flits)
 1314                                 avail = sgl_flits;
 1315                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1316                         sgl_flits -= avail;
 1317                         ndesc--;
 1318                         if (!sgl_flits)
 1319                                 break;
 1320                         
 1321                         fp += avail;
 1322                         txd++;
 1323                         txsd++;
 1324                         if (++txqs->pidx == txq->size) {
 1325                                 txqs->pidx = 0;
 1326                                 txqs->gen ^= 1;
 1327                                 txd = txq->desc;
 1328                                 txsd = txq->sdesc;
 1329                         }
 1330 
 1331                         /*
 1332                          * when the head of the mbuf chain
 1333                          * is freed all clusters will be freed
 1334                          * with it
 1335                          */
 1336                         wrp = (struct work_request_hdr *)txd;
 1337                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1338                             V_WR_SGLSFLT(1)) | wr_hi;
 1339                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1340                                     sgl_flits + 1)) |
 1341                             V_WR_GEN(txqs->gen)) | wr_lo;
 1342                         wr_gen2(txd, txqs->gen);
 1343                         flits = 1;
 1344                 }
 1345                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1346                 wmb();
 1347                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1348                 wr_gen2((struct tx_desc *)wp, ogen);
 1349         }
 1350 }
 1351 
 1352 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1353 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1354 
 1355 #define GET_VTAG(cntrl, m) \
 1356 do { \
 1357         if ((m)->m_flags & M_VLANTAG)                                               \
 1358                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1359 } while (0)
 1360 
 1361 static int
 1362 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1363 {
 1364         adapter_t *sc;
 1365         struct mbuf *m0;
 1366         struct sge_txq *txq;
 1367         struct txq_state txqs;
 1368         struct port_info *pi;
 1369         unsigned int ndesc, flits, cntrl, mlen;
 1370         int err, nsegs, tso_info = 0;
 1371 
 1372         struct work_request_hdr *wrp;
 1373         struct tx_sw_desc *txsd;
 1374         struct sg_ent *sgp, *sgl;
 1375         uint32_t wr_hi, wr_lo, sgl_flits; 
 1376         bus_dma_segment_t segs[TX_MAX_SEGS];
 1377 
 1378         struct tx_desc *txd;
 1379                 
 1380         pi = qs->port;
 1381         sc = pi->adapter;
 1382         txq = &qs->txq[TXQ_ETH];
 1383         txd = &txq->desc[txq->pidx];
 1384         txsd = &txq->sdesc[txq->pidx];
 1385         sgl = txq->txq_sgl;
 1386 
 1387         prefetch(txd);
 1388         m0 = *m;
 1389 
 1390         mtx_assert(&qs->lock, MA_OWNED);
 1391         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1392         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1393         
 1394         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1395             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1396                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1397 
 1398         if (m0->m_nextpkt != NULL) {
 1399                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1400                 ndesc = 1;
 1401                 mlen = 0;
 1402         } else {
 1403                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1404                     &m0, segs, &nsegs))) {
 1405                         if (cxgb_debug)
 1406                                 printf("failed ... err=%d\n", err);
 1407                         return (err);
 1408                 }
 1409                 mlen = m0->m_pkthdr.len;
 1410                 ndesc = calc_tx_descs(m0, nsegs);
 1411         }
 1412         txq_prod(txq, ndesc, &txqs);
 1413 
 1414         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1415         txsd->m = m0;
 1416 
 1417         if (m0->m_nextpkt != NULL) {
 1418                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1419                 int i, fidx;
 1420 
 1421                 if (nsegs > 7)
 1422                         panic("trying to coalesce %d packets into one WR", nsegs);
 1423                 txq->txq_coalesced += nsegs;
 1424                 wrp = (struct work_request_hdr *)txd;
 1425                 flits = nsegs*2 + 1;
 1426 
 1427                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1428                         struct cpl_tx_pkt_batch_entry *cbe;
 1429                         uint64_t flit;
 1430                         uint32_t *hflit = (uint32_t *)&flit;
 1431                         int cflags = m0->m_pkthdr.csum_flags;
 1432 
 1433                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1434                         GET_VTAG(cntrl, m0);
 1435                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1436                         if (__predict_false(!(cflags & CSUM_IP)))
 1437                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1438                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1439                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1440                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1441 
 1442                         hflit[0] = htonl(cntrl);
 1443                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1444                         flit |= htobe64(1 << 24);
 1445                         cbe = &cpl_batch->pkt_entry[i];
 1446                         cbe->cntrl = hflit[0];
 1447                         cbe->len = hflit[1];
 1448                         cbe->addr = htobe64(segs[i].ds_addr);
 1449                 }
 1450 
 1451                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1452                     V_WR_SGLSFLT(flits)) |
 1453                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1454                 wr_lo = htonl(V_WR_LEN(flits) |
 1455                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1456                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1457                 wmb();
 1458                 ETHER_BPF_MTAP(pi->ifp, m0);
 1459                 wr_gen2(txd, txqs.gen);
 1460                 check_ring_tx_db(sc, txq, 0);
 1461                 return (0);             
 1462         } else if (tso_info) {
 1463                 uint16_t eth_type;
 1464                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1465                 struct ether_header *eh;
 1466                 void *l3hdr;
 1467                 struct tcphdr *tcp;
 1468 
 1469                 txd->flit[2] = 0;
 1470                 GET_VTAG(cntrl, m0);
 1471                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1472                 hdr->cntrl = htonl(cntrl);
 1473                 hdr->len = htonl(mlen | 0x80000000);
 1474 
 1475                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1476                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
 1477                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1478                             (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
 1479                         panic("tx tso packet too small");
 1480                 }
 1481 
 1482                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1483                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1484                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1485                         if (__predict_false(m0 == NULL)) {
 1486                                 /* XXX panic probably an overreaction */
 1487                                 panic("couldn't fit header into mbuf");
 1488                         }
 1489                 }
 1490 
 1491                 eh = mtod(m0, struct ether_header *);
 1492                 eth_type = eh->ether_type;
 1493                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1494                         struct ether_vlan_header *evh = (void *)eh;
 1495 
 1496                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1497                         l3hdr = evh + 1;
 1498                         eth_type = evh->evl_proto;
 1499                 } else {
 1500                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1501                         l3hdr = eh + 1;
 1502                 }
 1503 
 1504                 if (eth_type == htons(ETHERTYPE_IP)) {
 1505                         struct ip *ip = l3hdr;
 1506 
 1507                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1508                         tcp = (struct tcphdr *)(ip + 1);
 1509                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1510                         struct ip6_hdr *ip6 = l3hdr;
 1511 
 1512                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1513                             ("%s: CSUM_TSO with ip6_nxt %d",
 1514                             __func__, ip6->ip6_nxt));
 1515 
 1516                         tso_info |= F_LSO_IPV6;
 1517                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1518                         tcp = (struct tcphdr *)(ip6 + 1);
 1519                 } else
 1520                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1521 
 1522                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1523                 hdr->lso_info = htonl(tso_info);
 1524 
 1525                 if (__predict_false(mlen <= PIO_LEN)) {
 1526                         /*
 1527                          * Packet is not undersized but still fits in
 1528                          * PIO_LEN; this indicates a TSO bug at a higher level.
 1529                          */
 1530                         txsd->m = NULL;
 1531                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1532                         flits = (mlen + 7) / 8 + 3;
 1533                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1534                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1535                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1536                         wr_lo = htonl(V_WR_LEN(flits) |
 1537                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1538                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1539                         wmb();
 1540                         ETHER_BPF_MTAP(pi->ifp, m0);
 1541                         wr_gen2(txd, txqs.gen);
 1542                         check_ring_tx_db(sc, txq, 0);
 1543                         m_freem(m0);
 1544                         return (0);
 1545                 }
 1546                 flits = 3;      
 1547         } else {
 1548                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1549                 
 1550                 GET_VTAG(cntrl, m0);
 1551                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1552                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1553                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1554                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1555                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1556                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1557                 cpl->cntrl = htonl(cntrl);
 1558                 cpl->len = htonl(mlen | 0x80000000);
 1559 
 1560                 if (mlen <= PIO_LEN) {
 1561                         txsd->m = NULL;
 1562                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1563                         flits = (mlen + 7) / 8 + 2;
 1564                         
 1565                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1566                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1567                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1568                         wr_lo = htonl(V_WR_LEN(flits) |
 1569                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1570                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1571                         wmb();
 1572                         ETHER_BPF_MTAP(pi->ifp, m0);
 1573                         wr_gen2(txd, txqs.gen);
 1574                         check_ring_tx_db(sc, txq, 0);
 1575                         m_freem(m0);
 1576                         return (0);
 1577                 }
 1578                 flits = 2;
 1579         }
 1580         wrp = (struct work_request_hdr *)txd;
 1581         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1582         make_sgl(sgp, segs, nsegs);
 1583 
 1584         sgl_flits = sgl_len(nsegs);
 1585 
 1586         ETHER_BPF_MTAP(pi->ifp, m0);
 1587 
 1588         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1589         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1590         wr_lo = htonl(V_WR_TID(txq->token));
 1591         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1592             sgl_flits, wr_hi, wr_lo);
 1593         check_ring_tx_db(sc, txq, 0);
 1594 
 1595         return (0);
 1596 }
 1597 
 1598 #ifdef NETDUMP
 1599 int
 1600 cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
 1601 {
 1602         int error;
 1603 
 1604         error = t3_encap(qs, m);
 1605         if (error == 0)
 1606                 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
 1607         else if (*m != NULL) {
 1608                 m_freem(*m);
 1609                 *m = NULL;
 1610         }
 1611         return (error);
 1612 }
 1613 #endif
 1614 
 1615 void
 1616 cxgb_tx_watchdog(void *arg)
 1617 {
 1618         struct sge_qset *qs = arg;
 1619         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1620 
 1621         if (qs->coalescing != 0 &&
 1622             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1623             TXQ_RING_EMPTY(qs))
 1624                 qs->coalescing = 0; 
 1625         else if (qs->coalescing == 0 &&
 1626             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1627                 qs->coalescing = 1;
 1628         if (TXQ_TRYLOCK(qs)) {
 1629                 qs->qs_flags |= QS_FLUSHING;
 1630                 cxgb_start_locked(qs);
 1631                 qs->qs_flags &= ~QS_FLUSHING;
 1632                 TXQ_UNLOCK(qs);
 1633         }
 1634         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1635                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1636                     qs, txq->txq_watchdog.c_cpu);
 1637 }
 1638 
 1639 static void
 1640 cxgb_tx_timeout(void *arg)
 1641 {
 1642         struct sge_qset *qs = arg;
 1643         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1644 
 1645         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1646                 qs->coalescing = 1;     
 1647         if (TXQ_TRYLOCK(qs)) {
 1648                 qs->qs_flags |= QS_TIMEOUT;
 1649                 cxgb_start_locked(qs);
 1650                 qs->qs_flags &= ~QS_TIMEOUT;
 1651                 TXQ_UNLOCK(qs);
 1652         }
 1653 }
 1654 
 1655 static void
 1656 cxgb_start_locked(struct sge_qset *qs)
 1657 {
 1658         struct mbuf *m_head = NULL;
 1659         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1660         struct port_info *pi = qs->port;
 1661         struct ifnet *ifp = pi->ifp;
 1662 
 1663         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1664                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1665 
 1666         if (!pi->link_config.link_ok) {
 1667                 TXQ_RING_FLUSH(qs);
 1668                 return;
 1669         }
 1670         TXQ_LOCK_ASSERT(qs);
 1671         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1672             pi->link_config.link_ok) {
 1673                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1674 
 1675                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1676                         break;
 1677 
 1678                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1679                         break;
 1680                 /*
 1681                  *  Encapsulation can modify our pointer, and/or make it
 1682                  *  NULL on failure.  In that event, we can't requeue.
 1683                  */
 1684                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1685                         break;
 1686 
 1687                 m_head = NULL;
 1688         }
 1689 
 1690         if (txq->db_pending)
 1691                 check_ring_tx_db(pi->adapter, txq, 1);
 1692 
 1693         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1694             pi->link_config.link_ok)
 1695                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1696                     qs, txq->txq_timer.c_cpu);
 1697         if (m_head != NULL)
 1698                 m_freem(m_head);
 1699 }
 1700 
 1701 static int
 1702 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1703 {
 1704         struct port_info *pi = qs->port;
 1705         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1706         struct buf_ring *br = txq->txq_mr;
 1707         int error, avail;
 1708 
 1709         avail = txq->size - txq->in_use;
 1710         TXQ_LOCK_ASSERT(qs);
 1711 
 1712         /*
 1713          * We can only do a direct transmit if the following are true:
 1714          * - we aren't coalescing (ring < 3/4 full)
 1715          * - the link is up -- checked in caller
 1716          * - there are no packets enqueued already
 1717          * - there is space in hardware transmit queue 
 1718          */
 1719         if (check_pkt_coalesce(qs) == 0 &&
 1720             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1721                 if (t3_encap(qs, &m)) {
 1722                         if (m != NULL &&
 1723                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1724                                 return (error);
 1725                 } else {
 1726                         if (txq->db_pending)
 1727                                 check_ring_tx_db(pi->adapter, txq, 1);
 1728 
 1729                         /*
 1730                          * We've bypassed the buf ring so we need to update
 1731                          * the stats directly
 1732                          */
 1733                         txq->txq_direct_packets++;
 1734                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1735                 }
 1736         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1737                 return (error);
 1738 
 1739         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1740         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1741             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1742                 cxgb_start_locked(qs);
 1743         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1744                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1745                     qs, txq->txq_timer.c_cpu);
 1746         return (0);
 1747 }
 1748 
 1749 int
 1750 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1751 {
 1752         struct sge_qset *qs;
 1753         struct port_info *pi = ifp->if_softc;
 1754         int error, qidx = pi->first_qset;
 1755 
 1756         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1757             || (!pi->link_config.link_ok)) {
 1758                 m_freem(m);
 1759                 return (0);
 1760         }
 1761 
 1762         /* check if flowid is set */
 1763         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)       
 1764                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1765 
 1766         qs = &pi->adapter->sge.qs[qidx];
 1767         
 1768         if (TXQ_TRYLOCK(qs)) {
 1769                 /* XXX running */
 1770                 error = cxgb_transmit_locked(ifp, qs, m);
 1771                 TXQ_UNLOCK(qs);
 1772         } else
 1773                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1774         return (error);
 1775 }
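      /*
       * Editorial note (not part of the original source): when the queue-set
       * lock is contended, cxgb_transmit() falls back to drbr_enqueue(); the
       * enqueued mbufs are drained later by whichever path next holds the
       * lock -- cxgb_transmit_locked(), the txq_timer callout
       * (cxgb_tx_timeout), or the txq_watchdog callout (cxgb_tx_watchdog) --
       * all of which end up in cxgb_start_locked().
       */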
 1776 
 1777 void
 1778 cxgb_qflush(struct ifnet *ifp)
 1779 {
 1780         /*
 1781          * Flush any enqueued mbufs in the buf_rings
 1782          * and in the transmit queues.
 1783          * This is a no-op for now.
 1784          */
 1785         return;
 1786 }
 1787 
 1788 /**
 1789  *      write_imm - write a packet into a Tx descriptor as immediate data
 1790  *      @d: the Tx descriptor to write
 1791  *      @m: the packet
 1792  *      @len: the length of packet data to write as immediate data
 1793  *      @gen: the generation bit value to write
 1794  *
 1795  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1796  *      contains a work request at its beginning.  We must write the packet
 1797  *      carefully so the SGE doesn't read accidentally before it's written in
 1798  *      carefully so the SGE doesn't accidentally read it before it has been
 1799  *      written in its entirety.
 1800 static __inline void
 1801 write_imm(struct tx_desc *d, caddr_t src,
 1802           unsigned int len, unsigned int gen)
 1803 {
 1804         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1805         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1806         uint32_t wr_hi, wr_lo;
 1807 
 1808         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1809             ("%s: invalid len %d", __func__, len));
 1810         
 1811         memcpy(&to[1], &from[1], len - sizeof(*from));
 1812         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1813             V_WR_BCNTLFLT(len & 7));
 1814         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1815         set_wr_hdr(to, wr_hi, wr_lo);
 1816         wmb();
 1817         wr_gen2(d, gen);
 1818 }
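      /*
       * Editorial note (not part of the original source): the store ordering
       * in write_imm() is deliberate -- the WR body is copied first, the
       * header is filled in by set_wr_hdr(), wmb() orders those stores, and
       * only then does wr_gen2() write the generation value that lets the SGE
       * treat the descriptor as complete.
       */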
 1819 
 1820 /**
 1821  *      check_desc_avail - check descriptor availability on a send queue
 1822  *      @adap: the adapter
 1823  *      @q: the TX queue
 1824  *      @m: the packet needing the descriptors
 1825  *      @ndesc: the number of Tx descriptors needed
 1826  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1827  *
 1828  *      Checks if the requested number of Tx descriptors is available on an
 1829  *      SGE send queue.  If the queue is already suspended or not enough
 1830  *      descriptors are available the packet is queued for later transmission.
 1831  *      Must be called with the Tx queue locked.
 1832  *
 1833  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1834  *      enough descriptors and the packet has been queued, and 2 if the caller
 1835  *      needs to retry because there weren't enough descriptors at the
 1836  *      beginning of the call but some freed up in the meantime.
 1837  */
 1838 static __inline int
 1839 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1840                  struct mbuf *m, unsigned int ndesc,
 1841                  unsigned int qid)
 1842 {
 1843         /* 
 1844          * XXX We currently only use this for checking the control queue;
 1845          * the control queue is only used for binding qsets, which happens
 1846          * at init time, so we are guaranteed enough descriptors.
 1847          */
 1848         if (__predict_false(mbufq_len(&q->sendq))) {
 1849 addq_exit:      (void)mbufq_enqueue(&q->sendq, m);
 1850                 return 1;
 1851         }
 1852         if (__predict_false(q->size - q->in_use < ndesc)) {
 1853 
 1854                 struct sge_qset *qs = txq_to_qset(q, qid);
 1855 
 1856                 setbit(&qs->txq_stopped, qid);
 1857                 if (should_restart_tx(q) &&
 1858                     test_and_clear_bit(qid, &qs->txq_stopped))
 1859                         return 2;
 1860 
 1861                 q->stops++;
 1862                 goto addq_exit;
 1863         }
 1864         return 0;
 1865 }
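      /*
       * Illustrative caller sketch (editorial, not part of the original
       * source; it mirrors ctrl_xmit() and ofld_xmit() below).  A return of 2
       * means descriptors were freed while we were checking, so the caller
       * reclaims and retries; 1 means the packet was queued on q->sendq:
       *
       *      TXQ_LOCK(qs);
       * again: reclaim_completed_tx_imm(q);
       *      ret = check_desc_avail(adap, q, m, ndesc, TXQ_CTRL);
       *      if (ret == 2)
       *              goto again;
       *      if (ret == 1) {
       *              TXQ_UNLOCK(qs);
       *              return (ENOSPC);
       *      }
       *      (ret == 0: ndesc descriptors are available, write the WR)
       */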
 1866 
 1867 
 1868 /**
 1869  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1870  *      @q: the SGE control Tx queue
 1871  *
 1872  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1873  *      that send only immediate data (presently just the control queues) and
 1874  *      thus do not have any mbufs.
 1875  */
 1876 static __inline void
 1877 reclaim_completed_tx_imm(struct sge_txq *q)
 1878 {
 1879         unsigned int reclaim = q->processed - q->cleaned;
 1880 
 1881         q->in_use -= reclaim;
 1882         q->cleaned += reclaim;
 1883 }
 1884 
 1885 /**
 1886  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1887  *      @adap: the adapter
 1888  *      @q: the control queue
 1889  *      @m: the packet
 1890  *
 1891  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1892  *      a control queue must fit entirely as immediate data in a single Tx
 1893  *      descriptor and have no page fragments.
 1894  */
 1895 static int
 1896 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1897 {
 1898         int ret;
 1899         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1900         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1901         
 1902         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1903 
 1904         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1905         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1906 
 1907         TXQ_LOCK(qs);
 1908 again:  reclaim_completed_tx_imm(q);
 1909 
 1910         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1911         if (__predict_false(ret)) {
 1912                 if (ret == 1) {
 1913                         TXQ_UNLOCK(qs);
 1914                         return (ENOSPC);
 1915                 }
 1916                 goto again;
 1917         }
 1918         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1919         
 1920         q->in_use++;
 1921         if (++q->pidx >= q->size) {
 1922                 q->pidx = 0;
 1923                 q->gen ^= 1;
 1924         }
 1925         TXQ_UNLOCK(qs);
 1926         wmb();
 1927         t3_write_reg(adap, A_SG_KDOORBELL,
 1928             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1929 
 1930         m_free(m);
 1931         return (0);
 1932 }
 1933 
 1934 
 1935 /**
 1936  *      restart_ctrlq - restart a suspended control queue
 1937  *      @qs: the queue set containing the control queue
 1938  *
 1939  *      Resumes transmission on a suspended Tx control queue.
 1940  */
 1941 static void
 1942 restart_ctrlq(void *data, int npending)
 1943 {
 1944         struct mbuf *m;
 1945         struct sge_qset *qs = (struct sge_qset *)data;
 1946         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1947         adapter_t *adap = qs->port->adapter;
 1948 
 1949         TXQ_LOCK(qs);
 1950 again:  reclaim_completed_tx_imm(q);
 1951 
 1952         while (q->in_use < q->size &&
 1953                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1954 
 1955                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1956                 m_free(m);
 1957 
 1958                 if (++q->pidx >= q->size) {
 1959                         q->pidx = 0;
 1960                         q->gen ^= 1;
 1961                 }
 1962                 q->in_use++;
 1963         }
 1964         if (mbufq_len(&q->sendq)) {
 1965                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1966 
 1967                 if (should_restart_tx(q) &&
 1968                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1969                         goto again;
 1970                 q->stops++;
 1971         }
 1972         TXQ_UNLOCK(qs);
 1973         t3_write_reg(adap, A_SG_KDOORBELL,
 1974                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1975 }
 1976 
 1977 
 1978 /*
 1979  * Send a management message through control queue 0
 1980  */
 1981 int
 1982 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1983 {
 1984         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1985 }
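      /*
       * Illustrative usage sketch (editorial, not part of the original
       * source).  The mbuf handed to t3_mgmt_tx() must begin with a
       * work_request_hdr and fit in a single descriptor (m_len <= WR_LEN);
       * ctrl_xmit() supplies the SOP/EOP flags and the queue token.  A caller
       * might build the message roughly like this:
       *
       *      struct mbuf *m;
       *      struct work_request_hdr *wrp;
       *
       *      m = m_gethdr(M_NOWAIT, MT_DATA);
       *      if (m == NULL)
       *              return (ENOMEM);
       *      wrp = mtod(m, struct work_request_hdr *);
       *      bzero(wrp, sizeof(*wrp));
       *      (fill in the opcode and message-specific fields here)
       *      m->m_len = m->m_pkthdr.len = sizeof(*wrp);
       *      return (t3_mgmt_tx(adap, m));
       */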
 1986 
 1987 /**
 1988  *      free_qset - free the resources of an SGE queue set
 1989  *      @sc: the controller owning the queue set
 1990  *      @q: the queue set
 1991  *
 1992  *      Release the HW and SW resources associated with an SGE queue set, such
 1993  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1994  *      queue set must be quiesced and the qset lock held; the lock is destroyed.
 1995  */
 1996 static void
 1997 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1998 {
 1999         int i;
 2000         
 2001         reclaim_completed_tx(q, 0, TXQ_ETH);
 2002         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2003                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2004         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2005                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2006                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2007         }
 2008 
 2009         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2010                 if (q->fl[i].desc) {
 2011                         mtx_lock_spin(&sc->sge.reg_lock);
 2012                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2013                         mtx_unlock_spin(&sc->sge.reg_lock);
 2014                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2015                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2016                                         q->fl[i].desc_map);
 2017                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2018                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2019                 }
 2020                 if (q->fl[i].sdesc) {
 2021                         free_rx_bufs(sc, &q->fl[i]);
 2022                         free(q->fl[i].sdesc, M_DEVBUF);
 2023                 }
 2024         }
 2025 
 2026         mtx_unlock(&q->lock);
 2027         MTX_DESTROY(&q->lock);
 2028         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2029                 if (q->txq[i].desc) {
 2030                         mtx_lock_spin(&sc->sge.reg_lock);
 2031                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2032                         mtx_unlock_spin(&sc->sge.reg_lock);
 2033                         bus_dmamap_unload(q->txq[i].desc_tag,
 2034                                         q->txq[i].desc_map);
 2035                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2036                                         q->txq[i].desc_map);
 2037                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2038                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2039                 }
 2040                 if (q->txq[i].sdesc) {
 2041                         free(q->txq[i].sdesc, M_DEVBUF);
 2042                 }
 2043         }
 2044 
 2045         if (q->rspq.desc) {
 2046                 mtx_lock_spin(&sc->sge.reg_lock);
 2047                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2048                 mtx_unlock_spin(&sc->sge.reg_lock);
 2049                 
 2050                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2051                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2052                                 q->rspq.desc_map);
 2053                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2054                 MTX_DESTROY(&q->rspq.lock);
 2055         }
 2056 
 2057 #if defined(INET6) || defined(INET)
 2058         tcp_lro_free(&q->lro.ctrl);
 2059 #endif
 2060 
 2061         bzero(q, sizeof(*q));
 2062 }
 2063 
 2064 /**
 2065  *      t3_free_sge_resources - free SGE resources
 2066  *      @sc: the adapter softc
 2067  *
 2068  *      Frees resources used by the SGE queue sets.
 2069  */
 2070 void
 2071 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2072 {
 2073         int i;
 2074 
 2075         for (i = 0; i < nqsets; ++i) {
 2076                 TXQ_LOCK(&sc->sge.qs[i]);
 2077                 t3_free_qset(sc, &sc->sge.qs[i]);
 2078         }
 2079 }
 2080 
 2081 /**
 2082  *      t3_sge_start - enable SGE
 2083  *      @sc: the controller softc
 2084  *
 2085  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2086  *      transfers.
 2087  */
 2088 void
 2089 t3_sge_start(adapter_t *sc)
 2090 {
 2091         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2092 }
 2093 
 2094 /**
 2095  *      t3_sge_stop - disable SGE operation
 2096  *      @sc: the adapter
 2097  *
 2098  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2099  *      from error interrupts) or from normal process context.  In the latter
 2100  *      case it also disables any pending queue-restart tasks.  Note that
 2101  *      if it is called in interrupt context it cannot disable the restart
 2102  *      tasks because it cannot wait; however, the tasks will have no effect
 2103  *      since the doorbells are disabled, and the driver will call this again
 2104  *      later from process context, at which time the tasks will be stopped
 2105  *      if they are still running.
 2106  */
 2107 void
 2108 t3_sge_stop(adapter_t *sc)
 2109 {
 2110         int i, nqsets;
 2111         
 2112         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2113 
 2114         if (sc->tq == NULL)
 2115                 return;
 2116         
 2117         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2118                 nqsets += sc->port[i].nqsets;
 2119 #ifdef notyet
 2120         /*
 2121          * 
 2122          * XXX
 2123          */
 2124         for (i = 0; i < nqsets; ++i) {
 2125                 struct sge_qset *qs = &sc->sge.qs[i];
 2126                 
 2127                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2128                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2129         }
 2130 #endif
 2131 }
 2132 
 2133 /**
 2134  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2135  *      @qs: the queue set containing the Tx queue
 2136  *      @reclaimable: the number of descriptors to reclaim
 2137  *      @queue: the index of the Tx queue within the queue set
 2138  *
 2139  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2140  *      Tx buffers.  Called with the Tx queue lock held.
 2145  */
 2146 void
 2147 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2148 {
 2149         struct tx_sw_desc *txsd;
 2150         unsigned int cidx, mask;
 2151         struct sge_txq *q = &qs->txq[queue];
 2152 
 2153 #ifdef T3_TRACE
 2154         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2155                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2156 #endif
 2157         cidx = q->cidx;
 2158         mask = q->size - 1;
 2159         txsd = &q->sdesc[cidx];
 2160 
 2161         mtx_assert(&qs->lock, MA_OWNED);
 2162         while (reclaimable--) {
 2163                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2164                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2165 
 2166                 if (txsd->m != NULL) {
 2167                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2168                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2169                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2170                         }
 2171                         m_freem_list(txsd->m);
 2172                         txsd->m = NULL;
 2173                 } else
 2174                         q->txq_skipped++;
 2175                 
 2176                 ++txsd;
 2177                 if (++cidx == q->size) {
 2178                         cidx = 0;
 2179                         txsd = q->sdesc;
 2180                 }
 2181         }
 2182         q->cidx = cidx;
 2183 
 2184 }
 2185 
 2186 /**
 2187  *      is_new_response - check if a response is newly written
 2188  *      @r: the response descriptor
 2189  *      @q: the response queue
 2190  *
 2191  *      Returns true if a response descriptor contains a yet unprocessed
 2192  *      response.
 2193  */
 2194 static __inline int
 2195 is_new_response(const struct rsp_desc *r,
 2196     const struct sge_rspq *q)
 2197 {
 2198         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2199 }
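      /*
       * Editorial note (not part of the original source): q->gen holds the
       * generation value expected for the current pass over the response ring
       * and is toggled each time the ring wraps; the SGE writes its current
       * generation into the F_RSPD_GEN2 bit of every response it posts.  An
       * entry whose generation bit matches q->gen has therefore been written
       * by hardware but not yet processed by the driver.
       */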
 2200 
 2201 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2202 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2203                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2204                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2205                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2206 
 2207 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2208 #define NOMEM_INTR_DELAY 2500
 2209 
 2210 #ifdef TCP_OFFLOAD
 2211 /**
 2212  *      write_ofld_wr - write an offload work request
 2213  *      @adap: the adapter
 2214  *      @m: the packet to send
 2215  *      @q: the Tx queue
 2216  *      @pidx: index of the first Tx descriptor to write
 2217  *      @gen: the generation value to use
 2218  *      @ndesc: number of descriptors the packet will occupy
 2219  *
 2220  *      Write an offload work request to send the supplied packet.  The packet
 2221  *      data already carry the work request with most fields populated.
 2222  */
 2223 static void
 2224 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2225     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2226 {
 2227         unsigned int sgl_flits, flits;
 2228         int i, idx, nsegs, wrlen;
 2229         struct work_request_hdr *from;
 2230         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2231         struct tx_desc *d = &q->desc[pidx];
 2232         struct txq_state txqs;
 2233         struct sglist_seg *segs;
 2234         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2235         struct sglist *sgl;
 2236 
 2237         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2238         wrlen = m->m_len - sizeof(*oh);
 2239 
 2240         if (!(oh->flags & F_HDR_SGL)) {
 2241                 write_imm(d, (caddr_t)from, wrlen, gen);
 2242 
 2243                 /*
 2244                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2245                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2246                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2247                  */
 2248                 if (!(oh->flags & F_HDR_DF))
 2249                         m_free(m);
 2250                 return;
 2251         }
 2252 
 2253         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2254 
 2255         sgl = oh->sgl;
 2256         flits = wrlen / 8;
 2257         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2258 
 2259         nsegs = sgl->sg_nseg;
 2260         segs = sgl->sg_segs;
 2261         for (idx = 0, i = 0; i < nsegs; i++) {
 2262                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2263                 if (i && idx == 0) 
 2264                         ++sgp;
 2265                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2266                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2267                 idx ^= 1;
 2268         }
 2269         if (idx) {
 2270                 sgp->len[idx] = 0;
 2271                 sgp->addr[idx] = 0;
 2272         }
 2273 
 2274         sgl_flits = sgl_len(nsegs);
 2275         txqs.gen = gen;
 2276         txqs.pidx = pidx;
 2277         txqs.compl = 0;
 2278 
 2279         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2280             from->wrh_hi, from->wrh_lo);
 2281 }
 2282 
 2283 /**
 2284  *      ofld_xmit - send a packet through an offload queue
 2285  *      @adap: the adapter
 2286  *      @q: the Tx offload queue
 2287  *      @m: the packet
 2288  *
 2289  *      Send an offload packet through an SGE offload queue.
 2290  */
 2291 static int
 2292 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2293 {
 2294         int ret;
 2295         unsigned int ndesc;
 2296         unsigned int pidx, gen;
 2297         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2298         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2299 
 2300         ndesc = G_HDR_NDESC(oh->flags);
 2301 
 2302         TXQ_LOCK(qs);
 2303 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2304         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2305         if (__predict_false(ret)) {
 2306                 if (ret == 1) {
 2307                         TXQ_UNLOCK(qs);
 2308                         return (EINTR);
 2309                 }
 2310                 goto again;
 2311         }
 2312 
 2313         gen = q->gen;
 2314         q->in_use += ndesc;
 2315         pidx = q->pidx;
 2316         q->pidx += ndesc;
 2317         if (q->pidx >= q->size) {
 2318                 q->pidx -= q->size;
 2319                 q->gen ^= 1;
 2320         }
 2321 
 2322         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2323         check_ring_tx_db(adap, q, 1);
 2324         TXQ_UNLOCK(qs);
 2325 
 2326         return (0);
 2327 }
 2328 
 2329 /**
 2330  *      restart_offloadq - restart a suspended offload queue
 2331  *      @qs: the queue set containing the offload queue
 2332  *
 2333  *      Resumes transmission on a suspended Tx offload queue.
 2334  */
 2335 static void
 2336 restart_offloadq(void *data, int npending)
 2337 {
 2338         struct mbuf *m;
 2339         struct sge_qset *qs = data;
 2340         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2341         adapter_t *adap = qs->port->adapter;
 2342         int cleaned;
 2343                 
 2344         TXQ_LOCK(qs);
 2345 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2346 
 2347         while ((m = mbufq_first(&q->sendq)) != NULL) {
 2348                 unsigned int gen, pidx;
 2349                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2350                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2351 
 2352                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2353                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2354                         if (should_restart_tx(q) &&
 2355                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2356                                 goto again;
 2357                         q->stops++;
 2358                         break;
 2359                 }
 2360 
 2361                 gen = q->gen;
 2362                 q->in_use += ndesc;
 2363                 pidx = q->pidx;
 2364                 q->pidx += ndesc;
 2365                 if (q->pidx >= q->size) {
 2366                         q->pidx -= q->size;
 2367                         q->gen ^= 1;
 2368                 }
 2369                 
 2370                 (void)mbufq_dequeue(&q->sendq);
 2371                 TXQ_UNLOCK(qs);
 2372                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2373                 TXQ_LOCK(qs);
 2374         }
 2375 #if USE_GTS
 2376         set_bit(TXQ_RUNNING, &q->flags);
 2377         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2378 #endif
 2379         TXQ_UNLOCK(qs);
 2380         wmb();
 2381         t3_write_reg(adap, A_SG_KDOORBELL,
 2382                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2383 }
 2384 
 2385 /**
 2386  *      t3_offload_tx - send an offload packet
 2387  *      @m: the packet
 2388  *
 2389  *      Sends an offload packet.  The ofld_hdr flags select the destination:
 2390  *      G_HDR_QSET() picks the queue set, and F_HDR_CTRL determines whether
 2391  *      the packet goes to the control queue or the offload queue.
 2392  */
 2393 int
 2394 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2395 {
 2396         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2397         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2398 
 2399         if (oh->flags & F_HDR_CTRL) {
 2400                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2401                 return (ctrl_xmit(sc, qs, m));
 2402         } else
 2403                 return (ofld_xmit(sc, qs, m));
 2404 }
 2405 #endif
 2406 
 2407 static void
 2408 restart_tx(struct sge_qset *qs)
 2409 {
 2410         struct adapter *sc = qs->port->adapter;
 2411 
 2412         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2413             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2414             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2415                 qs->txq[TXQ_OFLD].restarts++;
 2416                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2417         }
 2418 
 2419         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2420             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2421             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2422                 qs->txq[TXQ_CTRL].restarts++;
 2423                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2424         }
 2425 }
 2426 
 2427 /**
 2428  *      t3_sge_alloc_qset - initialize an SGE queue set
 2429  *      @sc: the controller softc
 2430  *      @id: the queue set id
 2431  *      @nports: how many Ethernet ports will be using this queue set
 2432  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2433  *      @p: configuration parameters for this queue set
 2434  *      @ntxq: number of Tx queues for the queue set
 2435  *      @pi: port info for queue set
 2436  *
 2437  *      Allocate resources and initialize an SGE queue set.  A queue set
 2438  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2439  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2440  *      queue, offload queue, and control queue.
 2441  */
 2442 int
 2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2444                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2445 {
 2446         struct sge_qset *q = &sc->sge.qs[id];
 2447         int i, ret = 0;
 2448 
 2449         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2450         q->port = pi;
 2451         q->adap = sc;
 2452 
 2453         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2454             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2455                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2456                 goto err;
 2457         }
 2458         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2459             M_NOWAIT | M_ZERO)) == NULL) {
 2460                 device_printf(sc->dev, "failed to allocate ifq\n");
 2461                 goto err;
 2462         }
 2463         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2464         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2465         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2466         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2467         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2468 
 2469         init_qset_cntxt(q, id);
 2470         q->idx = id;
 2471         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2472                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2473                     &q->fl[0].desc, &q->fl[0].sdesc,
 2474                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2475                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2476                 printf("error %d from alloc ring fl0\n", ret);
 2477                 goto err;
 2478         }
 2479 
 2480         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2481                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2482                     &q->fl[1].desc, &q->fl[1].sdesc,
 2483                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2484                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2485                 printf("error %d from alloc ring fl1\n", ret);
 2486                 goto err;
 2487         }
 2488 
 2489         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2490                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2491                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2492                     NULL, NULL)) != 0) {
 2493                 printf("error %d from alloc ring rspq\n", ret);
 2494                 goto err;
 2495         }
 2496 
 2497         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2498             device_get_unit(sc->dev), irq_vec_idx);
 2499         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2500 
 2501         for (i = 0; i < ntxq; ++i) {
 2502                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2503 
 2504                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2505                             sizeof(struct tx_desc), sz,
 2506                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2507                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2508                             &q->txq[i].desc_map,
 2509                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2510                         printf("error %d from alloc ring tx %i\n", ret, i);
 2511                         goto err;
 2512                 }
 2513                 mbufq_init(&q->txq[i].sendq, INT_MAX);
 2514                 q->txq[i].gen = 1;
 2515                 q->txq[i].size = p->txq_size[i];
 2516         }
 2517 
 2518 #ifdef TCP_OFFLOAD
 2519         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2520 #endif
 2521         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2522         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2523         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2524 
 2525         q->fl[0].gen = q->fl[1].gen = 1;
 2526         q->fl[0].size = p->fl_size;
 2527         q->fl[1].size = p->jumbo_size;
 2528 
 2529         q->rspq.gen = 1;
 2530         q->rspq.cidx = 0;
 2531         q->rspq.size = p->rspq_size;
 2532 
 2533         q->txq[TXQ_ETH].stop_thres = nports *
 2534             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2535 
 2536         q->fl[0].buf_size = MCLBYTES;
 2537         q->fl[0].zone = zone_pack;
 2538         q->fl[0].type = EXT_PACKET;
 2539 
 2540         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2541                 q->fl[1].zone = zone_jumbo16;
 2542                 q->fl[1].type = EXT_JUMBO16;
 2543         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2544                 q->fl[1].zone = zone_jumbo9;
 2545                 q->fl[1].type = EXT_JUMBO9;             
 2546         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2547                 q->fl[1].zone = zone_jumbop;
 2548                 q->fl[1].type = EXT_JUMBOP;
 2549         } else {
 2550                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2551                 ret = EDOOFUS;
 2552                 goto err;
 2553         }
 2554         q->fl[1].buf_size = p->jumbo_buf_size;
 2555 
 2556         /* Allocate and setup the lro_ctrl structure */
 2557         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2558 #if defined(INET6) || defined(INET)
 2559         ret = tcp_lro_init(&q->lro.ctrl);
 2560         if (ret) {
 2561                 printf("error %d from tcp_lro_init\n", ret);
 2562                 goto err;
 2563         }
 2564 #endif
 2565         q->lro.ctrl.ifp = pi->ifp;
 2566 
 2567         mtx_lock_spin(&sc->sge.reg_lock);
 2568         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2569                                    q->rspq.phys_addr, q->rspq.size,
 2570                                    q->fl[0].buf_size, 1, 0);
 2571         if (ret) {
 2572                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2573                 goto err_unlock;
 2574         }
 2575 
 2576         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2577                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2578                                           q->fl[i].phys_addr, q->fl[i].size,
 2579                                           q->fl[i].buf_size, p->cong_thres, 1,
 2580                                           0);
 2581                 if (ret) {
 2582                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2583                         goto err_unlock;
 2584                 }
 2585         }
 2586 
 2587         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2588                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2589                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2590                                  1, 0);
 2591         if (ret) {
 2592                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2593                 goto err_unlock;
 2594         }
 2595 
 2596         if (ntxq > 1) {
 2597                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2598                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2599                                          q->txq[TXQ_OFLD].phys_addr,
 2600                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2601                 if (ret) {
 2602                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2603                         goto err_unlock;
 2604                 }
 2605         }
 2606 
 2607         if (ntxq > 2) {
 2608                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2609                                          SGE_CNTXT_CTRL, id,
 2610                                          q->txq[TXQ_CTRL].phys_addr,
 2611                                          q->txq[TXQ_CTRL].size,
 2612                                          q->txq[TXQ_CTRL].token, 1, 0);
 2613                 if (ret) {
 2614                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2615                         goto err_unlock;
 2616                 }
 2617         }
 2618 
 2619         mtx_unlock_spin(&sc->sge.reg_lock);
 2620         t3_update_qset_coalesce(q, p);
 2621 
 2622         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2623         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2624         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2625 
 2626         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2627                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2628 
 2629         return (0);
 2630 
 2631 err_unlock:
 2632         mtx_unlock_spin(&sc->sge.reg_lock);
 2633 err:    
 2634         TXQ_LOCK(q);
 2635         t3_free_qset(sc, q);
 2636 
 2637         return (ret);
 2638 }
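      /*
       * Editorial note (not part of the original source): every failure path
       * above funnels through err/err_unlock, which locks the partially
       * constructed queue set and hands it to t3_free_qset().  t3_free_qset()
       * only tears down resources whose pointers are non-NULL, so it is safe
       * to call on a half-initialized qset.
       */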
 2639 
 2640 /*
 2641  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2642  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2643  * will also be taken into account here.
 2644  */
 2645 void
 2646 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2647 {
 2648         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2649         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2650         struct ifnet *ifp = pi->ifp;
 2651         
 2652         if (cpl->vlan_valid) {
 2653                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2654                 m->m_flags |= M_VLANTAG;
 2655         } 
 2656 
 2657         m->m_pkthdr.rcvif = ifp;
 2658         /*
 2659          * adjust after conversion to mbuf chain
 2660          */
 2661         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2662         m->m_len -= (sizeof(*cpl) + ethpad);
 2663         m->m_data += (sizeof(*cpl) + ethpad);
 2664 
 2665         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2666                 struct ether_header *eh = mtod(m, void *);
 2667                 uint16_t eh_type;
 2668 
 2669                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2670                         struct ether_vlan_header *evh = mtod(m, void *);
 2671 
 2672                         eh_type = evh->evl_proto;
 2673                 } else
 2674                         eh_type = eh->ether_type;
 2675 
 2676                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2677                     eh_type == htons(ETHERTYPE_IP)) {
 2678                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2679                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2680                         m->m_pkthdr.csum_data = 0xffff;
 2681                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2682                     eh_type == htons(ETHERTYPE_IPV6)) {
 2683                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2684                             CSUM_PSEUDO_HDR);
 2685                         m->m_pkthdr.csum_data = 0xffff;
 2686                 }
 2687         }
 2688 }
 2689 
 2690 /**
 2691  *      get_packet - return the next ingress packet buffer from a free list
 2692  *      @adap: the adapter that received the packet
 2693  *      @drop_thres: # of remaining buffers before we start dropping packets
 2694  *      @qs: the qset that the SGE free list holding the packet belongs to
 2695  *      @mh: the mbuf header; contains pointers to the head and tail of the mbuf chain
 2696  *      @r: response descriptor 
 2697  *
 2698  *      Get the next packet from a free list and complete setup of the
 2699  *      mbuf.  If the packet is small, we make a copy and recycle the
 2700  *      original buffer; otherwise we use the original buffer itself.  If a
 2701  *      positive drop threshold is supplied packets are dropped and their
 2702  *      buffers recycled if (a) the number of remaining buffers is under the
 2703  *      threshold and the packet is too big to copy, or (b) the packet should
 2704  *      be copied but there is no memory for the copy.
 2705  */
 2706 static int
 2707 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2708     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2709 {
 2710 
 2711         unsigned int len_cq =  ntohl(r->len_cq);
 2712         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2713         int mask, cidx = fl->cidx;
 2714         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2715         uint32_t len = G_RSPD_LEN(len_cq);
 2716         uint32_t flags = M_EXT;
 2717         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2718         caddr_t cl;
 2719         struct mbuf *m;
 2720         int ret = 0;
 2721 
 2722         mask = fl->size - 1;
 2723         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2724         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2725         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2726         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2727 
 2728         fl->credits--;
 2729         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2730         
 2731         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2732             sopeop == RSPQ_SOP_EOP) {
 2733                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2734                         goto skip_recycle;
 2735                 cl = mtod(m, void *);
 2736                 memcpy(cl, sd->rxsd_cl, len);
 2737                 recycle_rx_buf(adap, fl, fl->cidx);
 2738                 m->m_pkthdr.len = m->m_len = len;
 2739                 m->m_flags = 0;
 2740                 mh->mh_head = mh->mh_tail = m;
 2741                 ret = 1;
 2742                 goto done;
 2743         } else {
 2744         skip_recycle:
 2745                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2746                 cl = sd->rxsd_cl;
 2747                 m = sd->m;
 2748 
 2749                 if ((sopeop == RSPQ_SOP_EOP) ||
 2750                     (sopeop == RSPQ_SOP))
 2751                         flags |= M_PKTHDR;
 2752                 m_init(m, M_NOWAIT, MT_DATA, flags);
 2753                 if (fl->zone == zone_pack) {
 2754                         /*
 2755                          * restore clobbered data pointer
 2756                          */
 2757                         m->m_data = m->m_ext.ext_buf;
 2758                 } else {
 2759                         m_cljset(m, cl, fl->type);
 2760                 }
 2761                 m->m_len = len;
 2762         }               
 2763         switch(sopeop) {
 2764         case RSPQ_SOP_EOP:
 2765                 ret = 1;
 2766                 /* FALLTHROUGH */
 2767         case RSPQ_SOP:
 2768                 mh->mh_head = mh->mh_tail = m;
 2769                 m->m_pkthdr.len = len;
 2770                 break;
 2771         case RSPQ_EOP:
 2772                 ret = 1;
 2773                 /* FALLTHROUGH */
 2774         case RSPQ_NSOP_NEOP:
 2775                 if (mh->mh_tail == NULL) {
 2776                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2777                         m_freem(m);
 2778                         break;
 2779                 }
 2780                 mh->mh_tail->m_next = m;
 2781                 mh->mh_tail = m;
 2782                 mh->mh_head->m_pkthdr.len += len;
 2783                 break;
 2784         }
 2785         if (cxgb_debug)
 2786                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2787 done:
 2788         if (++fl->cidx == fl->size)
 2789                 fl->cidx = 0;
 2790 
 2791         return (ret);
 2792 }
 2793 
 2794 /**
 2795  *      handle_rsp_cntrl_info - handles control information in a response
 2796  *      @qs: the queue set corresponding to the response
 2797  *      @flags: the response control flags
 2798  *
 2799  *      Handles the control information of an SGE response, such as GTS
 2800  *      indications and completion credits for the queue set's Tx queues.
 2801  *      The HW coalesces credits; we don't do any extra SW coalescing.
 2802  */
 2803 static __inline void
 2804 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2805 {
 2806         unsigned int credits;
 2807 
 2808 #if USE_GTS
 2809         if (flags & F_RSPD_TXQ0_GTS)
 2810                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2811 #endif
 2812         credits = G_RSPD_TXQ0_CR(flags);
 2813         if (credits) 
 2814                 qs->txq[TXQ_ETH].processed += credits;
 2815 
 2816         credits = G_RSPD_TXQ2_CR(flags);
 2817         if (credits)
 2818                 qs->txq[TXQ_CTRL].processed += credits;
 2819 
 2820 #if USE_GTS
 2821         if (flags & F_RSPD_TXQ1_GTS)
 2822                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2823 #endif
 2824         credits = G_RSPD_TXQ1_CR(flags);
 2825         if (credits)
 2826                 qs->txq[TXQ_OFLD].processed += credits;
 2827 
 2828 }
 2829 
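/*
 * check_ring_db() is intentionally a no-op in this driver: the GTS
 * "sleeping" flags collected by process_responses() are passed in but
 * currently ignored (presumably the Tx doorbell writes that the
 * corresponding Linux driver issues at this point are not needed by this
 * Tx path).
 */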
 2830 static void
 2831 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2832     unsigned int sleeping)
 2833 {
 2834         ;
 2835 }
 2836 
 2837 /**
 2838  *      process_responses - process responses from an SGE response queue
 2839  *      @adap: the adapter
 2840  *      @qs: the queue set to which the response queue belongs
 2841  *      @budget: how many responses can be processed in this round
 2842  *
 2843  *      Process responses from an SGE response queue up to the supplied budget.
 2844  *      Responses include received packets as well as credits and other events
 2845  *      for the queues that belong to the response queue's queue set.
 2846  *      A negative budget is effectively unlimited.
 2847  *
 2848  *      Additionally choose the interrupt holdoff time for the next interrupt
 2849  *      on this queue.  If the system is under memory shortage use a fairly
 2850  *      long delay to help recovery.
 2851  */
 2852 static int
 2853 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2854 {
 2855         struct sge_rspq *rspq = &qs->rspq;
 2856         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2857         int budget_left = budget;
 2858         unsigned int sleeping = 0;
 2859 #if defined(INET6) || defined(INET)
 2860         int lro_enabled = qs->lro.enabled;
 2861         int skip_lro;
 2862         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2863 #endif
 2864         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2865 #ifdef DEBUG    
 2866         static int last_holdoff = 0;
 2867         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2868                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2869                 last_holdoff = rspq->holdoff_tmr;
 2870         }
 2871 #endif
 2872         rspq->next_holdoff = rspq->holdoff_tmr;
 2873 
 2874         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2875                 int eth, eop = 0, ethpad = 0;
 2876                 uint32_t flags = ntohl(r->flags);
 2877                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2878                 uint8_t opcode = r->rss_hdr.opcode;
 2879                 
 2880                 eth = (opcode == CPL_RX_PKT);
 2881                 
 2882                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2883                         struct mbuf *m;
 2884 
 2885                         if (cxgb_debug)
 2886                                 printf("async notification\n");
 2887 
 2888                         if (mh->mh_head == NULL) {
 2889                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2890                                 m = mh->mh_head;
 2891                         } else {
 2892                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2893                         }
 2894                         if (m == NULL)
 2895                                 goto no_mem;
 2896 
 2897                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2898                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2899                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
 2900                         opcode = CPL_ASYNC_NOTIF;
 2901                         eop = 1;
 2902                         rspq->async_notif++;
 2903                         goto skip;
 2904                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2905                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2906 
 2907                         if (m == NULL) {        
 2908                 no_mem:
 2909                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2910                                 budget_left--;
 2911                                 break;
 2912                         }
 2913                         if (mh->mh_head == NULL)
 2914                                 mh->mh_head = m;
 2915                         else 
 2916                                 mh->mh_tail->m_next = m;
 2917                         mh->mh_tail = m;
 2918 
 2919                         get_imm_packet(adap, r, m);
 2920                         mh->mh_head->m_pkthdr.len += m->m_len;
 2921                         eop = 1;
 2922                         rspq->imm_data++;
 2923                 } else if (r->len_cq) {
 2924                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2925                         
 2926                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2927                         if (eop) {
 2928                                 if (r->rss_hdr.hash_type && !adap->timestamp) {
 2929                                         M_HASHTYPE_SET(mh->mh_head,
 2930                                             M_HASHTYPE_OPAQUE_HASH);
 2931                                         mh->mh_head->m_pkthdr.flowid = rss_hash;
 2932                                 }
 2933                         }
 2934                         
 2935                         ethpad = 2;
 2936                 } else {
 2937                         rspq->pure_rsps++;
 2938                 }
 2939         skip:
 2940                 if (flags & RSPD_CTRL_MASK) {
 2941                         sleeping |= flags & RSPD_GTS_MASK;
 2942                         handle_rsp_cntrl_info(qs, flags);
 2943                 }
 2944 
 2945                 if (!eth && eop) {
 2946                         rspq->offload_pkts++;
 2947 #ifdef TCP_OFFLOAD
 2948                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2949 #else
 2950                         m_freem(mh->mh_head);
 2951 #endif
 2952                         mh->mh_head = NULL;
 2953                 } else if (eth && eop) {
 2954                         struct mbuf *m = mh->mh_head;
 2955 
 2956                         t3_rx_eth(adap, m, ethpad);
 2957 
 2958                         /*
 2959                          * The T304 sends incoming packets on any qset.  If LRO
 2960                          * is also enabled, we could end up sending the packet up
 2961                          * lro_ctrl->ifp's input path.  That would be incorrect.
 2962                          *
 2963                          * The mbuf's rcvif was derived from the cpl header and
 2964                          * is accurate.  Skip LRO and just use that.
 2965                          */
 2966 #if defined(INET6) || defined(INET)
 2967                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2968 
 2969                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2970                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2971                             ) {
 2972                                 /* successfully queued for LRO */
 2973                         } else
 2974 #endif
 2975                         {
 2976                                 /*
 2977                                  * LRO not enabled, packet unsuitable for LRO,
 2978                                  * or unable to queue.  Pass it up right now in
 2979                                  * either case.
 2980                                  */
 2981                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2982                                 (*ifp->if_input)(ifp, m);
 2983                         }
 2984                         mh->mh_head = NULL;
 2985 
 2986                 }
 2987 
 2988                 r++;
 2989                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2990                         rspq->cidx = 0;
 2991                         rspq->gen ^= 1;
 2992                         r = rspq->desc;
 2993                 }
 2994 
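                /*
                 * Response-queue credits are handed back to the hardware in
                 * batches of 64, and both free lists are topped up as we go
                 * so that a long burst of responses cannot starve the SGE of
                 * Rx buffers.
                 */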
 2995                 if (++rspq->credits >= 64) {
 2996                         refill_rspq(adap, rspq, rspq->credits);
 2997                         rspq->credits = 0;
 2998                 }
 2999                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3000                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3001                 --budget_left;
 3002         }
 3003 
 3004 #if defined(INET6) || defined(INET)
 3005         /* Flush LRO */
 3006         tcp_lro_flush_all(lro_ctrl);
 3007 #endif
 3008 
 3009         if (sleeping)
 3010                 check_ring_db(adap, qs, sleeping);
 3011 
 3012         mb();  /* commit Tx queue processed updates */
 3013         if (__predict_false(qs->txq_stopped > 1))
 3014                 restart_tx(qs);
 3015 
 3016         __refill_fl_lt(adap, &qs->fl[0], 512);
 3017         __refill_fl_lt(adap, &qs->fl[1], 512);
 3018         budget -= budget_left;
 3019         return (budget);
 3020 }
 3021 
 3022 /*
 3023  * A helper function that processes responses and issues GTS.
 3024  */
 3025 static __inline int
 3026 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3027 {
 3028         int work;
 3029         static int last_holdoff = 0;
 3030         
 3031         work = process_responses(adap, rspq_to_qset(rq), -1);
 3032 
 3033         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3034                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3035                 last_holdoff = rq->next_holdoff;
 3036         }
 3037         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3038             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3039         
 3040         return (work);
 3041 }
 3042 
 3043 #ifdef NETDUMP
 3044 int
 3045 cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs)
 3046 {
 3047 
 3048         return (process_responses_gts(adap, &qs->rspq));
 3049 }
 3050 #endif
 3051 
 3052 /*
 3053  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3054  * Handles data events from SGE response queues as well as error and other
 3055  * async events as they all use the same interrupt pin.  We use one SGE
 3056  * response queue per port in this mode and protect all response queues with
 3057  * queue 0's lock.
 3058  */
 3059 void
 3060 t3b_intr(void *data)
 3061 {
 3062         uint32_t i, map;
 3063         adapter_t *adap = data;
 3064         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3065         
 3066         t3_write_reg(adap, A_PL_CLI, 0);
 3067         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3068 
 3069         if (!map) 
 3070                 return;
 3071 
 3072         if (__predict_false(map & F_ERRINTR)) {
 3073                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3074                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3075                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3076         }
 3077 
 3078         mtx_lock(&q0->lock);
 3079         for_each_port(adap, i)
 3080                 if (map & (1 << i))
 3081                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3082         mtx_unlock(&q0->lock);
 3083 }
 3084 
 3085 /*
 3086  * The MSI interrupt handler.  This needs to handle data events from SGE
 3087  * response queues as well as error and other async events as they all use
 3088  * the same MSI vector.  We use one SGE response queue per port in this mode
 3089  * and protect all response queues with queue 0's lock.
 3090  */
 3091 void
 3092 t3_intr_msi(void *data)
 3093 {
 3094         adapter_t *adap = data;
 3095         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3096         int i, new_packets = 0;
 3097 
 3098         mtx_lock(&q0->lock);
 3099 
 3100         for_each_port(adap, i)
 3101                 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
 3102                         new_packets = 1;
 3103         mtx_unlock(&q0->lock);
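        /*
         * No queue had pending responses, so this interrupt was presumably
         * raised for an error or other async event: mask the PL interrupts
         * and let the slow interrupt task figure out the cause.
         */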
 3104         if (new_packets == 0) {
 3105                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3106                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3107                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3108         }
 3109 }
 3110 
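/*
 * The MSI-X interrupt handler.  Each queue set has its own vector in this
 * mode, so the handler only services the response queue it was registered
 * for and counts interrupts that arrive with no new responses pending.
 */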
 3111 void
 3112 t3_intr_msix(void *data)
 3113 {
 3114         struct sge_qset *qs = data;
 3115         adapter_t *adap = qs->port->adapter;
 3116         struct sge_rspq *rspq = &qs->rspq;
 3117 
 3118         if (process_responses_gts(adap, rspq) == 0)
 3119                 rspq->unhandled_irqs++;
 3120 }
 3121 
 3122 #define QDUMP_SBUF_SIZE         (32 * 400)
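/*
 * Sysctl handler behind the per-qset rspq "qdump" node.  Reads the response
 * queue context from the SGE, then pretty-prints rspq_dump_count descriptors
 * starting at rspq_dump_start into an sbuf returned to userland.
 */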
 3123 static int
 3124 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3125 {
 3126         struct sge_rspq *rspq;
 3127         struct sge_qset *qs;
 3128         int i, err, dump_end, idx;
 3129         struct sbuf *sb;
 3130         struct rsp_desc *rspd;
 3131         uint32_t data[4];
 3132         
 3133         rspq = arg1;
 3134         qs = rspq_to_qset(rspq);
 3135         if (rspq->rspq_dump_count == 0) 
 3136                 return (0);
 3137         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3138                 log(LOG_WARNING,
 3139                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3140                 rspq->rspq_dump_count = 0;
 3141                 return (EINVAL);
 3142         }
 3143         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3144                 log(LOG_WARNING,
 3145                     "dump start of %d is greater than queue size\n",
 3146                     rspq->rspq_dump_start);
 3147                 rspq->rspq_dump_start = 0;
 3148                 return (EINVAL);
 3149         }
 3150         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3151         if (err)
 3152                 return (err);
 3153         err = sysctl_wire_old_buffer(req, 0);
 3154         if (err)
 3155                 return (err);
 3156         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3157 
 3158         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3159             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3160             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3161         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3162             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3163         
 3164         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3165             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3166         
 3167         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3168         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3169                 idx = i & (RSPQ_Q_SIZE-1);
 3170                 
 3171                 rspd = &rspq->desc[idx];
 3172                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3173                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3174                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3175                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3176                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3177                     be32toh(rspd->len_cq), rspd->intr_gen);
 3178         }
 3179 
 3180         err = sbuf_finish(sb);
 3181         sbuf_delete(sb);
 3182         return (err);
 3183 }       
 3184 
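/*
 * Sysctl handler that dumps the Ethernet Tx queue: after reading the egress
 * context it walks txq_dump_count descriptors from txq_dump_start and prints
 * each work request header followed by its flits.
 */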
 3185 static int
 3186 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3187 {
 3188         struct sge_txq *txq;
 3189         struct sge_qset *qs;
 3190         int i, j, err, dump_end;
 3191         struct sbuf *sb;
 3192         struct tx_desc *txd;
 3193         uint32_t *WR, wr_hi, wr_lo, gen;
 3194         uint32_t data[4];
 3195         
 3196         txq = arg1;
 3197         qs = txq_to_qset(txq, TXQ_ETH);
 3198         if (txq->txq_dump_count == 0) {
 3199                 return (0);
 3200         }
 3201         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3202                 log(LOG_WARNING,
 3203                     "dump count is too large %d\n", txq->txq_dump_count);
 3204                 txq->txq_dump_count = 1;
 3205                 return (EINVAL);
 3206         }
 3207         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3208                 log(LOG_WARNING,
 3209                     "dump start of %d is greater than queue size\n",
 3210                     txq->txq_dump_start);
 3211                 txq->txq_dump_start = 0;
 3212                 return (EINVAL);
 3213         }
 3214         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3215         if (err)
 3216                 return (err);
 3217         err = sysctl_wire_old_buffer(req, 0);
 3218         if (err)
 3219                 return (err);
 3220         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3221 
 3222         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3223             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3224             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3225         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3226             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3227             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3228         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3229             txq->txq_dump_start,
 3230             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3231 
 3232         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3233         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3234                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3235                 WR = (uint32_t *)txd->flit;
 3236                 wr_hi = ntohl(WR[0]);
 3237                 wr_lo = ntohl(WR[1]);           
 3238                 gen = G_WR_GEN(wr_lo);
 3239                 
 3240                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3241                     wr_hi, wr_lo, gen);
 3242                 for (j = 2; j < 30; j += 4) 
 3243                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3244                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3245 
 3246         }
 3247         err = sbuf_finish(sb);
 3248         sbuf_delete(sb);
 3249         return (err);
 3250 }
 3251 
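/*
 * As above, but for the control queue, which is assumed here to be 256
 * descriptors deep (hence the hard-coded 255 masks below).
 */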
 3252 static int
 3253 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3254 {
 3255         struct sge_txq *txq;
 3256         struct sge_qset *qs;
 3257         int i, j, err, dump_end;
 3258         struct sbuf *sb;
 3259         struct tx_desc *txd;
 3260         uint32_t *WR, wr_hi, wr_lo, gen;
 3261         
 3262         txq = arg1;
 3263         qs = txq_to_qset(txq, TXQ_CTRL);
 3264         if (txq->txq_dump_count == 0) {
 3265                 return (0);
 3266         }
 3267         if (txq->txq_dump_count > 256) {
 3268                 log(LOG_WARNING,
 3269                     "dump count is too large %d\n", txq->txq_dump_count);
 3270                 txq->txq_dump_count = 1;
 3271                 return (EINVAL);
 3272         }
 3273         if (txq->txq_dump_start > 255) {
 3274                 log(LOG_WARNING,
 3275                     "dump start of %d is greater than queue size\n",
 3276                     txq->txq_dump_start);
 3277                 txq->txq_dump_start = 0;
 3278                 return (EINVAL);
 3279         }
 3280 
 3281         err = sysctl_wire_old_buffer(req, 0);
 3282         if (err != 0)
 3283                 return (err);
 3284         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3285         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3286             txq->txq_dump_start,
 3287             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3288 
 3289         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3290         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3291                 txd = &txq->desc[i & (255)];
 3292                 WR = (uint32_t *)txd->flit;
 3293                 wr_hi = ntohl(WR[0]);
 3294                 wr_lo = ntohl(WR[1]);           
 3295                 gen = G_WR_GEN(wr_lo);
 3296                 
 3297                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3298                     wr_hi, wr_lo, gen);
 3299                 for (j = 2; j < 30; j += 4) 
 3300                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3301                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3302 
 3303         }
 3304         err = sbuf_finish(sb);
 3305         sbuf_delete(sb);
 3306         return (err);
 3307 }
 3308 
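/*
 * Sysctl handler for the "intr_coal" node.  Applies the new interrupt
 * coalescing value (clamped to at least 1us) to every queue set and rewrites
 * the GTS register so the new holdoff timer takes effect immediately.
 */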
 3309 static int
 3310 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3311 {
 3312         adapter_t *sc = arg1;
 3313         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3314         int coalesce_usecs;     
 3315         struct sge_qset *qs;
 3316         int i, j, err, nqsets = 0;
 3317         struct mtx *lock;
 3318 
 3319         if ((sc->flags & FULL_INIT_DONE) == 0)
 3320                 return (ENXIO);
 3321                 
 3322         coalesce_usecs = qsp->coalesce_usecs;
 3323         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3324 
 3325         if (err != 0) {
 3326                 return (err);
 3327         }
 3328         if (coalesce_usecs == qsp->coalesce_usecs)
 3329                 return (0);
 3330 
 3331         for (i = 0; i < sc->params.nports; i++) 
 3332                 for (j = 0; j < sc->port[i].nqsets; j++)
 3333                         nqsets++;
 3334 
 3335         coalesce_usecs = max(1, coalesce_usecs);
 3336 
 3337         for (i = 0; i < nqsets; i++) {
 3338                 qs = &sc->sge.qs[i];
 3339                 qsp = &sc->params.sge.qset[i];
 3340                 qsp->coalesce_usecs = coalesce_usecs;
 3341                 
 3342                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3343                             &sc->sge.qs[0].rspq.lock;
 3344 
 3345                 mtx_lock(lock);
 3346                 t3_update_qset_coalesce(qs, qsp);
 3347                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3348                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3349                 mtx_unlock(lock);
 3350         }
 3351 
 3352         return (0);
 3353 }
 3354 
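/*
 * Sysctl handler for the "pkt_timestamp" node.  Toggles ENABLERXPKTTMSTPRSS
 * in TP_PC_CONFIG2 so that received packets carry a timestamp in place of
 * the RSS hash value.
 */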
 3355 static int
 3356 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3357 {
 3358         adapter_t *sc = arg1;
 3359         int rc, timestamp;
 3360 
 3361         if ((sc->flags & FULL_INIT_DONE) == 0)
 3362                 return (ENXIO);
 3363 
 3364         timestamp = sc->timestamp;
 3365         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3366 
 3367         if (rc != 0)
 3368                 return (rc);
 3369 
 3370         if (timestamp != sc->timestamp) {
 3371                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3372                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3373                 sc->timestamp = timestamp;
 3374         }
 3375 
 3376         return (0);
 3377 }
 3378 
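/*
 * Register the adapter-wide sysctl nodes that are valid as soon as the device
 * attaches: firmware version, hardware revision, port types, the debug knob
 * and a few global counters.
 */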
 3379 void
 3380 t3_add_attach_sysctls(adapter_t *sc)
 3381 {
 3382         struct sysctl_ctx_list *ctx;
 3383         struct sysctl_oid_list *children;
 3384 
 3385         ctx = device_get_sysctl_ctx(sc->dev);
 3386         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3387 
 3388         /* random information */
 3389         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3390             "firmware_version",
 3391             CTLFLAG_RD, sc->fw_version,
 3392             0, "firmware version");
 3393         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3394             "hw_revision",
 3395             CTLFLAG_RD, &sc->params.rev,
 3396             0, "chip hardware revision");
 3397         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3398             "port_types",
 3399             CTLFLAG_RD, sc->port_types,
 3400             0, "type of ports");
 3401         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3402             "enable_debug",
 3403             CTLFLAG_RW, &cxgb_debug,
 3404             0, "enable verbose debugging output");
 3405         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3406             CTLFLAG_RD, &sc->tunq_coalesce,
 3407             "#tunneled packets freed");
 3408         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3409             "txq_overrun",
 3410             CTLFLAG_RD, &txq_fills,
 3411             0, "#times txq overrun");
 3412         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3413             "core_clock",
 3414             CTLFLAG_RD, &sc->params.vpd.cclk,
 3415             0, "core clock frequency (in kHz)");
 3416 }
 3417 
 3418 
 3419 static const char *rspq_name = "rspq";
 3420 static const char *txq_names[] =
 3421 {
 3422         "txq_eth",
 3423         "txq_ofld",
 3424         "txq_ctrl"      
 3425 };
 3426 
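/*
 * Shared handler for the per-port MAC statistics nodes.  arg2 carries the
 * byte offset of the requested counter within struct mac_stats (see the
 * CXGB_SYSCTL_ADD_QUAD comment below); all counters are refreshed from the
 * hardware before the requested one is returned.
 */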
 3427 static int
 3428 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3429 {
 3430         struct port_info *p = arg1;
 3431         uint64_t *parg;
 3432 
 3433         if (!p)
 3434                 return (EINVAL);
 3435 
 3436         cxgb_refresh_stats(p);
 3437         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3438 
 3439         return (sysctl_handle_64(oidp, parg, 0, req));
 3440 }
 3441 
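/*
 * Register the sysctl tree that depends on the final queue configuration:
 * per-port nodes with per-qset response, Tx, control-queue and LRO
 * statistics, plus the MAC counters exported via sysctl_handle_macstat().
 */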
 3442 void
 3443 t3_add_configured_sysctls(adapter_t *sc)
 3444 {
 3445         struct sysctl_ctx_list *ctx;
 3446         struct sysctl_oid_list *children;
 3447         int i, j;
 3448         
 3449         ctx = device_get_sysctl_ctx(sc->dev);
 3450         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3451 
 3452         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3453             "intr_coal",
 3454             CTLTYPE_INT|CTLFLAG_RW, sc,
 3455             0, t3_set_coalesce_usecs,
 3456             "I", "interrupt coalescing timer (us)");
 3457 
 3458         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3459             "pkt_timestamp",
 3460             CTLTYPE_INT | CTLFLAG_RW, sc,
 3461             0, t3_pkt_timestamp,
 3462             "I", "provide packet timestamp instead of connection hash");
 3463 
 3464         for (i = 0; i < sc->params.nports; i++) {
 3465                 struct port_info *pi = &sc->port[i];
 3466                 struct sysctl_oid *poid;
 3467                 struct sysctl_oid_list *poidlist;
 3468                 struct mac_stats *mstats = &pi->mac.stats;
 3469                 
 3470                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3471                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3472                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3473                 poidlist = SYSCTL_CHILDREN(poid);
 3474                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3475                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3476                     0, "#queue sets");
 3477 
 3478                 for (j = 0; j < pi->nqsets; j++) {
 3479                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3480                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3481                                           *ctrlqpoid, *lropoid;
 3482                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3483                                                *txqpoidlist, *ctrlqpoidlist,
 3484                                                *lropoidlist;
 3485                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3486                         
 3487                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3488                         
 3489                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3490                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3491                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3492 
 3493                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3494                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3495                                         "freelist #0 empty");
 3496                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3497                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3498                                         "freelist #1 empty");
 3499 
 3500                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3501                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3502                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3503 
 3504                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3505                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3506                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3507 
 3508                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3509                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3510                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3511 
 3512                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3513                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3514                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3515 
 3516                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3517                             CTLFLAG_RD, &qs->rspq.size,
 3518                             0, "#entries in response queue");
 3519                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3520                             CTLFLAG_RD, &qs->rspq.cidx,
 3521                             0, "consumer index");
 3522                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3523                             CTLFLAG_RD, &qs->rspq.credits,
 3524                             0, "#credits");
 3525                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3526                             CTLFLAG_RD, &qs->rspq.starved,
 3527                             0, "#times starved");
 3528                         SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3529                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3530             "physical address of the queue");
 3531                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3532                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3533                             0, "start rspq dump entry");
 3534                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3535                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3536                             0, "#rspq entries to dump");
 3537                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3538                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3539                             0, t3_dump_rspq, "A", "dump of the response queue");
 3540 
 3541                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3542                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3543                             "#tunneled packets dropped");
 3544                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3545                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
 3546                             0, "#tunneled packets waiting to be sent");
 3547 #if 0                   
 3548                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3549                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3550                             0, "#tunneled packets queue producer index");
 3551                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3552                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3553                             0, "#tunneled packets queue consumer index");
 3554 #endif                  
 3555                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3556                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3557                             0, "#tunneled packets processed by the card");
 3558                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3559                             CTLFLAG_RD, &txq->cleaned,
 3560                             0, "#tunneled packets cleaned");
 3561                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3562                             CTLFLAG_RD, &txq->in_use,
 3563                             0, "#tunneled packet slots in use");
 3564                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
 3565                             CTLFLAG_RD, &txq->txq_frees,
 3566                             "#tunneled packets freed");
 3567                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3568                             CTLFLAG_RD, &txq->txq_skipped,
 3569                             0, "#tunneled packet descriptors skipped");
 3570                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3571                             CTLFLAG_RD, &txq->txq_coalesced,
 3572                             "#tunneled packets coalesced");
 3573                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3574                             CTLFLAG_RD, &txq->txq_enqueued,
 3575                             0, "#tunneled packets enqueued to hardware");
 3576                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3577                             CTLFLAG_RD, &qs->txq_stopped,
 3578                             0, "tx queues stopped");
 3579                         SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3580                             CTLFLAG_RD, &txq->phys_addr,
 3581             "physical address of the queue");
 3582                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3583                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3584                             0, "txq generation");
 3585                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3586                             CTLFLAG_RD, &txq->cidx,
 3587                             0, "hardware queue cidx");                  
 3588                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3589                             CTLFLAG_RD, &txq->pidx,
 3590                             0, "hardware queue pidx");
 3591                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3592                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3593                             0, "txq start idx for dump");
 3594                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3595                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3596                             0, "txq #entries to dump");                 
 3597                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3598                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3599                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3600 
 3601                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3602                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3603                             0, "ctrlq start idx for dump");
 3604                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3605                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3606             0, "ctrlq #entries to dump");
 3607                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3608                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3609             0, t3_dump_txq_ctrl, "A", "dump of the control queue");
 3610 
 3611                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3612                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3613                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3614                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3615                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3616                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3617                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3618                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3619                 }
 3620 
 3621                 /* Now add a node for mac stats. */
 3622                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3623                     CTLFLAG_RD, NULL, "MAC statistics");
 3624                 poidlist = SYSCTL_CHILDREN(poid);
 3625 
 3626                 /*
 3627                  * We (ab)use the length argument (arg2) to pass on the offset
 3628                  * of the data that we are interested in.  This is only required
 3629                  * for the quad counters that are updated from the hardware (we
 3630                  * make sure that we return the latest value).
 3631                  * sysctl_handle_macstat first updates *all* the counters from
 3632                  * the hardware, and then returns the latest value of the
 3633                  * requested counter.  Best would be to update only the
 3634                  * requested counter from hardware, but t3_mac_update_stats()
 3635                  * hides all the register details and we don't want to dive into
 3636                  * all that here.
 3637                  */
 3638 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3639     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3640     sysctl_handle_macstat, "QU", 0)
 3641                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3647                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3648                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3649                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3650                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3651                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3652                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3653                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3654                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3655                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3656                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3657                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3658                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3659                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3660                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3661                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3662                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3663                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3671                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3672                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3673                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3674                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3675                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3676                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3677                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3678                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3679                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3680                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3681                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3682                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3683                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3684                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3685                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3686                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3687 #undef CXGB_SYSCTL_ADD_QUAD
 3688 
 3689 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3690     CTLFLAG_RD, &mstats->a, 0)
 3691                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3692                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3693                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3694                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3695                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3696                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3697                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3698                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3699                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3700                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3701 #undef CXGB_SYSCTL_ADD_ULONG
 3702         }
 3703 }
 3704         
 3705 /**
 3706  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3707  *      @qs: the queue set
 3708  *      @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 3709  *      @idx: the descriptor index in the queue
 3710  *      @data: where to dump the descriptor contents
 3711  *
 3712  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3713  *      size of the descriptor.
 3714  */
 3715 int
 3716 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3717                 unsigned char *data)
 3718 {
 3719         if (qnum >= 6)
 3720                 return (EINVAL);
 3721 
 3722         if (qnum < 3) {
 3723                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3724                         return (EINVAL);
 3725                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3726                 return (sizeof(struct tx_desc));
 3727         }
 3728 
 3729         if (qnum == 3) {
 3730                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3731                         return (EINVAL);
 3732                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3733                 return (sizeof(struct rsp_desc));
 3734         }
 3735 
 3736         qnum -= 4;
 3737         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3738                 return (EINVAL);
 3739         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3740         return (sizeof(struct rx_desc));
 3741 }
