The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/8.1/sys/dev/cxgb/cxgb_sge.c 208039 2010-05-13 17:45:32Z np $");
   32 
   33 #include "opt_inet.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/module.h>
   39 #include <sys/bus.h>
   40 #include <sys/conf.h>
   41 #include <machine/bus.h>
   42 #include <machine/resource.h>
   43 #include <sys/bus_dma.h>
   44 #include <sys/rman.h>
   45 #include <sys/queue.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/taskqueue.h>
   48 
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/systm.h>
   54 #include <sys/syslog.h>
   55 #include <sys/socket.h>
   56 
   57 #include <net/bpf.h>    
   58 #include <net/ethernet.h>
   59 #include <net/if.h>
   60 #include <net/if_vlan_var.h>
   61 
   62 #include <netinet/in_systm.h>
   63 #include <netinet/in.h>
   64 #include <netinet/ip.h>
   65 #include <netinet/tcp.h>
   66 
   67 #include <dev/pci/pcireg.h>
   68 #include <dev/pci/pcivar.h>
   69 
   70 #include <vm/vm.h>
   71 #include <vm/pmap.h>
   72 
   73 #include <cxgb_include.h>
   74 #include <sys/mvec.h>
   75 
   76 int     txq_fills = 0;
   77 int     multiq_tx_enable = 1;
   78 
   79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   83     "size of per-queue mbuf ring");
   84 
   85 static int cxgb_tx_coalesce_force = 0;
   86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   88     &cxgb_tx_coalesce_force, 0,
   89     "coalesce small packets into a single work request regardless of ring state");
   90 
   91 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   92 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   93 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
   94 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
   95 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
   96 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
   97 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
   98 
   99 
  100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  102     &cxgb_tx_coalesce_enable_start);
  103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  104     &cxgb_tx_coalesce_enable_start, 0,
  105     "coalesce enable threshold");
  106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  109     &cxgb_tx_coalesce_enable_stop, 0,
  110     "coalesce disable threshold");
  111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  114     &cxgb_tx_reclaim_threshold, 0,
  115     "tx cleaning minimum threshold");
  116 
  117 /*
  118  * XXX don't re-enable this until TOE stops assuming
  119  * we have an m_ext
  120  */
  121 static int recycle_enable = 0;
  122 
  123 extern int cxgb_use_16k_clusters;
  124 extern int nmbjumbop;
  125 extern int nmbjumbo9;
  126 extern int nmbjumbo16;
  127 
  128 #define USE_GTS 0
  129 
  130 #define SGE_RX_SM_BUF_SIZE      1536
  131 #define SGE_RX_DROP_THRES       16
  132 #define SGE_RX_COPY_THRES       128
  133 
  134 /*
  135  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  136  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  137  */
  138 #define TX_RECLAIM_PERIOD       (hz >> 1)
  139 
  140 /* 
  141  * Values for sge_txq.flags
  142  */
  143 enum {
  144         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  145         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  146 };
  147 
  148 struct tx_desc {
  149         uint64_t        flit[TX_DESC_FLITS];
  150 } __packed;
  151 
  152 struct rx_desc {
  153         uint32_t        addr_lo;
  154         uint32_t        len_gen;
  155         uint32_t        gen2;
  156         uint32_t        addr_hi;
  157 } __packed;
  158 
  159 struct rsp_desc {               /* response queue descriptor */
  160         struct rss_header       rss_hdr;
  161         uint32_t                flags;
  162         uint32_t                len_cq;
  163         uint8_t                 imm_data[47];
  164         uint8_t                 intr_gen;
  165 } __packed;
  166 
  167 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  168 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  169 #define RX_SW_DESC_INUSE        (1 << 3)
  170 #define TX_SW_DESC_MAPPED       (1 << 4)
  171 
  172 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  173 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  174 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  175 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  176 
  177 struct tx_sw_desc {                /* SW state per Tx descriptor */
  178         struct mbuf     *m;
  179         bus_dmamap_t    map;
  180         int             flags;
  181 };
  182 
  183 struct rx_sw_desc {                /* SW state per Rx descriptor */
  184         caddr_t         rxsd_cl;
  185         struct mbuf     *m;
  186         bus_dmamap_t    map;
  187         int             flags;
  188 };
  189 
  190 struct txq_state {
  191         unsigned int    compl;
  192         unsigned int    gen;
  193         unsigned int    pidx;
  194 };
  195 
  196 struct refill_fl_cb_arg {
  197         int               error;
  198         bus_dma_segment_t seg;
  199         int               nseg;
  200 };
  201 
  202 
  203 /*
  204  * Maps a number of flits to the number of Tx descriptors that can hold them.
  205  * The formula is
  206  *
  207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  208  *
  209  * HW allows up to 4 descriptors to be combined into a WR.
  210  */
  211 static uint8_t flit_desc_map[] = {
  212         0,
  213 #if SGE_NUM_GENBITS == 1
  214         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  215         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  216         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  217         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  218 #elif SGE_NUM_GENBITS == 2
  219         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  220         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  221         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  222         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  223 #else
  224 # error "SGE_NUM_GENBITS must be 1 or 2"
  225 #endif
  226 };
  227 
  228 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  229 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  230 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  231 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  232 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  233 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  234         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  237         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  238 #define TXQ_RING_DEQUEUE(qs) \
  239         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 
  241 int cxgb_debug = 0;
  242 
  243 static void sge_timer_cb(void *arg);
  244 static void sge_timer_reclaim(void *arg, int ncount);
  245 static void sge_txq_reclaim_handler(void *arg, int ncount);
  246 static void cxgb_start_locked(struct sge_qset *qs);
  247 
  248 /*
  249  * XXX need to cope with bursty scheduling by looking at a wider
  250  * window than we are now for determining the need for coalescing
  251  *
  252  */
  253 static __inline uint64_t
  254 check_pkt_coalesce(struct sge_qset *qs) 
  255 { 
  256         struct adapter *sc; 
  257         struct sge_txq *txq; 
  258         uint8_t *fill;
  259 
  260         if (__predict_false(cxgb_tx_coalesce_force))
  261                 return (1);
  262         txq = &qs->txq[TXQ_ETH]; 
  263         sc = qs->port->adapter; 
  264         fill = &sc->tunq_fill[qs->idx];
  265 
  266         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  267                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  268         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  269                 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
  270         /*
  271          * if the hardware transmit queue is more than 1/8 full
  272          * we mark it as coalescing - we drop back from coalescing
  273          * when we go below 1/32 full and there are no packets enqueued, 
  274          * this provides us with some degree of hysteresis
  275          */
  276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  277             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  278                 *fill = 0; 
  279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  280                 *fill = 1; 
  281 
  282         return (sc->tunq_coalesce);
  283 } 
  284 
  285 #ifdef __LP64__
  286 static void
  287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  288 {
  289         uint64_t wr_hilo;
  290 #if _BYTE_ORDER == _LITTLE_ENDIAN
  291         wr_hilo = wr_hi;
  292         wr_hilo |= (((uint64_t)wr_lo)<<32);
  293 #else
  294         wr_hilo = wr_lo;
  295         wr_hilo |= (((uint64_t)wr_hi)<<32);
  296 #endif  
  297         wrp->wrh_hilo = wr_hilo;
  298 }
  299 #else
  300 static void
  301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  302 {
  303 
  304         wrp->wrh_hi = wr_hi;
  305         wmb();
  306         wrp->wrh_lo = wr_lo;
  307 }
  308 #endif
  309 
  310 struct coalesce_info {
  311         int count;
  312         int nbytes;
  313 };
  314 
  315 static int
  316 coalesce_check(struct mbuf *m, void *arg)
  317 {
  318         struct coalesce_info *ci = arg;
  319         int *count = &ci->count;
  320         int *nbytes = &ci->nbytes;
  321 
  322         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  323                 (*count < 7) && (m->m_next == NULL))) {
  324                 *count += 1;
  325                 *nbytes += m->m_len;
  326                 return (1);
  327         }
  328         return (0);
  329 }
  330 
  331 static struct mbuf *
  332 cxgb_dequeue(struct sge_qset *qs)
  333 {
  334         struct mbuf *m, *m_head, *m_tail;
  335         struct coalesce_info ci;
  336 
  337         
  338         if (check_pkt_coalesce(qs) == 0) 
  339                 return TXQ_RING_DEQUEUE(qs);
  340 
  341         m_head = m_tail = NULL;
  342         ci.count = ci.nbytes = 0;
  343         do {
  344                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  345                 if (m_head == NULL) {
  346                         m_tail = m_head = m;
  347                 } else if (m != NULL) {
  348                         m_tail->m_nextpkt = m;
  349                         m_tail = m;
  350                 }
  351         } while (m != NULL);
  352         if (ci.count > 7)
  353                 panic("trying to coalesce %d packets in to one WR", ci.count);
  354         return (m_head);
  355 }
  356         
  357 /**
  358  *      reclaim_completed_tx - reclaims completed Tx descriptors
  359  *      @adapter: the adapter
  360  *      @q: the Tx queue to reclaim completed descriptors from
  361  *
  362  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  363  *      and frees the associated buffers if possible.  Called with the Tx
  364  *      queue's lock held.
  365  */
  366 static __inline int
  367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  368 {
  369         struct sge_txq *q = &qs->txq[queue];
  370         int reclaim = desc_reclaimable(q);
  371 
  372         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  373             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  374                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  375 
  376         if (reclaim < reclaim_min)
  377                 return (0);
  378 
  379         mtx_assert(&qs->lock, MA_OWNED);
  380         if (reclaim > 0) {
  381                 t3_free_tx_desc(qs, reclaim, queue);
  382                 q->cleaned += reclaim;
  383                 q->in_use -= reclaim;
  384         }
  385         if (isset(&qs->txq_stopped, TXQ_ETH))
  386                 clrbit(&qs->txq_stopped, TXQ_ETH);
  387 
  388         return (reclaim);
  389 }
  390 
  391 /**
  392  *      should_restart_tx - are there enough resources to restart a Tx queue?
  393  *      @q: the Tx queue
  394  *
  395  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  396  */
  397 static __inline int
  398 should_restart_tx(const struct sge_txq *q)
  399 {
  400         unsigned int r = q->processed - q->cleaned;
  401 
  402         return q->in_use - r < (q->size >> 1);
  403 }
  404 
  405 /**
  406  *      t3_sge_init - initialize SGE
  407  *      @adap: the adapter
  408  *      @p: the SGE parameters
  409  *
  410  *      Performs SGE initialization needed every time after a chip reset.
  411  *      We do not initialize any of the queue sets here, instead the driver
  412  *      top-level must request those individually.  We also do not enable DMA
  413  *      here, that should be done after the queues have been set up.
  414  */
  415 void
  416 t3_sge_init(adapter_t *adap, struct sge_params *p)
  417 {
  418         u_int ctrl, ups;
  419 
  420         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  421 
  422         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  423                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  424                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  425                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  426 #if SGE_NUM_GENBITS == 1
  427         ctrl |= F_EGRGENCTRL;
  428 #endif
  429         if (adap->params.rev > 0) {
  430                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  431                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  432         }
  433         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  434         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  435                      V_LORCQDRBTHRSH(512));
  436         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  437         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  438                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  439         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  440                      adap->params.rev < T3_REV_C ? 1000 : 500);
  441         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  442         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  443         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  444         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  445         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  446 }
  447 
  448 
  449 /**
  450  *      sgl_len - calculates the size of an SGL of the given capacity
  451  *      @n: the number of SGL entries
  452  *
  453  *      Calculates the number of flits needed for a scatter/gather list that
  454  *      can hold the given number of entries.
  455  */
  456 static __inline unsigned int
  457 sgl_len(unsigned int n)
  458 {
  459         return ((3 * n) / 2 + (n & 1));
  460 }
  461 
  462 /**
  463  *      get_imm_packet - return the next ingress packet buffer from a response
  464  *      @resp: the response descriptor containing the packet data
  465  *
  466  *      Return a packet containing the immediate data of the given response.
  467  */
  468 static int
  469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  470 {
  471 
  472         m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
  473         m->m_ext.ext_buf = NULL;
  474         m->m_ext.ext_type = 0;
  475         memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
  476         return (0);     
  477 }
  478 
  479 static __inline u_int
  480 flits_to_desc(u_int n)
  481 {
  482         return (flit_desc_map[n]);
  483 }
  484 
  485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  486                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  487                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  488                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  489                     F_HIRCQPARITYERROR)
  490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  492                       F_RSPQDISABLED)
  493 
  494 /**
  495  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  496  *      @adapter: the adapter
  497  *
  498  *      Interrupt handler for SGE asynchronous (non-data) events.
  499  */
  500 void
  501 t3_sge_err_intr_handler(adapter_t *adapter)
  502 {
  503         unsigned int v, status;
  504 
  505         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  506         if (status & SGE_PARERR)
  507                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  508                          status & SGE_PARERR);
  509         if (status & SGE_FRAMINGERR)
  510                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  511                          status & SGE_FRAMINGERR);
  512         if (status & F_RSPQCREDITOVERFOW)
  513                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  514 
  515         if (status & F_RSPQDISABLED) {
  516                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  517 
  518                 CH_ALERT(adapter,
  519                          "packet delivered to disabled response queue (0x%x)\n",
  520                          (v >> S_RSPQ0DISABLED) & 0xff);
  521         }
  522 
  523         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  524         if (status & SGE_FATALERR)
  525                 t3_fatal_err(adapter);
  526 }
  527 
  528 void
  529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  530 {
  531         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  532 
  533         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  534         nqsets *= adap->params.nports;
  535 
  536         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  537 
  538         while (!powerof2(fl_q_size))
  539                 fl_q_size--;
  540 
  541         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  542             is_offload(adap);
  543 
  544 #if __FreeBSD_version >= 700111
  545         if (use_16k) {
  546                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  547                 jumbo_buf_size = MJUM16BYTES;
  548         } else {
  549                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  550                 jumbo_buf_size = MJUM9BYTES;
  551         }
  552 #else
  553         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  554         jumbo_buf_size = MJUMPAGESIZE;
  555 #endif
  556         while (!powerof2(jumbo_q_size))
  557                 jumbo_q_size--;
  558 
  559         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  560                 device_printf(adap->dev,
  561                     "Insufficient clusters and/or jumbo buffers.\n");
  562 
  563         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  564 
  565         for (i = 0; i < SGE_QSETS; ++i) {
  566                 struct qset_params *q = p->qset + i;
  567 
  568                 if (adap->params.nports > 2) {
  569                         q->coalesce_usecs = 50;
  570                 } else {
  571 #ifdef INVARIANTS                       
  572                         q->coalesce_usecs = 10;
  573 #else
  574                         q->coalesce_usecs = 5;
  575 #endif                  
  576                 }
  577                 q->polling = 0;
  578                 q->rspq_size = RSPQ_Q_SIZE;
  579                 q->fl_size = fl_q_size;
  580                 q->jumbo_size = jumbo_q_size;
  581                 q->jumbo_buf_size = jumbo_buf_size;
  582                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  583                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  584                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  585                 q->cong_thres = 0;
  586         }
  587 }
  588 
  589 int
  590 t3_sge_alloc(adapter_t *sc)
  591 {
  592 
  593         /* The parent tag. */
  594         if (bus_dma_tag_create( NULL,                   /* parent */
  595                                 1, 0,                   /* algnmnt, boundary */
  596                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  597                                 BUS_SPACE_MAXADDR,      /* highaddr */
  598                                 NULL, NULL,             /* filter, filterarg */
  599                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  600                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  601                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  602                                 0,                      /* flags */
  603                                 NULL, NULL,             /* lock, lockarg */
  604                                 &sc->parent_dmat)) {
  605                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  606                 return (ENOMEM);
  607         }
  608 
  609         /*
  610          * DMA tag for normal sized RX frames
  611          */
  612         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  613                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  614                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  615                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  616                 return (ENOMEM);
  617         }
  618 
  619         /* 
  620          * DMA tag for jumbo sized RX frames.
  621          */
  622         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  623                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  624                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  625                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  626                 return (ENOMEM);
  627         }
  628 
  629         /* 
  630          * DMA tag for TX frames.
  631          */
  632         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  633                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  634                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  635                 NULL, NULL, &sc->tx_dmat)) {
  636                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  637                 return (ENOMEM);
  638         }
  639 
  640         return (0);
  641 }
  642 
  643 int
  644 t3_sge_free(struct adapter * sc)
  645 {
  646 
  647         if (sc->tx_dmat != NULL)
  648                 bus_dma_tag_destroy(sc->tx_dmat);
  649 
  650         if (sc->rx_jumbo_dmat != NULL)
  651                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  652 
  653         if (sc->rx_dmat != NULL)
  654                 bus_dma_tag_destroy(sc->rx_dmat);
  655 
  656         if (sc->parent_dmat != NULL)
  657                 bus_dma_tag_destroy(sc->parent_dmat);
  658 
  659         return (0);
  660 }
  661 
  662 void
  663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  664 {
  665 
  666         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  667         qs->rspq.polling = 0 /* p->polling */;
  668 }
  669 
  670 #if !defined(__i386__) && !defined(__amd64__)
  671 static void
  672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  673 {
  674         struct refill_fl_cb_arg *cb_arg = arg;
  675         
  676         cb_arg->error = error;
  677         cb_arg->seg = segs[0];
  678         cb_arg->nseg = nseg;
  679 
  680 }
  681 #endif
  682 /**
  683  *      refill_fl - refill an SGE free-buffer list
  684  *      @sc: the controller softc
  685  *      @q: the free-list to refill
  686  *      @n: the number of new buffers to allocate
  687  *
  688  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  689  *      The caller must assure that @n does not exceed the queue's capacity.
  690  */
  691 static void
  692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  693 {
  694         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  695         struct rx_desc *d = &q->desc[q->pidx];
  696         struct refill_fl_cb_arg cb_arg;
  697         struct mbuf *m;
  698         caddr_t cl;
  699         int err;
  700         
  701         cb_arg.error = 0;
  702         while (n--) {
  703                 /*
  704                  * We only allocate a cluster, mbuf allocation happens after rx
  705                  */
  706                 if (q->zone == zone_pack) {
  707                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  708                                 break;
  709                         cl = m->m_ext.ext_buf;                  
  710                 } else {
  711                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  712                                 break;
  713                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  714                                 uma_zfree(q->zone, cl);
  715                                 break;
  716                         }
  717                 }
  718                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  719                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  720                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  721                                 uma_zfree(q->zone, cl);
  722                                 goto done;
  723                         }
  724                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  725                 }
  726 #if !defined(__i386__) && !defined(__amd64__)
  727                 err = bus_dmamap_load(q->entry_tag, sd->map,
  728                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  729                 
  730                 if (err != 0 || cb_arg.error) {
  731                         if (q->zone == zone_pack)
  732                                 uma_zfree(q->zone, cl);
  733                         m_free(m);
  734                         goto done;
  735                 }
  736 #else
  737                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  738 #endif          
  739                 sd->flags |= RX_SW_DESC_INUSE;
  740                 sd->rxsd_cl = cl;
  741                 sd->m = m;
  742                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  743                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  744                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  745                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  746 
  747                 d++;
  748                 sd++;
  749 
  750                 if (++q->pidx == q->size) {
  751                         q->pidx = 0;
  752                         q->gen ^= 1;
  753                         sd = q->sdesc;
  754                         d = q->desc;
  755                 }
  756                 q->credits++;
  757                 q->db_pending++;
  758         }
  759 
  760 done:
  761         if (q->db_pending >= 32) {
  762                 q->db_pending = 0;
  763                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  764         }
  765 }
  766 
  767 
  768 /**
  769  *      free_rx_bufs - free the Rx buffers on an SGE free list
  770  *      @sc: the controle softc
  771  *      @q: the SGE free list to clean up
  772  *
  773  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  774  *      this queue should be stopped before calling this function.
  775  */
  776 static void
  777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  778 {
  779         u_int cidx = q->cidx;
  780 
  781         while (q->credits--) {
  782                 struct rx_sw_desc *d = &q->sdesc[cidx];
  783 
  784                 if (d->flags & RX_SW_DESC_INUSE) {
  785                         bus_dmamap_unload(q->entry_tag, d->map);
  786                         bus_dmamap_destroy(q->entry_tag, d->map);
  787                         if (q->zone == zone_pack) {
  788                                 m_init(d->m, zone_pack, MCLBYTES,
  789                                     M_NOWAIT, MT_DATA, M_EXT);
  790                                 uma_zfree(zone_pack, d->m);
  791                         } else {
  792                                 m_init(d->m, zone_mbuf, MLEN,
  793                                     M_NOWAIT, MT_DATA, 0);
  794                                 uma_zfree(zone_mbuf, d->m);
  795                                 uma_zfree(q->zone, d->rxsd_cl);
  796                         }                       
  797                 }
  798                 
  799                 d->rxsd_cl = NULL;
  800                 d->m = NULL;
  801                 if (++cidx == q->size)
  802                         cidx = 0;
  803         }
  804 }
  805 
  806 static __inline void
  807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  808 {
  809         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  810 }
  811 
  812 static __inline void
  813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  814 {
  815         uint32_t reclaimable = fl->size - fl->credits;
  816 
  817         if (reclaimable > 0)
  818                 refill_fl(adap, fl, min(max, reclaimable));
  819 }
  820 
  821 /**
  822  *      recycle_rx_buf - recycle a receive buffer
  823  *      @adapter: the adapter
  824  *      @q: the SGE free list
  825  *      @idx: index of buffer to recycle
  826  *
  827  *      Recycles the specified buffer on the given free list by adding it at
  828  *      the next available slot on the list.
  829  */
  830 static void
  831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  832 {
  833         struct rx_desc *from = &q->desc[idx];
  834         struct rx_desc *to   = &q->desc[q->pidx];
  835 
  836         q->sdesc[q->pidx] = q->sdesc[idx];
  837         to->addr_lo = from->addr_lo;        // already big endian
  838         to->addr_hi = from->addr_hi;        // likewise
  839         wmb();  /* necessary ? */
  840         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  841         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  842         q->credits++;
  843 
  844         if (++q->pidx == q->size) {
  845                 q->pidx = 0;
  846                 q->gen ^= 1;
  847         }
  848         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  849 }
  850 
  851 static void
  852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  853 {
  854         uint32_t *addr;
  855 
  856         addr = arg;
  857         *addr = segs[0].ds_addr;
  858 }
  859 
  860 static int
  861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  862     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  863     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  864 {
  865         size_t len = nelem * elem_size;
  866         void *s = NULL;
  867         void *p = NULL;
  868         int err;
  869 
  870         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  871                                       BUS_SPACE_MAXADDR_32BIT,
  872                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  873                                       len, 0, NULL, NULL, tag)) != 0) {
  874                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  875                 return (ENOMEM);
  876         }
  877 
  878         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  879                                     map)) != 0) {
  880                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  881                 return (ENOMEM);
  882         }
  883 
  884         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  885         bzero(p, len);
  886         *(void **)desc = p;
  887 
  888         if (sw_size) {
  889                 len = nelem * sw_size;
  890                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  891                 *(void **)sdesc = s;
  892         }
  893         if (parent_entry_tag == NULL)
  894                 return (0);
  895             
  896         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  897                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  898                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  899                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  900                                       NULL, NULL, entry_tag)) != 0) {
  901                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  902                 return (ENOMEM);
  903         }
  904         return (0);
  905 }
  906 
  907 static void
  908 sge_slow_intr_handler(void *arg, int ncount)
  909 {
  910         adapter_t *sc = arg;
  911 
  912         t3_slow_intr_handler(sc);
  913 }
  914 
  915 /**
  916  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  917  *      @data: the SGE queue set to maintain
  918  *
  919  *      Runs periodically from a timer to perform maintenance of an SGE queue
  920  *      set.  It performs two tasks:
  921  *
  922  *      a) Cleans up any completed Tx descriptors that may still be pending.
  923  *      Normal descriptor cleanup happens when new packets are added to a Tx
  924  *      queue so this timer is relatively infrequent and does any cleanup only
  925  *      if the Tx queue has not seen any new packets in a while.  We make a
  926  *      best effort attempt to reclaim descriptors, in that we don't wait
  927  *      around if we cannot get a queue's lock (which most likely is because
  928  *      someone else is queueing new packets and so will also handle the clean
  929  *      up).  Since control queues use immediate data exclusively we don't
  930  *      bother cleaning them up here.
  931  *
  932  *      b) Replenishes Rx queues that have run out due to memory shortage.
  933  *      Normally new Rx buffers are added when existing ones are consumed but
  934  *      when out of memory a queue can become empty.  We try to add only a few
  935  *      buffers here, the queue will be replenished fully as these new buffers
  936  *      are used up if memory shortage has subsided.
  937  *      
  938  *      c) Return coalesced response queue credits in case a response queue is
  939  *      starved.
  940  *
  941  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  942  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  943  */
  944 static void
  945 sge_timer_cb(void *arg)
  946 {
  947         adapter_t *sc = arg;
  948         if ((sc->flags & USING_MSIX) == 0) {
  949                 
  950                 struct port_info *pi;
  951                 struct sge_qset *qs;
  952                 struct sge_txq  *txq;
  953                 int i, j;
  954                 int reclaim_ofl, refill_rx;
  955 
  956                 if (sc->open_device_map == 0) 
  957                         return;
  958 
  959                 for (i = 0; i < sc->params.nports; i++) {
  960                         pi = &sc->port[i];
  961                         for (j = 0; j < pi->nqsets; j++) {
  962                                 qs = &sc->sge.qs[pi->first_qset + j];
  963                                 txq = &qs->txq[0];
  964                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  965                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  966                                     (qs->fl[1].credits < qs->fl[1].size));
  967                                 if (reclaim_ofl || refill_rx) {
  968                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  969                                         break;
  970                                 }
  971                         }
  972                 }
  973         }
  974         
  975         if (sc->params.nports > 2) {
  976                 int i;
  977 
  978                 for_each_port(sc, i) {
  979                         struct port_info *pi = &sc->port[i];
  980 
  981                         t3_write_reg(sc, A_SG_KDOORBELL, 
  982                                      F_SELEGRCNTX | 
  983                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  984                 }
  985         }       
  986         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
  987             sc->open_device_map != 0)
  988                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  989 }
  990 
  991 /*
  992  * This is meant to be a catch-all function to keep sge state private
  993  * to sge.c
  994  *
  995  */
  996 int
  997 t3_sge_init_adapter(adapter_t *sc)
  998 {
  999         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1000         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1001         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1002         return (0);
 1003 }
 1004 
 1005 int
 1006 t3_sge_reset_adapter(adapter_t *sc)
 1007 {
 1008         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1009         return (0);
 1010 }
 1011 
 1012 int
 1013 t3_sge_init_port(struct port_info *pi)
 1014 {
 1015         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1016         return (0);
 1017 }
 1018 
 1019 /**
 1020  *      refill_rspq - replenish an SGE response queue
 1021  *      @adapter: the adapter
 1022  *      @q: the response queue to replenish
 1023  *      @credits: how many new responses to make available
 1024  *
 1025  *      Replenishes a response queue by making the supplied number of responses
 1026  *      available to HW.
 1027  */
 1028 static __inline void
 1029 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1030 {
 1031 
 1032         /* mbufs are allocated on demand when a rspq entry is processed. */
 1033         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1034                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1035 }
 1036 
 1037 static void
 1038 sge_txq_reclaim_handler(void *arg, int ncount)
 1039 {
 1040         struct sge_qset *qs = arg;
 1041         int i;
 1042 
 1043         for (i = 0; i < 3; i++)
 1044                 reclaim_completed_tx(qs, 16, i);
 1045 }
 1046 
 1047 static void
 1048 sge_timer_reclaim(void *arg, int ncount)
 1049 {
 1050         struct port_info *pi = arg;
 1051         int i, nqsets = pi->nqsets;
 1052         adapter_t *sc = pi->adapter;
 1053         struct sge_qset *qs;
 1054         struct mtx *lock;
 1055         
 1056         KASSERT((sc->flags & USING_MSIX) == 0,
 1057             ("can't call timer reclaim for msi-x"));
 1058 
 1059         for (i = 0; i < nqsets; i++) {
 1060                 qs = &sc->sge.qs[pi->first_qset + i];
 1061 
 1062                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1063                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1064                             &sc->sge.qs[0].rspq.lock;
 1065 
 1066                 if (mtx_trylock(lock)) {
 1067                         /* XXX currently assume that we are *NOT* polling */
 1068                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1069 
 1070                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1071                                 __refill_fl(sc, &qs->fl[0]);
 1072                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1073                                 __refill_fl(sc, &qs->fl[1]);
 1074                         
 1075                         if (status & (1 << qs->rspq.cntxt_id)) {
 1076                                 if (qs->rspq.credits) {
 1077                                         refill_rspq(sc, &qs->rspq, 1);
 1078                                         qs->rspq.credits--;
 1079                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1080                                             1 << qs->rspq.cntxt_id);
 1081                                 }
 1082                         }
 1083                         mtx_unlock(lock);
 1084                 }
 1085         }
 1086 }
 1087 
 1088 /**
 1089  *      init_qset_cntxt - initialize an SGE queue set context info
 1090  *      @qs: the queue set
 1091  *      @id: the queue set id
 1092  *
 1093  *      Initializes the TIDs and context ids for the queues of a queue set.
 1094  */
 1095 static void
 1096 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1097 {
 1098 
 1099         qs->rspq.cntxt_id = id;
 1100         qs->fl[0].cntxt_id = 2 * id;
 1101         qs->fl[1].cntxt_id = 2 * id + 1;
 1102         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1103         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1104         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1105         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1106         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1107 
 1108         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1109         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1110         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1111 }
 1112 
 1113 
 1114 static void
 1115 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1116 {
 1117         txq->in_use += ndesc;
 1118         /*
 1119          * XXX we don't handle stopping of queue
 1120          * presumably start handles this when we bump against the end
 1121          */
 1122         txqs->gen = txq->gen;
 1123         txq->unacked += ndesc;
 1124         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1125         txq->unacked &= 31;
 1126         txqs->pidx = txq->pidx;
 1127         txq->pidx += ndesc;
 1128 #ifdef INVARIANTS
 1129         if (((txqs->pidx > txq->cidx) &&
 1130                 (txq->pidx < txqs->pidx) &&
 1131                 (txq->pidx >= txq->cidx)) ||
 1132             ((txqs->pidx < txq->cidx) &&
 1133                 (txq->pidx >= txq-> cidx)) ||
 1134             ((txqs->pidx < txq->cidx) &&
 1135                 (txq->cidx < txqs->pidx)))
 1136                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1137                     txqs->pidx, txq->pidx, txq->cidx);
 1138 #endif
 1139         if (txq->pidx >= txq->size) {
 1140                 txq->pidx -= txq->size;
 1141                 txq->gen ^= 1;
 1142         }
 1143 
 1144 }
 1145 
 1146 /**
 1147  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1148  *      @m: the packet mbufs
 1149  *      @nsegs: the number of segments 
 1150  *
 1151  *      Returns the number of Tx descriptors needed for the given Ethernet
 1152  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1153  */
 1154 static __inline unsigned int
 1155 calc_tx_descs(const struct mbuf *m, int nsegs)
 1156 {
 1157         unsigned int flits;
 1158 
 1159         if (m->m_pkthdr.len <= PIO_LEN)
 1160                 return 1;
 1161 
 1162         flits = sgl_len(nsegs) + 2;
 1163         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1164                 flits++;
 1165 
 1166         return flits_to_desc(flits);
 1167 }
 1168 
 1169 static unsigned int
 1170 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
 1171     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
 1172 {
 1173         struct mbuf *m0;
 1174         int err, pktlen, pass = 0;
 1175         bus_dma_tag_t tag = txq->entry_tag;
 1176 
 1177 retry:
 1178         err = 0;
 1179         m0 = *m;
 1180         pktlen = m0->m_pkthdr.len;
 1181 #if defined(__i386__) || defined(__amd64__)
 1182         if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
 1183                 goto done;
 1184         } else
 1185 #endif
 1186                 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
 1187 
 1188         if (err == 0) {
 1189                 goto done;
 1190         }
 1191         if (err == EFBIG && pass == 0) {
 1192                 pass = 1;
 1193                 /* Too many segments, try to defrag */
 1194                 m0 = m_defrag(m0, M_DONTWAIT);
 1195                 if (m0 == NULL) {
 1196                         m_freem(*m);
 1197                         *m = NULL;
 1198                         return (ENOBUFS);
 1199                 }
 1200                 *m = m0;
 1201                 goto retry;
 1202         } else if (err == ENOMEM) {
 1203                 return (err);
 1204         } if (err) {
 1205                 if (cxgb_debug)
 1206                         printf("map failure err=%d pktlen=%d\n", err, pktlen);
 1207                 m_freem(m0);
 1208                 *m = NULL;
 1209                 return (err);
 1210         }
 1211 done:
 1212 #if !defined(__i386__) && !defined(__amd64__)
 1213         bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
 1214 #endif  
 1215         txsd->flags |= TX_SW_DESC_MAPPED;
 1216 
 1217         return (0);
 1218 }
 1219 
 1220 /**
 1221  *      make_sgl - populate a scatter/gather list for a packet
 1222  *      @sgp: the SGL to populate
 1223  *      @segs: the packet dma segments
 1224  *      @nsegs: the number of segments
 1225  *
 1226  *      Generates a scatter/gather list for the buffers that make up a packet
 1227  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1228  *      appropriately.
 1229  */
 1230 static __inline void
 1231 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1232 {
 1233         int i, idx;
 1234         
 1235         for (idx = 0, i = 0; i < nsegs; i++) {
 1236                 /*
 1237                  * firmware doesn't like empty segments
 1238                  */
 1239                 if (segs[i].ds_len == 0)
 1240                         continue;
 1241                 if (i && idx == 0) 
 1242                         ++sgp;
 1243                 
 1244                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1245                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1246                 idx ^= 1;
 1247         }
 1248         
 1249         if (idx) {
 1250                 sgp->len[idx] = 0;
 1251                 sgp->addr[idx] = 0;
 1252         }
 1253 }
 1254         
 1255 /**
 1256  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1257  *      @adap: the adapter
 1258  *      @q: the Tx queue
 1259  *
 1260  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1261  *      where the HW is going to sleep just after we checked, however,
 1262  *      then the interrupt handler will detect the outstanding TX packet
 1263  *      and ring the doorbell for us.
 1264  *
 1265  *      When GTS is disabled we unconditionally ring the doorbell.
 1266  */
 1267 static __inline void
 1268 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1269 {
 1270 #if USE_GTS
 1271         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1272         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1273                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1274 #ifdef T3_TRACE
 1275                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1276                           q->cntxt_id);
 1277 #endif
 1278                 t3_write_reg(adap, A_SG_KDOORBELL,
 1279                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1280         }
 1281 #else
 1282         if (mustring || ++q->db_pending >= 32) {
 1283                 wmb();            /* write descriptors before telling HW */
 1284                 t3_write_reg(adap, A_SG_KDOORBELL,
 1285                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1286                 q->db_pending = 0;
 1287         }
 1288 #endif
 1289 }
 1290 
 1291 static __inline void
 1292 wr_gen2(struct tx_desc *d, unsigned int gen)
 1293 {
 1294 #if SGE_NUM_GENBITS == 2
 1295         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1296 #endif
 1297 }
 1298 
 1299 /**
 1300  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1301  *      @ndesc: number of Tx descriptors spanned by the SGL
 1302  *      @txd: first Tx descriptor to be written
 1303  *      @txqs: txq state (generation and producer index)
 1304  *      @txq: the SGE Tx queue
 1305  *      @sgl: the SGL
 1306  *      @flits: number of flits to the start of the SGL in the first descriptor
 1307  *      @sgl_flits: the SGL size in flits
 1308  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1309  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1310  *
 1311  *      Write a work request header and an associated SGL.  If the SGL is
 1312  *      small enough to fit into one Tx descriptor it has already been written
 1313  *      and we just need to write the WR header.  Otherwise we distribute the
 1314  *      SGL across the number of descriptors it spans.
 1315  */
 1316 static void
 1317 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1318     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1319     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1320 {
 1321 
 1322         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1323         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1324         
 1325         if (__predict_true(ndesc == 1)) {
 1326                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1327                         V_WR_SGLSFLT(flits)) | wr_hi,
 1328                     htonl(V_WR_LEN(flits + sgl_flits) |
 1329                         V_WR_GEN(txqs->gen)) | wr_lo);
 1330                 /* XXX gen? */
 1331                 wr_gen2(txd, txqs->gen);
 1332                 
 1333         } else {
 1334                 unsigned int ogen = txqs->gen;
 1335                 const uint64_t *fp = (const uint64_t *)sgl;
 1336                 struct work_request_hdr *wp = wrp;
 1337                 
 1338                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1339                     V_WR_SGLSFLT(flits)) | wr_hi;
 1340                 
 1341                 while (sgl_flits) {
 1342                         unsigned int avail = WR_FLITS - flits;
 1343 
 1344                         if (avail > sgl_flits)
 1345                                 avail = sgl_flits;
 1346                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1347                         sgl_flits -= avail;
 1348                         ndesc--;
 1349                         if (!sgl_flits)
 1350                                 break;
 1351                         
 1352                         fp += avail;
 1353                         txd++;
 1354                         txsd++;
 1355                         if (++txqs->pidx == txq->size) {
 1356                                 txqs->pidx = 0;
 1357                                 txqs->gen ^= 1;
 1358                                 txd = txq->desc;
 1359                                 txsd = txq->sdesc;
 1360                         }
 1361 
 1362                         /*
 1363                          * when the head of the mbuf chain
 1364                          * is freed all clusters will be freed
 1365                          * with it
 1366                          */
 1367                         wrp = (struct work_request_hdr *)txd;
 1368                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1369                             V_WR_SGLSFLT(1)) | wr_hi;
 1370                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1371                                     sgl_flits + 1)) |
 1372                             V_WR_GEN(txqs->gen)) | wr_lo;
 1373                         wr_gen2(txd, txqs->gen);
 1374                         flits = 1;
 1375                 }
 1376                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1377                 wmb();
 1378                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1379                 wr_gen2((struct tx_desc *)wp, ogen);
 1380         }
 1381 }
 1382 
 1383 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1384 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1385 
 1386 #define GET_VTAG(cntrl, m) \
 1387 do { \
 1388         if ((m)->m_flags & M_VLANTAG)                                               \
 1389                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1390 } while (0)
 1391 
 1392 static int
 1393 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1394 {
 1395         adapter_t *sc;
 1396         struct mbuf *m0;
 1397         struct sge_txq *txq;
 1398         struct txq_state txqs;
 1399         struct port_info *pi;
 1400         unsigned int ndesc, flits, cntrl, mlen;
 1401         int err, nsegs, tso_info = 0;
 1402 
 1403         struct work_request_hdr *wrp;
 1404         struct tx_sw_desc *txsd;
 1405         struct sg_ent *sgp, *sgl;
 1406         uint32_t wr_hi, wr_lo, sgl_flits; 
 1407         bus_dma_segment_t segs[TX_MAX_SEGS];
 1408 
 1409         struct tx_desc *txd;
 1410                 
 1411         pi = qs->port;
 1412         sc = pi->adapter;
 1413         txq = &qs->txq[TXQ_ETH];
 1414         txd = &txq->desc[txq->pidx];
 1415         txsd = &txq->sdesc[txq->pidx];
 1416         sgl = txq->txq_sgl;
 1417 
 1418         prefetch(txd);
 1419         m0 = *m;
 1420 
 1421         mtx_assert(&qs->lock, MA_OWNED);
 1422         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1423         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1424         
 1425         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1426             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1427                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1428 
 1429         if (m0->m_nextpkt != NULL) {
 1430                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1431                 ndesc = 1;
 1432                 mlen = 0;
 1433         } else {
 1434                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1435                     &m0, segs, &nsegs))) {
 1436                         if (cxgb_debug)
 1437                                 printf("failed ... err=%d\n", err);
 1438                         return (err);
 1439                 }
 1440                 mlen = m0->m_pkthdr.len;
 1441                 ndesc = calc_tx_descs(m0, nsegs);
 1442         }
 1443         txq_prod(txq, ndesc, &txqs);
 1444 
 1445         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1446         txsd->m = m0;
 1447 
 1448         if (m0->m_nextpkt != NULL) {
 1449                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1450                 int i, fidx;
 1451 
 1452                 if (nsegs > 7)
 1453                         panic("trying to coalesce %d packets in to one WR", nsegs);
 1454                 txq->txq_coalesced += nsegs;
 1455                 wrp = (struct work_request_hdr *)txd;
 1456                 flits = nsegs*2 + 1;
 1457 
 1458                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1459                         struct cpl_tx_pkt_batch_entry *cbe;
 1460                         uint64_t flit;
 1461                         uint32_t *hflit = (uint32_t *)&flit;
 1462                         int cflags = m0->m_pkthdr.csum_flags;
 1463 
 1464                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1465                         GET_VTAG(cntrl, m0);
 1466                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1467                         if (__predict_false(!(cflags & CSUM_IP)))
 1468                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1469                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
 1470                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1471 
 1472                         hflit[0] = htonl(cntrl);
 1473                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1474                         flit |= htobe64(1 << 24);
 1475                         cbe = &cpl_batch->pkt_entry[i];
 1476                         cbe->cntrl = hflit[0];
 1477                         cbe->len = hflit[1];
 1478                         cbe->addr = htobe64(segs[i].ds_addr);
 1479                 }
 1480 
 1481                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1482                     V_WR_SGLSFLT(flits)) |
 1483                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1484                 wr_lo = htonl(V_WR_LEN(flits) |
 1485                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1486                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1487                 wmb();
 1488                 ETHER_BPF_MTAP(pi->ifp, m0);
 1489                 wr_gen2(txd, txqs.gen);
 1490                 check_ring_tx_db(sc, txq, 0);
 1491                 return (0);             
 1492         } else if (tso_info) {
 1493                 int eth_type;
 1494                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1495                 struct ether_header *eh;
 1496                 struct ip *ip;
 1497                 struct tcphdr *tcp;
 1498 
 1499                 txd->flit[2] = 0;
 1500                 GET_VTAG(cntrl, m0);
 1501                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1502                 hdr->cntrl = htonl(cntrl);
 1503                 hdr->len = htonl(mlen | 0x80000000);
 1504 
 1505                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1506                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1507                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1508                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1509                         panic("tx tso packet too small");
 1510                 }
 1511 
 1512                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1513                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1514                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1515                         if (__predict_false(m0 == NULL)) {
 1516                                 /* XXX panic probably an overreaction */
 1517                                 panic("couldn't fit header into mbuf");
 1518                         }
 1519                 }
 1520 
 1521                 eh = mtod(m0, struct ether_header *);
 1522                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 1523                         eth_type = CPL_ETH_II_VLAN;
 1524                         ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
 1525                 } else {
 1526                         eth_type = CPL_ETH_II;
 1527                         ip = (struct ip *)(eh + 1);
 1528                 }
 1529                 tcp = (struct tcphdr *)(ip + 1);
 1530 
 1531                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
 1532                             V_LSO_IPHDR_WORDS(ip->ip_hl) |
 1533                             V_LSO_TCPHDR_WORDS(tcp->th_off);
 1534                 hdr->lso_info = htonl(tso_info);
 1535 
 1536                 if (__predict_false(mlen <= PIO_LEN)) {
 1537                         /*
 1538                          * pkt not undersized but fits in PIO_LEN
 1539                          * Indicates a TSO bug at the higher levels.
 1540                          */
 1541                         txsd->m = NULL;
 1542                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1543                         flits = (mlen + 7) / 8 + 3;
 1544                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1545                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1546                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1547                         wr_lo = htonl(V_WR_LEN(flits) |
 1548                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1549                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1550                         wmb();
 1551                         ETHER_BPF_MTAP(pi->ifp, m0);
 1552                         wr_gen2(txd, txqs.gen);
 1553                         check_ring_tx_db(sc, txq, 0);
 1554                         m_freem(m0);
 1555                         return (0);
 1556                 }
 1557                 flits = 3;      
 1558         } else {
 1559                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1560                 
 1561                 GET_VTAG(cntrl, m0);
 1562                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1563                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1564                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1565                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
 1566                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1567                 cpl->cntrl = htonl(cntrl);
 1568                 cpl->len = htonl(mlen | 0x80000000);
 1569 
 1570                 if (mlen <= PIO_LEN) {
 1571                         txsd->m = NULL;
 1572                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1573                         flits = (mlen + 7) / 8 + 2;
 1574                         
 1575                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1576                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1577                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1578                         wr_lo = htonl(V_WR_LEN(flits) |
 1579                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1580                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1581                         wmb();
 1582                         ETHER_BPF_MTAP(pi->ifp, m0);
 1583                         wr_gen2(txd, txqs.gen);
 1584                         check_ring_tx_db(sc, txq, 0);
 1585                         m_freem(m0);
 1586                         return (0);
 1587                 }
 1588                 flits = 2;
 1589         }
 1590         wrp = (struct work_request_hdr *)txd;
 1591         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1592         make_sgl(sgp, segs, nsegs);
 1593 
 1594         sgl_flits = sgl_len(nsegs);
 1595 
 1596         ETHER_BPF_MTAP(pi->ifp, m0);
 1597 
 1598         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1599         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1600         wr_lo = htonl(V_WR_TID(txq->token));
 1601         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1602             sgl_flits, wr_hi, wr_lo);
 1603         check_ring_tx_db(sc, txq, 0);
 1604 
 1605         return (0);
 1606 }
 1607 
 1608 void
 1609 cxgb_tx_watchdog(void *arg)
 1610 {
 1611         struct sge_qset *qs = arg;
 1612         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1613 
 1614         if (qs->coalescing != 0 &&
 1615             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1616             TXQ_RING_EMPTY(qs))
 1617                 qs->coalescing = 0; 
 1618         else if (qs->coalescing == 0 &&
 1619             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1620                 qs->coalescing = 1;
 1621         if (TXQ_TRYLOCK(qs)) {
 1622                 qs->qs_flags |= QS_FLUSHING;
 1623                 cxgb_start_locked(qs);
 1624                 qs->qs_flags &= ~QS_FLUSHING;
 1625                 TXQ_UNLOCK(qs);
 1626         }
 1627         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1628                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1629                     qs, txq->txq_watchdog.c_cpu);
 1630 }
 1631 
 1632 static void
 1633 cxgb_tx_timeout(void *arg)
 1634 {
 1635         struct sge_qset *qs = arg;
 1636         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1637 
 1638         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1639                 qs->coalescing = 1;     
 1640         if (TXQ_TRYLOCK(qs)) {
 1641                 qs->qs_flags |= QS_TIMEOUT;
 1642                 cxgb_start_locked(qs);
 1643                 qs->qs_flags &= ~QS_TIMEOUT;
 1644                 TXQ_UNLOCK(qs);
 1645         }
 1646 }
 1647 
 1648 static void
 1649 cxgb_start_locked(struct sge_qset *qs)
 1650 {
 1651         struct mbuf *m_head = NULL;
 1652         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1653         struct port_info *pi = qs->port;
 1654         struct ifnet *ifp = pi->ifp;
 1655 
 1656         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1657                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1658 
 1659         if (!pi->link_config.link_ok) {
 1660                 TXQ_RING_FLUSH(qs);
 1661                 return;
 1662         }
 1663         TXQ_LOCK_ASSERT(qs);
 1664         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1665             pi->link_config.link_ok) {
 1666                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1667 
 1668                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1669                         break;
 1670 
 1671                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1672                         break;
 1673                 /*
 1674                  *  Encapsulation can modify our pointer, and or make it
 1675                  *  NULL on failure.  In that event, we can't requeue.
 1676                  */
 1677                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1678                         break;
 1679 
 1680                 m_head = NULL;
 1681         }
 1682 
 1683         if (txq->db_pending)
 1684                 check_ring_tx_db(pi->adapter, txq, 1);
 1685 
 1686         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1687             pi->link_config.link_ok)
 1688                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1689                     qs, txq->txq_timer.c_cpu);
 1690         if (m_head != NULL)
 1691                 m_freem(m_head);
 1692 }
 1693 
 1694 static int
 1695 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1696 {
 1697         struct port_info *pi = qs->port;
 1698         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1699         struct buf_ring *br = txq->txq_mr;
 1700         int error, avail;
 1701 
 1702         avail = txq->size - txq->in_use;
 1703         TXQ_LOCK_ASSERT(qs);
 1704 
 1705         /*
 1706          * We can only do a direct transmit if the following are true:
 1707          * - we aren't coalescing (ring < 3/4 full)
 1708          * - the link is up -- checked in caller
 1709          * - there are no packets enqueued already
 1710          * - there is space in hardware transmit queue 
 1711          */
 1712         if (check_pkt_coalesce(qs) == 0 &&
 1713             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1714                 if (t3_encap(qs, &m)) {
 1715                         if (m != NULL &&
 1716                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1717                                 return (error);
 1718                 } else {
 1719                         if (txq->db_pending)
 1720                                 check_ring_tx_db(pi->adapter, txq, 1);
 1721 
 1722                         /*
 1723                          * We've bypassed the buf ring so we need to update
 1724                          * the stats directly
 1725                          */
 1726                         txq->txq_direct_packets++;
 1727                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1728                 }
 1729         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1730                 return (error);
 1731 
 1732         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1733         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1734             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1735                 cxgb_start_locked(qs);
 1736         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1737                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1738                     qs, txq->txq_timer.c_cpu);
 1739         return (0);
 1740 }
 1741 
 1742 int
 1743 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1744 {
 1745         struct sge_qset *qs;
 1746         struct port_info *pi = ifp->if_softc;
 1747         int error, qidx = pi->first_qset;
 1748 
 1749         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1750             ||(!pi->link_config.link_ok)) {
 1751                 m_freem(m);
 1752                 return (0);
 1753         }
 1754         
 1755         if (m->m_flags & M_FLOWID)
 1756                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1757 
 1758         qs = &pi->adapter->sge.qs[qidx];
 1759         
 1760         if (TXQ_TRYLOCK(qs)) {
 1761                 /* XXX running */
 1762                 error = cxgb_transmit_locked(ifp, qs, m);
 1763                 TXQ_UNLOCK(qs);
 1764         } else
 1765                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1766         return (error);
 1767 }
 1768 void
 1769 cxgb_start(struct ifnet *ifp)
 1770 {
 1771         struct port_info *pi = ifp->if_softc;
 1772         struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
 1773         
 1774         if (!pi->link_config.link_ok)
 1775                 return;
 1776 
 1777         TXQ_LOCK(qs);
 1778         cxgb_start_locked(qs);
 1779         TXQ_UNLOCK(qs);
 1780 }
 1781 
 /*
  * Queue flush hook for the interface.
  *
  * Intended to discard mbufs held in the buf_rings and the transmit queues;
  * currently a no-op.
  */
 void
 cxgb_qflush(struct ifnet *ifp)
 {
         /* Nothing to do for now. */
 }
 1792 
 1793 /**
 1794  *      write_imm - write a packet into a Tx descriptor as immediate data
 1795  *      @d: the Tx descriptor to write
 1796  *      @m: the packet
 1797  *      @len: the length of packet data to write as immediate data
 1798  *      @gen: the generation bit value to write
 1799  *
 1800  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1801  *      contains a work request at its beginning.  We must write the packet
 1802  *      carefully so the SGE doesn't read accidentally before it's written in
 1803  *      its entirety.
 1804  */
 1805 static __inline void
 1806 write_imm(struct tx_desc *d, struct mbuf *m,
 1807           unsigned int len, unsigned int gen)
 1808 {
 1809         struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
 1810         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1811         uint32_t wr_hi, wr_lo;
 1812 
 1813         if (len > WR_LEN)
 1814                 panic("len too big %d\n", len);
 1815         if (len < sizeof(*from))
 1816                 panic("len too small %d", len);
 1817         
 1818         memcpy(&to[1], &from[1], len - sizeof(*from));
 1819         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1820                                         V_WR_BCNTLFLT(len & 7));
 1821         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
 1822                                         V_WR_LEN((len + 7) / 8));
 1823         set_wr_hdr(to, wr_hi, wr_lo);
 1824         wmb();
 1825         wr_gen2(d, gen);
 1826 
 1827         /*
 1828          * This check is a hack we should really fix the logic so
 1829          * that this can't happen
 1830          */
 1831         if (m->m_type != MT_DONTFREE)
 1832                 m_freem(m);
 1833         
 1834 }
 1835 
 1836 /**
 1837  *      check_desc_avail - check descriptor availability on a send queue
 1838  *      @adap: the adapter
 1839  *      @q: the TX queue
 1840  *      @m: the packet needing the descriptors
 1841  *      @ndesc: the number of Tx descriptors needed
 1842  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1843  *
 1844  *      Checks if the requested number of Tx descriptors is available on an
 1845  *      SGE send queue.  If the queue is already suspended or not enough
 1846  *      descriptors are available the packet is queued for later transmission.
 1847  *      Must be called with the Tx queue locked.
 1848  *
 1849  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1850  *      enough descriptors and the packet has been queued, and 2 if the caller
 1851  *      needs to retry because there weren't enough descriptors at the
 1852  *      beginning of the call but some freed up in the mean time.
 1853  */
 1854 static __inline int
 1855 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1856                  struct mbuf *m, unsigned int ndesc,
 1857                  unsigned int qid)
 1858 {
 1859         /* 
 1860          * XXX We currently only use this for checking the control queue
 1861          * the control queue is only used for binding qsets which happens
 1862          * at init time so we are guaranteed enough descriptors
 1863          */
 1864         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1865 addq_exit:      mbufq_tail(&q->sendq, m);
 1866                 return 1;
 1867         }
 1868         if (__predict_false(q->size - q->in_use < ndesc)) {
 1869 
 1870                 struct sge_qset *qs = txq_to_qset(q, qid);
 1871 
 1872                 setbit(&qs->txq_stopped, qid);
 1873                 if (should_restart_tx(q) &&
 1874                     test_and_clear_bit(qid, &qs->txq_stopped))
 1875                         return 2;
 1876 
 1877                 q->stops++;
 1878                 goto addq_exit;
 1879         }
 1880         return 0;
 1881 }
 1882 
 1883 
 1884 /**
 1885  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1886  *      @q: the SGE control Tx queue
 1887  *
 1888  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1889  *      that send only immediate data (presently just the control queues) and
 1890  *      thus do not have any mbufs
 1891  */
 1892 static __inline void
 1893 reclaim_completed_tx_imm(struct sge_txq *q)
 1894 {
 1895         unsigned int reclaim = q->processed - q->cleaned;
 1896 
 1897         q->in_use -= reclaim;
 1898         q->cleaned += reclaim;
 1899 }
 1900 
 1901 static __inline int
 1902 immediate(const struct mbuf *m)
 1903 {
 1904         return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
 1905 }
 1906 
 1907 /**
 1908  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1909  *      @adap: the adapter
 1910  *      @q: the control queue
 1911  *      @m: the packet
 1912  *
 1913  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1914  *      a control queue must fit entirely as immediate data in a single Tx
 1915  *      descriptor and have no page fragments.
 1916  */
 1917 static int
 1918 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1919 {
 1920         int ret;
 1921         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1922         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1923         
 1924         if (__predict_false(!immediate(m))) {
 1925                 m_freem(m);
 1926                 return 0;
 1927         }
 1928         
 1929         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1930         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1931 
 1932         TXQ_LOCK(qs);
 1933 again:  reclaim_completed_tx_imm(q);
 1934 
 1935         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1936         if (__predict_false(ret)) {
 1937                 if (ret == 1) {
 1938                         TXQ_UNLOCK(qs);
 1939                         return (ENOSPC);
 1940                 }
 1941                 goto again;
 1942         }
 1943         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1944         
 1945         q->in_use++;
 1946         if (++q->pidx >= q->size) {
 1947                 q->pidx = 0;
 1948                 q->gen ^= 1;
 1949         }
 1950         TXQ_UNLOCK(qs);
 1951         wmb();
 1952         t3_write_reg(adap, A_SG_KDOORBELL,
 1953                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1954         return (0);
 1955 }
 1956 
 1957 
 1958 /**
 1959  *      restart_ctrlq - restart a suspended control queue
 1960  *      @qs: the queue set cotaining the control queue
 1961  *
 1962  *      Resumes transmission on a suspended Tx control queue.
 1963  */
 1964 static void
 1965 restart_ctrlq(void *data, int npending)
 1966 {
 1967         struct mbuf *m;
 1968         struct sge_qset *qs = (struct sge_qset *)data;
 1969         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1970         adapter_t *adap = qs->port->adapter;
 1971 
 1972         TXQ_LOCK(qs);
 1973 again:  reclaim_completed_tx_imm(q);
 1974 
 1975         while (q->in_use < q->size &&
 1976                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1977 
 1978                 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1979 
 1980                 if (++q->pidx >= q->size) {
 1981                         q->pidx = 0;
 1982                         q->gen ^= 1;
 1983                 }
 1984                 q->in_use++;
 1985         }
 1986         if (!mbufq_empty(&q->sendq)) {
 1987                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1988 
 1989                 if (should_restart_tx(q) &&
 1990                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1991                         goto again;
 1992                 q->stops++;
 1993         }
 1994         TXQ_UNLOCK(qs);
 1995         t3_write_reg(adap, A_SG_KDOORBELL,
 1996                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1997 }
 1998 
 1999 
 2000 /*
 2001  * Send a management message through control queue 0
 2002  */
 2003 int
 2004 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 2005 {
 2006         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 2007 }
 2008 
 2009 /**
 2010  *      free_qset - free the resources of an SGE queue set
 2011  *      @sc: the controller owning the queue set
 2012  *      @q: the queue set
 2013  *
 2014  *      Release the HW and SW resources associated with an SGE queue set, such
 2015  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 2016  *      queue set must be quiesced prior to calling this.
 2017  */
 2018 static void
 2019 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 2020 {
 2021         int i;
 2022         
 2023         reclaim_completed_tx(q, 0, TXQ_ETH);
 2024         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2025                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2026         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2027                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2028                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2029         }
 2030 
 2031         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2032                 if (q->fl[i].desc) {
 2033                         mtx_lock_spin(&sc->sge.reg_lock);
 2034                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2035                         mtx_unlock_spin(&sc->sge.reg_lock);
 2036                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2037                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2038                                         q->fl[i].desc_map);
 2039                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2040                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2041                 }
 2042                 if (q->fl[i].sdesc) {
 2043                         free_rx_bufs(sc, &q->fl[i]);
 2044                         free(q->fl[i].sdesc, M_DEVBUF);
 2045                 }
 2046         }
 2047 
 2048         mtx_unlock(&q->lock);
 2049         MTX_DESTROY(&q->lock);
 2050         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2051                 if (q->txq[i].desc) {
 2052                         mtx_lock_spin(&sc->sge.reg_lock);
 2053                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2054                         mtx_unlock_spin(&sc->sge.reg_lock);
 2055                         bus_dmamap_unload(q->txq[i].desc_tag,
 2056                                         q->txq[i].desc_map);
 2057                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2058                                         q->txq[i].desc_map);
 2059                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2060                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2061                 }
 2062                 if (q->txq[i].sdesc) {
 2063                         free(q->txq[i].sdesc, M_DEVBUF);
 2064                 }
 2065         }
 2066 
 2067         if (q->rspq.desc) {
 2068                 mtx_lock_spin(&sc->sge.reg_lock);
 2069                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2070                 mtx_unlock_spin(&sc->sge.reg_lock);
 2071                 
 2072                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2073                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2074                                 q->rspq.desc_map);
 2075                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2076                 MTX_DESTROY(&q->rspq.lock);
 2077         }
 2078 
 2079 #ifdef INET
 2080         tcp_lro_free(&q->lro.ctrl);
 2081 #endif
 2082 
 2083         bzero(q, sizeof(*q));
 2084 }
 2085 
 2086 /**
 2087  *      t3_free_sge_resources - free SGE resources
 2088  *      @sc: the adapter softc
 2089  *
 2090  *      Frees resources used by the SGE queue sets.
 2091  */
 2092 void
 2093 t3_free_sge_resources(adapter_t *sc)
 2094 {
 2095         int i, nqsets;
 2096         
 2097         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2098                 nqsets += sc->port[i].nqsets;
 2099 
 2100         for (i = 0; i < nqsets; ++i) {
 2101                 TXQ_LOCK(&sc->sge.qs[i]);
 2102                 t3_free_qset(sc, &sc->sge.qs[i]);
 2103         }
 2104         
 2105 }
 2106 
 2107 /**
 2108  *      t3_sge_start - enable SGE
 2109  *      @sc: the controller softc
 2110  *
 2111  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2112  *      transfers.
 2113  */
 2114 void
 2115 t3_sge_start(adapter_t *sc)
 2116 {
 2117         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2118 }
 2119 
 2120 /**
 2121  *      t3_sge_stop - disable SGE operation
 2122  *      @sc: the adapter
 2123  *
 2124  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
 2125  *      from error interrupts) or from normal process context.  In the latter
 2126  *      case it also disables any pending queue restart tasklets.  Note that
 2127  *      if it is called in interrupt context it cannot disable the restart
 2128  *      tasklets as it cannot wait, however the tasklets will have no effect
 2129  *      since the doorbells are disabled and the driver will call this again
 2130  *      later from process context, at which time the tasklets will be stopped
 2131  *      if they are still running.
 2132  */
 2133 void
 2134 t3_sge_stop(adapter_t *sc)
 2135 {
 2136         int i, nqsets;
 2137         
 2138         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2139 
 2140         if (sc->tq == NULL)
 2141                 return;
 2142         
 2143         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2144                 nqsets += sc->port[i].nqsets;
 2145 #ifdef notyet
 2146         /*
 2147          * 
 2148          * XXX
 2149          */
 2150         for (i = 0; i < nqsets; ++i) {
 2151                 struct sge_qset *qs = &sc->sge.qs[i];
 2152                 
 2153                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2154                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2155         }
 2156 #endif
 2157 }
 2158 
 2159 /**
 2160  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2161  *      @adapter: the adapter
 2162  *      @q: the Tx queue to reclaim descriptors from
 2163  *      @reclaimable: the number of descriptors to reclaim
 2164  *      @m_vec_size: maximum number of buffers to reclaim
 2165  *      @desc_reclaimed: returns the number of descriptors reclaimed
 2166  *
 2167  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2168  *      Tx buffers.  Called with the Tx queue lock held.
 2169  *
 2170  *      Returns number of buffers of reclaimed   
 2171  */
 2172 void
 2173 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2174 {
 2175         struct tx_sw_desc *txsd;
 2176         unsigned int cidx, mask;
 2177         struct sge_txq *q = &qs->txq[queue];
 2178 
 2179 #ifdef T3_TRACE
 2180         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2181                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2182 #endif
 2183         cidx = q->cidx;
 2184         mask = q->size - 1;
 2185         txsd = &q->sdesc[cidx];
 2186 
 2187         mtx_assert(&qs->lock, MA_OWNED);
 2188         while (reclaimable--) {
 2189                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2190                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2191 
 2192                 if (txsd->m != NULL) {
 2193                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2194                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2195                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2196                         }
 2197                         m_freem_list(txsd->m);
 2198                         txsd->m = NULL;
 2199                 } else
 2200                         q->txq_skipped++;
 2201                 
 2202                 ++txsd;
 2203                 if (++cidx == q->size) {
 2204                         cidx = 0;
 2205                         txsd = q->sdesc;
 2206                 }
 2207         }
 2208         q->cidx = cidx;
 2209 
 2210 }
 2211 
 2212 /**
 2213  *      is_new_response - check if a response is newly written
 2214  *      @r: the response descriptor
 2215  *      @q: the response queue
 2216  *
 2217  *      Returns true if a response descriptor contains a yet unprocessed
 2218  *      response.
 2219  */
 2220 static __inline int
 2221 is_new_response(const struct rsp_desc *r,
 2222     const struct sge_rspq *q)
 2223 {
 2224         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2225 }
 2226 
 /* GTS flag bits of Tx queues 0 and 1 in a response descriptor. */
 #define RSPD_GTS_MASK   (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)

 /* The GTS bits together with the CR fields of Tx queues 0, 1 and 2. */
 #define RSPD_CTRL_MASK                          \
         (RSPD_GTS_MASK |                        \
          V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) |       \
          V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) |       \
          V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 #define NOMEM_INTR_DELAY 2500
 2235 
 2236 /**
 2237  *      write_ofld_wr - write an offload work request
 2238  *      @adap: the adapter
 2239  *      @m: the packet to send
 2240  *      @q: the Tx queue
 2241  *      @pidx: index of the first Tx descriptor to write
 2242  *      @gen: the generation value to use
 2243  *      @ndesc: number of descriptors the packet will occupy
 2244  *
 2245  *      Write an offload work request to send the supplied packet.  The packet
 2246  *      data already carry the work request with most fields populated.
 2247  */
 2248 static void
 2249 write_ofld_wr(adapter_t *adap, struct mbuf *m,
 2250     struct sge_txq *q, unsigned int pidx,
 2251     unsigned int gen, unsigned int ndesc,
 2252     bus_dma_segment_t *segs, unsigned int nsegs)
 2253 {
 2254         unsigned int sgl_flits, flits;
 2255         struct work_request_hdr *from;
 2256         struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 2257         struct tx_desc *d = &q->desc[pidx];
 2258         struct txq_state txqs;
 2259         
 2260         if (immediate(m) && nsegs == 0) {
 2261                 write_imm(d, m, m->m_len, gen);
 2262                 return;
 2263         }
 2264 
 2265         /* Only TX_DATA builds SGLs */
 2266         from = mtod(m, struct work_request_hdr *);
 2267         memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
 2268 
 2269         flits = m->m_len / 8;
 2270         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
 2271 
 2272         make_sgl(sgp, segs, nsegs);
 2273         sgl_flits = sgl_len(nsegs);
 2274 
 2275         txqs.gen = gen;
 2276         txqs.pidx = pidx;
 2277         txqs.compl = 0;
 2278 
 2279         write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
 2280             from->wrh_hi, from->wrh_lo);
 2281 }
 2282 
 2283 /**
 2284  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 2285  *      @m: the packet
 2286  *
 2287  *      Returns the number of Tx descriptors needed for the given offload
 2288  *      packet.  These packets are already fully constructed.
 2289  */
 2290 static __inline unsigned int
 2291 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 2292 {
 2293         unsigned int flits, cnt = 0;
 2294         int ndescs;
 2295 
 2296         if (m->m_len <= WR_LEN && nsegs == 0)
 2297                 return (1);                 /* packet fits as immediate data */
 2298 
 2299         /*
 2300          * This needs to be re-visited for TOE
 2301          */
 2302 
 2303         cnt = nsegs;
 2304                 
 2305         /* headers */
 2306         flits = m->m_len / 8;
 2307 
 2308         ndescs = flits_to_desc(flits + sgl_len(cnt));
 2309 
 2310         return (ndescs);
 2311 }
 2312 
 2313 /**
 2314  *      ofld_xmit - send a packet through an offload queue
 2315  *      @adap: the adapter
 2316  *      @q: the Tx offload queue
 2317  *      @m: the packet
 2318  *
 2319  *      Send an offload packet through an SGE offload queue.
 2320  */
 2321 static int
 2322 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2323 {
 2324         int ret, nsegs;
 2325         unsigned int ndesc;
 2326         unsigned int pidx, gen;
 2327         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2328         bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
 2329         struct tx_sw_desc *stx;
 2330 
 2331         nsegs = m_get_sgllen(m);
 2332         vsegs = m_get_sgl(m);
 2333         ndesc = calc_tx_descs_ofld(m, nsegs);
 2334         busdma_map_sgl(vsegs, segs, nsegs);
 2335 
 2336         stx = &q->sdesc[q->pidx];
 2337         
 2338         TXQ_LOCK(qs);
 2339 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2340         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2341         if (__predict_false(ret)) {
 2342                 if (ret == 1) {
 2343                         printf("no ofld desc avail\n");
 2344                         
 2345                         m_set_priority(m, ndesc);     /* save for restart */
 2346                         TXQ_UNLOCK(qs);
 2347                         return (EINTR);
 2348                 }
 2349                 goto again;
 2350         }
 2351 
 2352         gen = q->gen;
 2353         q->in_use += ndesc;
 2354         pidx = q->pidx;
 2355         q->pidx += ndesc;
 2356         if (q->pidx >= q->size) {
 2357                 q->pidx -= q->size;
 2358                 q->gen ^= 1;
 2359         }
 2360 #ifdef T3_TRACE
 2361         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 2362                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 2363                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 2364                   skb_shinfo(skb)->nr_frags);
 2365 #endif
 2366         TXQ_UNLOCK(qs);
 2367 
 2368         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2369         check_ring_tx_db(adap, q, 1);
 2370         return (0);
 2371 }
 2372 
 2373 /**
 2374  *      restart_offloadq - restart a suspended offload queue
 2375  *      @qs: the queue set cotaining the offload queue
 2376  *
 2377  *      Resumes transmission on a suspended Tx offload queue.
 2378  */
 2379 static void
 2380 restart_offloadq(void *data, int npending)
 2381 {
 2382         struct mbuf *m;
 2383         struct sge_qset *qs = data;
 2384         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2385         adapter_t *adap = qs->port->adapter;
 2386         bus_dma_segment_t segs[TX_MAX_SEGS];
 2387         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 2388         int nsegs, cleaned;
 2389                 
 2390         TXQ_LOCK(qs);
 2391 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2392 
 2393         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2394                 unsigned int gen, pidx;
 2395                 unsigned int ndesc = m_get_priority(m);
 2396 
 2397                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2398                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2399                         if (should_restart_tx(q) &&
 2400                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2401                                 goto again;
 2402                         q->stops++;
 2403                         break;
 2404                 }
 2405 
 2406                 gen = q->gen;
 2407                 q->in_use += ndesc;
 2408                 pidx = q->pidx;
 2409                 q->pidx += ndesc;
 2410                 if (q->pidx >= q->size) {
 2411                         q->pidx -= q->size;
 2412                         q->gen ^= 1;
 2413                 }
 2414                 
 2415                 (void)mbufq_dequeue(&q->sendq);
 2416                 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 2417                 TXQ_UNLOCK(qs);
 2418                 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2419                 TXQ_LOCK(qs);
 2420         }
 2421 #if USE_GTS
 2422         set_bit(TXQ_RUNNING, &q->flags);
 2423         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2424 #endif
 2425         TXQ_UNLOCK(qs);
 2426         wmb();
 2427         t3_write_reg(adap, A_SG_KDOORBELL,
 2428                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2429 }
 2430 
 2431 /**
 2432  *      queue_set - return the queue set a packet should use
 2433  *      @m: the packet
 2434  *
 2435  *      Maps a packet to the SGE queue set it should use.  The desired queue
 2436  *      set is carried in bits 1-3 in the packet's priority.
 2437  */
 2438 static __inline int
 2439 queue_set(const struct mbuf *m)
 2440 {
 2441         return m_get_priority(m) >> 1;
 2442 }
 2443 
 2444 /**
 2445  *      is_ctrl_pkt - return whether an offload packet is a control packet
 2446  *      @m: the packet
 2447  *
 2448  *      Determines whether an offload packet should use an OFLD or a CTRL
 2449  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 2450  */
 2451 static __inline int
 2452 is_ctrl_pkt(const struct mbuf *m)
 2453 {
 2454         return m_get_priority(m) & 1;
 2455 }
 2456 
 2457 /**
 2458  *      t3_offload_tx - send an offload packet
 2459  *      @tdev: the offload device to send to
 2460  *      @m: the packet
 2461  *
 2462  *      Sends an offload packet.  We use the packet priority to select the
 2463  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2464  *      should be sent as regular or control, bits 1-3 select the queue set.
 2465  */
 2466 int
 2467 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
 2468 {
 2469         adapter_t *adap = tdev2adap(tdev);
 2470         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2471 
 2472         if (__predict_false(is_ctrl_pkt(m))) 
 2473                 return ctrl_xmit(adap, qs, m);
 2474 
 2475         return ofld_xmit(adap, qs, m);
 2476 }
 2477 
 2478 /**
 2479  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2480  *      @tdev: the offload device that will be receiving the packets
 2481  *      @q: the SGE response queue that assembled the bundle
 2482  *      @m: the partial bundle
 2483  *      @n: the number of packets in the bundle
 2484  *
 2485  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2486  */
 2487 static __inline void
 2488 deliver_partial_bundle(struct t3cdev *tdev,
 2489                         struct sge_rspq *q,
 2490                         struct mbuf *mbufs[], int n)
 2491 {
 2492         if (n) {
 2493                 q->offload_bundles++;
 2494                 cxgb_ofld_recv(tdev, mbufs, n);
 2495         }
 2496 }
 2497 
 2498 static __inline int
 2499 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 2500     struct mbuf *m, struct mbuf *rx_gather[],
 2501     unsigned int gather_idx)
 2502 {
 2503         
 2504         rq->offload_pkts++;
 2505         m->m_pkthdr.header = mtod(m, void *);
 2506         rx_gather[gather_idx++] = m;
 2507         if (gather_idx == RX_BUNDLE_SIZE) {
 2508                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2509                 gather_idx = 0;
 2510                 rq->offload_bundles++;
 2511         }
 2512         return (gather_idx);
 2513 }
 2514 
 2515 static void
 2516 restart_tx(struct sge_qset *qs)
 2517 {
 2518         struct adapter *sc = qs->port->adapter;
 2519         
 2520         
 2521         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2522             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2523             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2524                 qs->txq[TXQ_OFLD].restarts++;
 2525                 DPRINTF("restarting TXQ_OFLD\n");
 2526                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2527         }
 2528         DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
 2529             qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
 2530             qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
 2531             qs->txq[TXQ_CTRL].in_use);
 2532         
 2533         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2534             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2535             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2536                 qs->txq[TXQ_CTRL].restarts++;
 2537                 DPRINTF("restarting TXQ_CTRL\n");
 2538                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2539         }
 2540 }
 2541 
 2542 /**
 2543  *      t3_sge_alloc_qset - initialize an SGE queue set
 2544  *      @sc: the controller softc
 2545  *      @id: the queue set id
 2546  *      @nports: how many Ethernet ports will be using this queue set
 2547  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2548  *      @p: configuration parameters for this queue set
 2549  *      @ntxq: number of Tx queues for the queue set
 2550  *      @pi: port info for queue set
 2551  *
 2552  *      Allocate resources and initialize an SGE queue set.  A queue set
 2553  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2554  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2555  *      queue, offload queue, and control queue.
 2556  */
 2557 int
 2558 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2559                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2560 {
 2561         struct sge_qset *q = &sc->sge.qs[id];
 2562         int i, ret = 0;
 2563 
 2564         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2565         q->port = pi;
 2566 
 2567         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2568             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2569                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2570                 goto err;
 2571         }
 2572         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2573             M_NOWAIT | M_ZERO)) == NULL) {
 2574                 device_printf(sc->dev, "failed to allocate ifq\n");
 2575                 goto err;
 2576         }
 2577         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2578         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2579         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2580         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2581         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2582 
 2583         init_qset_cntxt(q, id);
 2584         q->idx = id;
 2585         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2586                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2587                     &q->fl[0].desc, &q->fl[0].sdesc,
 2588                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2589                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2590                 printf("error %d from alloc ring fl0\n", ret);
 2591                 goto err;
 2592         }
 2593 
 2594         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2595                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2596                     &q->fl[1].desc, &q->fl[1].sdesc,
 2597                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2598                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2599                 printf("error %d from alloc ring fl1\n", ret);
 2600                 goto err;
 2601         }
 2602 
 2603         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2604                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2605                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2606                     NULL, NULL)) != 0) {
 2607                 printf("error %d from alloc ring rspq\n", ret);
 2608                 goto err;
 2609         }
 2610 
 2611         for (i = 0; i < ntxq; ++i) {
 2612                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2613 
 2614                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2615                             sizeof(struct tx_desc), sz,
 2616                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2617                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2618                             &q->txq[i].desc_map,
 2619                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2620                         printf("error %d from alloc ring tx %i\n", ret, i);
 2621                         goto err;
 2622                 }
 2623                 mbufq_init(&q->txq[i].sendq);
 2624                 q->txq[i].gen = 1;
 2625                 q->txq[i].size = p->txq_size[i];
 2626         }
 2627         
 2628         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2629         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2630         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2631         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2632 
 2633         q->fl[0].gen = q->fl[1].gen = 1;
 2634         q->fl[0].size = p->fl_size;
 2635         q->fl[1].size = p->jumbo_size;
 2636 
 2637         q->rspq.gen = 1;
 2638         q->rspq.cidx = 0;
 2639         q->rspq.size = p->rspq_size;
 2640 
 2641         q->txq[TXQ_ETH].stop_thres = nports *
 2642             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2643 
 2644         q->fl[0].buf_size = MCLBYTES;
 2645         q->fl[0].zone = zone_pack;
 2646         q->fl[0].type = EXT_PACKET;
 2647 
 2648         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2649                 q->fl[1].zone = zone_jumbo16;
 2650                 q->fl[1].type = EXT_JUMBO16;
 2651         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2652                 q->fl[1].zone = zone_jumbo9;
 2653                 q->fl[1].type = EXT_JUMBO9;             
 2654         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2655                 q->fl[1].zone = zone_jumbop;
 2656                 q->fl[1].type = EXT_JUMBOP;
 2657         } else {
 2658                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2659                 ret = EDOOFUS;
 2660                 goto err;
 2661         }
 2662         q->fl[1].buf_size = p->jumbo_buf_size;
 2663 
 2664         /* Allocate and setup the lro_ctrl structure */
 2665         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2666 #ifdef INET
 2667         ret = tcp_lro_init(&q->lro.ctrl);
 2668         if (ret) {
 2669                 printf("error %d from tcp_lro_init\n", ret);
 2670                 goto err;
 2671         }
 2672 #endif
 2673         q->lro.ctrl.ifp = pi->ifp;
 2674 
 2675         mtx_lock_spin(&sc->sge.reg_lock);
 2676         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2677                                    q->rspq.phys_addr, q->rspq.size,
 2678                                    q->fl[0].buf_size, 1, 0);
 2679         if (ret) {
 2680                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2681                 goto err_unlock;
 2682         }
 2683 
 2684         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2685                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2686                                           q->fl[i].phys_addr, q->fl[i].size,
 2687                                           q->fl[i].buf_size, p->cong_thres, 1,
 2688                                           0);
 2689                 if (ret) {
 2690                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2691                         goto err_unlock;
 2692                 }
 2693         }
 2694 
 2695         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2696                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2697                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2698                                  1, 0);
 2699         if (ret) {
 2700                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2701                 goto err_unlock;
 2702         }
 2703 
 2704         if (ntxq > 1) {
 2705                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2706                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2707                                          q->txq[TXQ_OFLD].phys_addr,
 2708                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2709                 if (ret) {
 2710                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2711                         goto err_unlock;
 2712                 }
 2713         }
 2714 
 2715         if (ntxq > 2) {
 2716                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2717                                          SGE_CNTXT_CTRL, id,
 2718                                          q->txq[TXQ_CTRL].phys_addr,
 2719                                          q->txq[TXQ_CTRL].size,
 2720                                          q->txq[TXQ_CTRL].token, 1, 0);
 2721                 if (ret) {
 2722                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2723                         goto err_unlock;
 2724                 }
 2725         }
 2726         
 2727         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2728             device_get_unit(sc->dev), irq_vec_idx);
 2729         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2730         
 2731         mtx_unlock_spin(&sc->sge.reg_lock);
 2732         t3_update_qset_coalesce(q, p);
 2733         q->port = pi;
 2734         
 2735         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2736         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2737         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2738 
 2739         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2740                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2741 
 2742         return (0);
 2743 
 2744 err_unlock:
 2745         mtx_unlock_spin(&sc->sge.reg_lock);
 2746 err:    
 2747         TXQ_LOCK(q);
 2748         t3_free_qset(sc, q);
 2749 
 2750         return (ret);
 2751 }
 2752 
 2753 /*
 2754  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2755  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2756  * will also be taken into account here.
 2757  */
 2758 void
 2759 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2760 {
 2761         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2762         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2763         struct ifnet *ifp = pi->ifp;
 2764         
 2765         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2766 
 2767         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2768             cpl->csum_valid && cpl->csum == 0xffff) {
 2769                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2770                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2771                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2772                 m->m_pkthdr.csum_data = 0xffff;
 2773         }
 2774 
 2775         if (cpl->vlan_valid) {
 2776                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2777                 m->m_flags |= M_VLANTAG;
 2778         } 
 2779 
 2780         m->m_pkthdr.rcvif = ifp;
 2781         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2782         /*
 2783          * adjust after conversion to mbuf chain
 2784          */
 2785         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2786         m->m_len -= (sizeof(*cpl) + ethpad);
 2787         m->m_data += (sizeof(*cpl) + ethpad);
 2788 }
 2789 
 2790 /**
 2791  *      get_packet - return the next ingress packet buffer from a free list
 2792  *      @adap: the adapter that received the packet
 2793  *      @drop_thres: # of remaining buffers before we start dropping packets
 2794  *      @qs: the qset that the SGE free list holding the packet belongs to
 2795  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2796  *      @r: response descriptor 
 2797  *
 2798  *      Get the next packet from a free list and complete setup of the
 2799  *      sk_buff.  If the packet is small we make a copy and recycle the
 2800  *      original buffer, otherwise we use the original buffer itself.  If a
 2801  *      positive drop threshold is supplied packets are dropped and their
 2802  *      buffers recycled if (a) the number of remaining buffers is under the
 2803  *      threshold and the packet is too big to copy, or (b) the packet should
 2804  *      be copied but there is no memory for the copy.
 2805  */
 2806 static int
 2807 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2808     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2809 {
 2810 
 2811         unsigned int len_cq =  ntohl(r->len_cq);
 2812         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2813         int mask, cidx = fl->cidx;
 2814         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2815         uint32_t len = G_RSPD_LEN(len_cq);
 2816         uint32_t flags = M_EXT;
 2817         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2818         caddr_t cl;
 2819         struct mbuf *m;
 2820         int ret = 0;
 2821 
 2822         mask = fl->size - 1;
 2823         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2824         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2825         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2826         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2827 
 2828         fl->credits--;
 2829         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2830         
 2831         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2832             sopeop == RSPQ_SOP_EOP) {
 2833                 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 2834                         goto skip_recycle;
 2835                 cl = mtod(m, void *);
 2836                 memcpy(cl, sd->rxsd_cl, len);
 2837                 recycle_rx_buf(adap, fl, fl->cidx);
 2838                 m->m_pkthdr.len = m->m_len = len;
 2839                 m->m_flags = 0;
 2840                 mh->mh_head = mh->mh_tail = m;
 2841                 ret = 1;
 2842                 goto done;
 2843         } else {
 2844         skip_recycle:
 2845                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2846                 cl = sd->rxsd_cl;
 2847                 m = sd->m;
 2848 
 2849                 if ((sopeop == RSPQ_SOP_EOP) ||
 2850                     (sopeop == RSPQ_SOP))
 2851                         flags |= M_PKTHDR;
 2852                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2853                 if (fl->zone == zone_pack) {
 2854                         /*
 2855                          * restore clobbered data pointer
 2856                          */
 2857                         m->m_data = m->m_ext.ext_buf;
 2858                 } else {
 2859                         m_cljset(m, cl, fl->type);
 2860                 }
 2861                 m->m_len = len;
 2862         }               
 2863         switch(sopeop) {
 2864         case RSPQ_SOP_EOP:
 2865                 ret = 1;
 2866                 /* FALLTHROUGH */
 2867         case RSPQ_SOP:
 2868                 mh->mh_head = mh->mh_tail = m;
 2869                 m->m_pkthdr.len = len;
 2870                 break;
 2871         case RSPQ_EOP:
 2872                 ret = 1;
 2873                 /* FALLTHROUGH */
 2874         case RSPQ_NSOP_NEOP:
 2875                 if (mh->mh_tail == NULL) {
 2876                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2877                         m_freem(m);
 2878                         break;
 2879                 }
 2880                 mh->mh_tail->m_next = m;
 2881                 mh->mh_tail = m;
 2882                 mh->mh_head->m_pkthdr.len += len;
 2883                 break;
 2884         }
 2885         if (cxgb_debug)
 2886                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2887 done:
 2888         if (++fl->cidx == fl->size)
 2889                 fl->cidx = 0;
 2890 
 2891         return (ret);
 2892 }
 2893 
 2894 /**
 2895  *      handle_rsp_cntrl_info - handles control information in a response
 2896  *      @qs: the queue set corresponding to the response
 2897  *      @flags: the response control flags
 2898  *
 2899  *      Handles the control information of an SGE response, such as GTS
 2900  *      indications and completion credits for the queue set's Tx queues.
 2901  *      HW coalesces credits, we don't do any extra SW coalescing.
 2902  */
 2903 static __inline void
 2904 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2905 {
 2906         unsigned int credits;
 2907 
 2908 #if USE_GTS
 2909         if (flags & F_RSPD_TXQ0_GTS)
 2910                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2911 #endif
 2912         credits = G_RSPD_TXQ0_CR(flags);
 2913         if (credits) 
 2914                 qs->txq[TXQ_ETH].processed += credits;
 2915 
 2916         credits = G_RSPD_TXQ2_CR(flags);
 2917         if (credits)
 2918                 qs->txq[TXQ_CTRL].processed += credits;
 2919 
 2920 # if USE_GTS
 2921         if (flags & F_RSPD_TXQ1_GTS)
 2922                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2923 # endif
 2924         credits = G_RSPD_TXQ1_CR(flags);
 2925         if (credits)
 2926                 qs->txq[TXQ_OFLD].processed += credits;
 2927 
 2928 }
 2929 
 2930 static void
 2931 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2932     unsigned int sleeping)
 2933 {
 2934         ;
 2935 }
 2936 
 2937 /**
 2938  *      process_responses - process responses from an SGE response queue
 2939  *      @adap: the adapter
 2940  *      @qs: the queue set to which the response queue belongs
 2941  *      @budget: how many responses can be processed in this round
 2942  *
 2943  *      Process responses from an SGE response queue up to the supplied budget.
 2944  *      Responses include received packets as well as credits and other events
 2945  *      for the queues that belong to the response queue's queue set.
 2946  *      A negative budget is effectively unlimited.
 2947  *
 2948  *      Additionally choose the interrupt holdoff time for the next interrupt
 2949  *      on this queue.  If the system is under memory shortage use a fairly
 2950  *      long delay to help recovery.
 2951  */
 2952 static int
 2953 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2954 {
 2955         struct sge_rspq *rspq = &qs->rspq;
 2956         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2957         int budget_left = budget;
 2958         unsigned int sleeping = 0;
 2959         int lro_enabled = qs->lro.enabled;
 2960         int skip_lro;
 2961         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2962         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2963         int ngathered = 0;
 2964 #ifdef DEBUG    
 2965         static int last_holdoff = 0;
 2966         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2967                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2968                 last_holdoff = rspq->holdoff_tmr;
 2969         }
 2970 #endif
 2971         rspq->next_holdoff = rspq->holdoff_tmr;
 2972 
 2973         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2974                 int eth, eop = 0, ethpad = 0;
 2975                 uint32_t flags = ntohl(r->flags);
 2976                 uint32_t rss_csum = *(const uint32_t *)r;
 2977                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2978                 
 2979                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2980                 
 2981                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2982                         struct mbuf *m;
 2983 
 2984                         if (cxgb_debug)
 2985                                 printf("async notification\n");
 2986 
 2987                         if (rspq->rspq_mh.mh_head == NULL) {
 2988                                 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 2989                                 m = rspq->rspq_mh.mh_head;
 2990                         } else {
 2991                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2992                         }
 2993                         if (m == NULL)
 2994                                 goto no_mem;
 2995 
 2996                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2997                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2998                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 2999                         rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
 3000                         eop = 1;
 3001                         rspq->async_notif++;
 3002                         goto skip;
 3003                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 3004                         struct mbuf *m = NULL;
 3005 
 3006                         DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
 3007                             r->rss_hdr.opcode, rspq->cidx);
 3008                         if (rspq->rspq_mh.mh_head == NULL)
 3009                                 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 3010                         else 
 3011                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 3012 
 3013                         if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {      
 3014                 no_mem:
 3015                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 3016                                 budget_left--;
 3017                                 break;
 3018                         }
 3019                         get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
 3020                         eop = 1;
 3021                         rspq->imm_data++;
 3022                 } else if (r->len_cq) {
 3023                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 3024                         
 3025                         eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
 3026                         if (eop) {
 3027                                 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
 3028                                 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
 3029                         }
 3030                         
 3031                         ethpad = 2;
 3032                 } else {
 3033                         rspq->pure_rsps++;
 3034                 }
 3035         skip:
 3036                 if (flags & RSPD_CTRL_MASK) {
 3037                         sleeping |= flags & RSPD_GTS_MASK;
 3038                         handle_rsp_cntrl_info(qs, flags);
 3039                 }
 3040 
 3041                 r++;
 3042                 if (__predict_false(++rspq->cidx == rspq->size)) {
 3043                         rspq->cidx = 0;
 3044                         rspq->gen ^= 1;
 3045                         r = rspq->desc;
 3046                 }
 3047 
 3048                 if (++rspq->credits >= 64) {
 3049                         refill_rspq(adap, rspq, rspq->credits);
 3050                         rspq->credits = 0;
 3051                 }
 3052                 if (!eth && eop) {
 3053                         rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
 3054                         /*
 3055                          * XXX size mismatch
 3056                          */
 3057                         m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
 3058 
 3059                         
 3060                         ngathered = rx_offload(&adap->tdev, rspq,
 3061                             rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
 3062                         rspq->rspq_mh.mh_head = NULL;
 3063                         DPRINTF("received offload packet\n");
 3064                         
 3065                 } else if (eth && eop) {
 3066                         struct mbuf *m = rspq->rspq_mh.mh_head;
 3067 
 3068                         t3_rx_eth(adap, rspq, m, ethpad);
 3069 
 3070                         /*
 3071                          * The T304 sends incoming packets on any qset.  If LRO
 3072                          * is also enabled, we could end up sending packet up
 3073                          * lro_ctrl->ifp's input.  That is incorrect.
 3074                          *
 3075                          * The mbuf's rcvif was derived from the cpl header and
 3076                          * is accurate.  Skip LRO and just use that.
 3077                          */
 3078                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 3079 
 3080                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 3081 #ifdef INET
 3082                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 3083 #endif
 3084                             ) {
 3085                                 /* successfully queue'd for LRO */
 3086                         } else {
 3087                                 /*
 3088                                  * LRO not enabled, packet unsuitable for LRO,
 3089                                  * or unable to queue.  Pass it up right now in
 3090                                  * either case.
 3091                                  */
 3092                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 3093                                 (*ifp->if_input)(ifp, m);
 3094                         }
 3095                         rspq->rspq_mh.mh_head = NULL;
 3096 
 3097                 }
 3098                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3099                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3100                 --budget_left;
 3101         }
 3102 
 3103         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 3104 
 3105 #ifdef INET
 3106         /* Flush LRO */
 3107         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 3108                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 3109                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 3110                 tcp_lro_flush(lro_ctrl, queued);
 3111         }
 3112 #endif
 3113 
 3114         if (sleeping)
 3115                 check_ring_db(adap, qs, sleeping);
 3116 
 3117         mb();  /* commit Tx queue processed updates */
 3118         if (__predict_false(qs->txq_stopped > 1))
 3119                 restart_tx(qs);
 3120 
 3121         __refill_fl_lt(adap, &qs->fl[0], 512);
 3122         __refill_fl_lt(adap, &qs->fl[1], 512);
 3123         budget -= budget_left;
 3124         return (budget);
 3125 }
 3126 
 3127 /*
 3128  * A helper function that processes responses and issues GTS.
 3129  */
 3130 static __inline int
 3131 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3132 {
 3133         int work;
 3134         static int last_holdoff = 0;
 3135         
 3136         work = process_responses(adap, rspq_to_qset(rq), -1);
 3137 
 3138         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3139                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3140                 last_holdoff = rq->next_holdoff;
 3141         }
 3142         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3143             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3144         
 3145         return (work);
 3146 }
 3147 
 3148 
 3149 /*
 3150  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3151  * Handles data events from SGE response queues as well as error and other
 3152  * async events as they all use the same interrupt pin.  We use one SGE
 3153  * response queue per port in this mode and protect all response queues with
 3154  * queue 0's lock.
 3155  */
 3156 void
 3157 t3b_intr(void *data)
 3158 {
 3159         uint32_t i, map;
 3160         adapter_t *adap = data;
 3161         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3162         
 3163         t3_write_reg(adap, A_PL_CLI, 0);
 3164         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3165 
 3166         if (!map) 
 3167                 return;
 3168 
 3169         if (__predict_false(map & F_ERRINTR))
 3170                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3171 
 3172         mtx_lock(&q0->lock);
 3173         for_each_port(adap, i)
 3174             if (map & (1 << i))
 3175                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3176         mtx_unlock(&q0->lock);
 3177 }
 3178 
 3179 /*
 3180  * The MSI interrupt handler.  This needs to handle data events from SGE
 3181  * response queues as well as error and other async events as they all use
 3182  * the same MSI vector.  We use one SGE response queue per port in this mode
 3183  * and protect all response queues with queue 0's lock.
 3184  */
 3185 void
 3186 t3_intr_msi(void *data)
 3187 {
 3188         adapter_t *adap = data;
 3189         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3190         int i, new_packets = 0;
 3191 
 3192         mtx_lock(&q0->lock);
 3193 
 3194         for_each_port(adap, i)
 3195             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3196                     new_packets = 1;
 3197         mtx_unlock(&q0->lock);
 3198         if (new_packets == 0)
 3199                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3200 }
 3201 
 3202 void
 3203 t3_intr_msix(void *data)
 3204 {
 3205         struct sge_qset *qs = data;
 3206         adapter_t *adap = qs->port->adapter;
 3207         struct sge_rspq *rspq = &qs->rspq;
 3208 
 3209         if (process_responses_gts(adap, rspq) == 0)
 3210                 rspq->unhandled_irqs++;
 3211 }
 3212 
 3213 #define QDUMP_SBUF_SIZE         32 * 400
 3214 static int
 3215 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3216 {
 3217         struct sge_rspq *rspq;
 3218         struct sge_qset *qs;
 3219         int i, err, dump_end, idx;
 3220         static int multiplier = 1;
 3221         struct sbuf *sb;
 3222         struct rsp_desc *rspd;
 3223         uint32_t data[4];
 3224         
 3225         rspq = arg1;
 3226         qs = rspq_to_qset(rspq);
 3227         if (rspq->rspq_dump_count == 0) 
 3228                 return (0);
 3229         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3230                 log(LOG_WARNING,
 3231                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3232                 rspq->rspq_dump_count = 0;
 3233                 return (EINVAL);
 3234         }
 3235         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3236                 log(LOG_WARNING,
 3237                     "dump start of %d is greater than queue size\n",
 3238                     rspq->rspq_dump_start);
 3239                 rspq->rspq_dump_start = 0;
 3240                 return (EINVAL);
 3241         }
 3242         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3243         if (err)
 3244                 return (err);
 3245 retry_sbufops:
 3246         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3247 
 3248         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3249             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3250             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3251         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3252             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3253         
 3254         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3255             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3256         
 3257         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3258         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3259                 idx = i & (RSPQ_Q_SIZE-1);
 3260                 
 3261                 rspd = &rspq->desc[idx];
 3262                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3263                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3264                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3265                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3266                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3267                     be32toh(rspd->len_cq), rspd->intr_gen);
 3268         }
 3269         if (sbuf_overflowed(sb)) {
 3270                 sbuf_delete(sb);
 3271                 multiplier++;
 3272                 goto retry_sbufops;
 3273         }
 3274         sbuf_finish(sb);
 3275         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3276         sbuf_delete(sb);
 3277         return (err);
 3278 }       
 3279 
 3280 static int
 3281 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3282 {
 3283         struct sge_txq *txq;
 3284         struct sge_qset *qs;
 3285         int i, j, err, dump_end;
 3286         static int multiplier = 1;
 3287         struct sbuf *sb;
 3288         struct tx_desc *txd;
 3289         uint32_t *WR, wr_hi, wr_lo, gen;
 3290         uint32_t data[4];
 3291         
 3292         txq = arg1;
 3293         qs = txq_to_qset(txq, TXQ_ETH);
 3294         if (txq->txq_dump_count == 0) {
 3295                 return (0);
 3296         }
 3297         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3298                 log(LOG_WARNING,
 3299                     "dump count is too large %d\n", txq->txq_dump_count);
 3300                 txq->txq_dump_count = 1;
 3301                 return (EINVAL);
 3302         }
 3303         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3304                 log(LOG_WARNING,
 3305                     "dump start of %d is greater than queue size\n",
 3306                     txq->txq_dump_start);
 3307                 txq->txq_dump_start = 0;
 3308                 return (EINVAL);
 3309         }
 3310         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3311         if (err)
 3312                 return (err);
 3313         
 3314             
 3315 retry_sbufops:
 3316         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3317 
 3318         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3319             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3320             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3321         sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
 3322             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3323             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3324         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3325             txq->txq_dump_start,
 3326             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3327 
 3328         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3329         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3330                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3331                 WR = (uint32_t *)txd->flit;
 3332                 wr_hi = ntohl(WR[0]);
 3333                 wr_lo = ntohl(WR[1]);           
 3334                 gen = G_WR_GEN(wr_lo);
 3335                 
 3336                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3337                     wr_hi, wr_lo, gen);
 3338                 for (j = 2; j < 30; j += 4) 
 3339                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3340                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3341 
 3342         }
 3343         if (sbuf_overflowed(sb)) {
 3344                 sbuf_delete(sb);
 3345                 multiplier++;
 3346                 goto retry_sbufops;
 3347         }
 3348         sbuf_finish(sb);
 3349         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3350         sbuf_delete(sb);
 3351         return (err);
 3352 }
 3353 
 3354 static int
 3355 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3356 {
 3357         struct sge_txq *txq;
 3358         struct sge_qset *qs;
 3359         int i, j, err, dump_end;
 3360         static int multiplier = 1;
 3361         struct sbuf *sb;
 3362         struct tx_desc *txd;
 3363         uint32_t *WR, wr_hi, wr_lo, gen;
 3364         
 3365         txq = arg1;
 3366         qs = txq_to_qset(txq, TXQ_CTRL);
 3367         if (txq->txq_dump_count == 0) {
 3368                 return (0);
 3369         }
 3370         if (txq->txq_dump_count > 256) {
 3371                 log(LOG_WARNING,
 3372                     "dump count is too large %d\n", txq->txq_dump_count);
 3373                 txq->txq_dump_count = 1;
 3374                 return (EINVAL);
 3375         }
 3376         if (txq->txq_dump_start > 255) {
 3377                 log(LOG_WARNING,
 3378                     "dump start of %d is greater than queue size\n",
 3379                     txq->txq_dump_start);
 3380                 txq->txq_dump_start = 0;
 3381                 return (EINVAL);
 3382         }
 3383 
 3384 retry_sbufops:
 3385         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3386         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3387             txq->txq_dump_start,
 3388             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3389 
 3390         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3391         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3392                 txd = &txq->desc[i & (255)];
 3393                 WR = (uint32_t *)txd->flit;
 3394                 wr_hi = ntohl(WR[0]);
 3395                 wr_lo = ntohl(WR[1]);           
 3396                 gen = G_WR_GEN(wr_lo);
 3397                 
 3398                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3399                     wr_hi, wr_lo, gen);
 3400                 for (j = 2; j < 30; j += 4) 
 3401                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3402                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3403 
 3404         }
 3405         if (sbuf_overflowed(sb)) {
 3406                 sbuf_delete(sb);
 3407                 multiplier++;
 3408                 goto retry_sbufops;
 3409         }
 3410         sbuf_finish(sb);
 3411         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3412         sbuf_delete(sb);
 3413         return (err);
 3414 }
 3415 
 3416 static int
 3417 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3418 {
 3419         adapter_t *sc = arg1;
 3420         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3421         int coalesce_usecs;     
 3422         struct sge_qset *qs;
 3423         int i, j, err, nqsets = 0;
 3424         struct mtx *lock;
 3425 
 3426         if ((sc->flags & FULL_INIT_DONE) == 0)
 3427                 return (ENXIO);
 3428                 
 3429         coalesce_usecs = qsp->coalesce_usecs;
 3430         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3431 
 3432         if (err != 0) {
 3433                 return (err);
 3434         }
 3435         if (coalesce_usecs == qsp->coalesce_usecs)
 3436                 return (0);
 3437 
 3438         for (i = 0; i < sc->params.nports; i++) 
 3439                 for (j = 0; j < sc->port[i].nqsets; j++)
 3440                         nqsets++;
 3441 
 3442         coalesce_usecs = max(1, coalesce_usecs);
 3443 
 3444         for (i = 0; i < nqsets; i++) {
 3445                 qs = &sc->sge.qs[i];
 3446                 qsp = &sc->params.sge.qset[i];
 3447                 qsp->coalesce_usecs = coalesce_usecs;
 3448                 
 3449                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3450                             &sc->sge.qs[0].rspq.lock;
 3451 
 3452                 mtx_lock(lock);
 3453                 t3_update_qset_coalesce(qs, qsp);
 3454                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3455                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3456                 mtx_unlock(lock);
 3457         }
 3458 
 3459         return (0);
 3460 }
 3461 
 3462 
 3463 void
 3464 t3_add_attach_sysctls(adapter_t *sc)
 3465 {
 3466         struct sysctl_ctx_list *ctx;
 3467         struct sysctl_oid_list *children;
 3468 
 3469         ctx = device_get_sysctl_ctx(sc->dev);
 3470         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3471 
 3472         /* random information */
 3473         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3474             "firmware_version",
 3475             CTLFLAG_RD, &sc->fw_version,
 3476             0, "firmware version");
 3477         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3478             "hw_revision",
 3479             CTLFLAG_RD, &sc->params.rev,
 3480             0, "chip model");
 3481         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3482             "port_types",
 3483             CTLFLAG_RD, &sc->port_types,
 3484             0, "type of ports");
 3485         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3486             "enable_debug",
 3487             CTLFLAG_RW, &cxgb_debug,
 3488             0, "enable verbose debugging output");
 3489         SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3490             CTLFLAG_RD, &sc->tunq_coalesce,
 3491             "#tunneled packets freed");
 3492         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3493             "txq_overrun",
 3494             CTLFLAG_RD, &txq_fills,
 3495             0, "#times txq overrun");
 3496 }
 3497 
 3498 
 3499 static const char *rspq_name = "rspq";
 3500 static const char *txq_names[] =
 3501 {
 3502         "txq_eth",
 3503         "txq_ofld",
 3504         "txq_ctrl"      
 3505 };
 3506 
 3507 static int
 3508 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3509 {
 3510         struct port_info *p = arg1;
 3511         uint64_t *parg;
 3512 
 3513         if (!p)
 3514                 return (EINVAL);
 3515 
 3516         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3517         PORT_LOCK(p);
 3518         t3_mac_update_stats(&p->mac);
 3519         PORT_UNLOCK(p);
 3520 
 3521         return (sysctl_handle_quad(oidp, parg, 0, req));
 3522 }
 3523 
 3524 void
 3525 t3_add_configured_sysctls(adapter_t *sc)
 3526 {
 3527         struct sysctl_ctx_list *ctx;
 3528         struct sysctl_oid_list *children;
 3529         int i, j;
 3530         
 3531         ctx = device_get_sysctl_ctx(sc->dev);
 3532         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3533 
 3534         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3535             "intr_coal",
 3536             CTLTYPE_INT|CTLFLAG_RW, sc,
 3537             0, t3_set_coalesce_usecs,
 3538             "I", "interrupt coalescing timer (us)");
 3539 
 3540         for (i = 0; i < sc->params.nports; i++) {
 3541                 struct port_info *pi = &sc->port[i];
 3542                 struct sysctl_oid *poid;
 3543                 struct sysctl_oid_list *poidlist;
 3544                 struct mac_stats *mstats = &pi->mac.stats;
 3545                 
 3546                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3547                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3548                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3549                 poidlist = SYSCTL_CHILDREN(poid);
 3550                 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 
 3551                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3552                     0, "#queue sets");
 3553 
 3554                 for (j = 0; j < pi->nqsets; j++) {
 3555                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3556                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3557                                           *ctrlqpoid, *lropoid;
 3558                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3559                                                *txqpoidlist, *ctrlqpoidlist,
 3560                                                *lropoidlist;
 3561                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3562                         
 3563                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3564                         
 3565                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3566                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3567                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3568 
 3569                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3570                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3571                                         "freelist #0 empty");
 3572                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3573                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3574                                         "freelist #1 empty");
 3575 
 3576                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3577                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3578                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3579 
 3580                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3581                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3582                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3583 
 3584                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3585                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3586                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3587 
 3588                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3589                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3590                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3591 
 3592                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3593                             CTLFLAG_RD, &qs->rspq.size,
 3594                             0, "#entries in response queue");
 3595                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3596                             CTLFLAG_RD, &qs->rspq.cidx,
 3597                             0, "consumer index");
 3598                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3599                             CTLFLAG_RD, &qs->rspq.credits,
 3600                             0, "#credits");
 3601                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3602                             CTLFLAG_RD, &qs->rspq.starved,
 3603                             0, "#times starved");
 3604                         SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3605                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3606                             "physical_address_of the queue");
 3607                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3608                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3609                             0, "start rspq dump entry");
 3610                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3611                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3612                             0, "#rspq entries to dump");
 3613                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3614                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3615                             0, t3_dump_rspq, "A", "dump of the response queue");
 3616 
 3617                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3618                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3619                             "#tunneled packets dropped");
 3620                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3621                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3622                             0, "#tunneled packets waiting to be sent");
 3623 #if 0                   
 3624                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3625                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3626                             0, "#tunneled packets queue producer index");
 3627                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3628                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3629                             0, "#tunneled packets queue consumer index");
 3630 #endif                  
 3631                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
 3632                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3633                             0, "#tunneled packets processed by the card");
 3634                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3635                             CTLFLAG_RD, &txq->cleaned,
 3636                             0, "#tunneled packets cleaned");
 3637                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3638                             CTLFLAG_RD, &txq->in_use,
 3639                             0, "#tunneled packet slots in use");
 3640                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3641                             CTLFLAG_RD, &txq->txq_frees,
 3642                             "#tunneled packets freed");
 3643                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3644                             CTLFLAG_RD, &txq->txq_skipped,
 3645                             0, "#tunneled packet descriptors skipped");
 3646                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3647                             CTLFLAG_RD, &txq->txq_coalesced,
 3648                             "#tunneled packets coalesced");
 3649                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3650                             CTLFLAG_RD, &txq->txq_enqueued,
 3651                             0, "#tunneled packets enqueued to hardware");
 3652                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3653                             CTLFLAG_RD, &qs->txq_stopped,
 3654                             0, "tx queues stopped");
 3655                         SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3656                             CTLFLAG_RD, &txq->phys_addr,
 3657                             "physical_address_of the queue");
 3658                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3659                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3660                             0, "txq generation");
 3661                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3662                             CTLFLAG_RD, &txq->cidx,
 3663                             0, "hardware queue cidx");                  
 3664                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3665                             CTLFLAG_RD, &txq->pidx,
 3666                             0, "hardware queue pidx");
 3667                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3668                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3669                             0, "txq start idx for dump");
 3670                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3671                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3672                             0, "txq #entries to dump");                 
 3673                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3674                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3675                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3676 
 3677                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3678                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3679                             0, "ctrlq start idx for dump");
 3680                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3681                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3682                             0, "ctrl #entries to dump");                        
 3683                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3684                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3685                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3686 
 3687                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3688                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3689                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3690                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3691                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3692                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3693                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3694                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3695                 }
 3696 
 3697                 /* Now add a node for mac stats. */
 3698                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3699                     CTLFLAG_RD, NULL, "MAC statistics");
 3700                 poidlist = SYSCTL_CHILDREN(poid);
 3701 
 3702                 /*
 3703                  * We (ab)use the length argument (arg2) to pass on the offset
 3704                  * of the data that we are interested in.  This is only required
 3705                  * for the quad counters that are updated from the hardware (we
 3706                  * make sure that we return the latest value).
 3707                  * sysctl_handle_macstat first updates *all* the counters from
 3708                  * the hardware, and then returns the latest value of the
 3709                  * requested counter.  Best would be to update only the
 3710                  * requested counter from hardware, but t3_mac_update_stats()
 3711                  * hides all the register details and we don't want to dive into
 3712                  * all that here.
 3713                  */
 3714 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3715     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3716     sysctl_handle_macstat, "QU", 0)
 3717                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3718                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3719                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3720                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3721                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3722                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3723                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3724                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3725                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3726                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3727                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3728                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3729                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3730                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3731                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3732                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3733                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3734                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3735                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3736                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3737                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3738                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3739                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3740                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3741                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3742                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3743                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3744                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3745                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3746                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3747                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3748                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3749                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3750                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3751                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3752                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3753                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3754                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3755                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3756                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3757                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3758                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3759                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3760                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3761                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3762                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3763 #undef CXGB_SYSCTL_ADD_QUAD
 3764 
 3765 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3766     CTLFLAG_RD, &mstats->a, 0)
 3767                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3768                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3769                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3770                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3771                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3772                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3773                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3774                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3775                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3776                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3777 #undef CXGB_SYSCTL_ADD_ULONG
 3778         }
 3779 }
 3780         
 3781 /**
 3782  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3783  *      @qs: the queue set
 3784  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3785  *      @idx: the descriptor index in the queue
 3786  *      @data: where to dump the descriptor contents
 3787  *
 3788  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3789  *      size of the descriptor.
 3790  */
 3791 int
 3792 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3793                 unsigned char *data)
 3794 {
 3795         if (qnum >= 6)
 3796                 return (EINVAL);
 3797 
 3798         if (qnum < 3) {
 3799                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3800                         return -EINVAL;
 3801                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3802                 return sizeof(struct tx_desc);
 3803         }
 3804 
 3805         if (qnum == 3) {
 3806                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3807                         return (EINVAL);
 3808                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3809                 return sizeof(struct rsp_desc);
 3810         }
 3811 
 3812         qnum -= 4;
 3813         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3814                 return (EINVAL);
 3815         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3816         return sizeof(struct rx_desc);
 3817 }

Cache object: ea31946d7fa2fdd1d3c5de36487c47bb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.