FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/8.2/sys/dev/cxgb/cxgb_sge.c 211848 2010-08-26 19:55:03Z np $");
   32 
   33 #include "opt_inet.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/module.h>
   39 #include <sys/bus.h>
   40 #include <sys/conf.h>
   41 #include <machine/bus.h>
   42 #include <machine/resource.h>
   43 #include <sys/bus_dma.h>
   44 #include <sys/rman.h>
   45 #include <sys/queue.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/taskqueue.h>
   48 
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/systm.h>
   54 #include <sys/syslog.h>
   55 #include <sys/socket.h>
   56 
   57 #include <net/bpf.h>    
   58 #include <net/ethernet.h>
   59 #include <net/if.h>
   60 #include <net/if_vlan_var.h>
   61 
   62 #include <netinet/in_systm.h>
   63 #include <netinet/in.h>
   64 #include <netinet/ip.h>
   65 #include <netinet/tcp.h>
   66 
   67 #include <dev/pci/pcireg.h>
   68 #include <dev/pci/pcivar.h>
   69 
   70 #include <vm/vm.h>
   71 #include <vm/pmap.h>
   72 
   73 #include <cxgb_include.h>
   74 #include <sys/mvec.h>
   75 
   76 int     txq_fills = 0;
   77 int     multiq_tx_enable = 1;
   78 
   79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   83     "size of per-queue mbuf ring");
   84 
   85 static int cxgb_tx_coalesce_force = 0;
   86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   88     &cxgb_tx_coalesce_force, 0,
   89     "coalesce small packets into a single work request regardless of ring state");
   90 
    91 #define COALESCE_START_DEFAULT          (TX_ETH_Q_SIZE>>1)
    92 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
    93 #define COALESCE_STOP_DEFAULT           (TX_ETH_Q_SIZE>>2)
    94 #define COALESCE_STOP_MIN               (TX_ETH_Q_SIZE>>5)
    95 #define TX_RECLAIM_DEFAULT              (TX_ETH_Q_SIZE>>5)
    96 #define TX_RECLAIM_MAX                  (TX_ETH_Q_SIZE>>2)
    97 #define TX_RECLAIM_MIN                  (TX_ETH_Q_SIZE>>6)
   98 
   99 
  100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  102     &cxgb_tx_coalesce_enable_start);
  103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  104     &cxgb_tx_coalesce_enable_start, 0,
  105     "coalesce enable threshold");
  106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  109     &cxgb_tx_coalesce_enable_stop, 0,
  110     "coalesce disable threshold");
  111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  114     &cxgb_tx_reclaim_threshold, 0,
  115     "tx cleaning minimum threshold");
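/*
 * Illustrative usage note (editor's sketch, not part of the original driver):
 * the CTLFLAG_RDTUN knob above is a boot-time loader tunable, e.g. in
 * /boot/loader.conf:
 *
 *	hw.cxgb.txq_mr_size="4096"
 *
 * while the CTLFLAG_RW knobs can be adjusted at runtime, e.g.:
 *
 *	sysctl hw.cxgb.tx_coalesce_force=1
 *	sysctl hw.cxgb.tx_reclaim_threshold=64
 *
 * The "4096" and "64" values are arbitrary examples, not recommendations.
 */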
  116 
  117 /*
  118  * XXX don't re-enable this until TOE stops assuming
  119  * we have an m_ext
  120  */
  121 static int recycle_enable = 0;
  122 
  123 extern int cxgb_use_16k_clusters;
  124 extern int nmbjumbop;
  125 extern int nmbjumbo9;
  126 extern int nmbjumbo16;
  127 
  128 #define USE_GTS 0
  129 
  130 #define SGE_RX_SM_BUF_SIZE      1536
  131 #define SGE_RX_DROP_THRES       16
  132 #define SGE_RX_COPY_THRES       128
  133 
  134 /*
  135  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  136  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  137  */
  138 #define TX_RECLAIM_PERIOD       (hz >> 1)
  139 
  140 /* 
  141  * Values for sge_txq.flags
  142  */
  143 enum {
  144         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  145         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  146 };
  147 
  148 struct tx_desc {
  149         uint64_t        flit[TX_DESC_FLITS];
  150 } __packed;
  151 
  152 struct rx_desc {
  153         uint32_t        addr_lo;
  154         uint32_t        len_gen;
  155         uint32_t        gen2;
  156         uint32_t        addr_hi;
  157 } __packed;
  158 
  159 struct rsp_desc {               /* response queue descriptor */
  160         struct rss_header       rss_hdr;
  161         uint32_t                flags;
  162         uint32_t                len_cq;
  163         uint8_t                 imm_data[47];
  164         uint8_t                 intr_gen;
  165 } __packed;
  166 
  167 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  168 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  169 #define RX_SW_DESC_INUSE        (1 << 3)
  170 #define TX_SW_DESC_MAPPED       (1 << 4)
  171 
  172 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  173 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  174 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  175 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  176 
  177 struct tx_sw_desc {                /* SW state per Tx descriptor */
  178         struct mbuf     *m;
  179         bus_dmamap_t    map;
  180         int             flags;
  181 };
  182 
  183 struct rx_sw_desc {                /* SW state per Rx descriptor */
  184         caddr_t         rxsd_cl;
  185         struct mbuf     *m;
  186         bus_dmamap_t    map;
  187         int             flags;
  188 };
  189 
  190 struct txq_state {
  191         unsigned int    compl;
  192         unsigned int    gen;
  193         unsigned int    pidx;
  194 };
  195 
  196 struct refill_fl_cb_arg {
  197         int               error;
  198         bus_dma_segment_t seg;
  199         int               nseg;
  200 };
  201 
  202 
  203 /*
  204  * Maps a number of flits to the number of Tx descriptors that can hold them.
  205  * The formula is
  206  *
  207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  208  *
  209  * HW allows up to 4 descriptors to be combined into a WR.
  210  */
  211 static uint8_t flit_desc_map[] = {
  212         0,
  213 #if SGE_NUM_GENBITS == 1
  214         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  215         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  216         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  217         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  218 #elif SGE_NUM_GENBITS == 2
  219         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  220         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  221         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  222         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  223 #else
  224 # error "SGE_NUM_GENBITS must be 1 or 2"
  225 #endif
  226 };
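/*
 * Editor's illustrative sketch, not part of the original driver: the table
 * above is a precomputed form of the formula quoted in the comment, assuming
 * WR_FLITS is 16 when SGE_NUM_GENBITS == 1 and 15 when SGE_NUM_GENBITS == 2.
 * With SGE_NUM_GENBITS == 2, for example, a 16-flit work request needs
 * 1 + (16 - 2) / (15 - 1) = 2 descriptors, matching flit_desc_map[16] == 2.
 * A hypothetical closed-form equivalent of the lookup:
 */
#if 0
static __inline u_int
flits_to_desc_formula(u_int flits)
{
	if (flits == 0)
		return (0);
	/* up to 2 flits always fit in a single descriptor */
	return (flits <= 2 ? 1 : 1 + (flits - 2) / (WR_FLITS - 1));
}
#endif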
  227 
  228 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  229 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  230 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  231 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  232 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  233 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  234         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  237         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  238 #define TXQ_RING_DEQUEUE(qs) \
  239         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 
  241 int cxgb_debug = 0;
  242 
  243 static void sge_timer_cb(void *arg);
  244 static void sge_timer_reclaim(void *arg, int ncount);
  245 static void sge_txq_reclaim_handler(void *arg, int ncount);
  246 static void cxgb_start_locked(struct sge_qset *qs);
  247 
  248 /*
  249  * XXX need to cope with bursty scheduling by looking at a wider
  250  * window than we are now for determining the need for coalescing
  251  *
  252  */
  253 static __inline uint64_t
  254 check_pkt_coalesce(struct sge_qset *qs) 
  255 { 
  256         struct adapter *sc; 
  257         struct sge_txq *txq; 
  258         uint8_t *fill;
  259 
  260         if (__predict_false(cxgb_tx_coalesce_force))
  261                 return (1);
  262         txq = &qs->txq[TXQ_ETH]; 
  263         sc = qs->port->adapter; 
  264         fill = &sc->tunq_fill[qs->idx];
  265 
  266         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  267                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  268         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
   269                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
   270         /*
   271          * If the hardware transmit queue is more full than the coalesce
   272          * enable threshold we mark the queue set as coalescing; we drop back
   273          * out of coalescing once the queue drains below the disable threshold
   274          * and no packets are enqueued, which provides some degree of
   275          * hysteresis.
   */
  276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  277             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  278                 *fill = 0; 
  279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  280                 *fill = 1; 
  281 
  282         return (sc->tunq_coalesce);
  283 } 
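/*
 * Editor's illustrative note, not part of the original driver: assuming
 * TX_ETH_Q_SIZE is 1024, the default thresholds work out to
 *
 *	cxgb_tx_coalesce_enable_start = 1024 >> 1 = 512
 *	cxgb_tx_coalesce_enable_stop  = 1024 >> 2 = 256
 *
 * so a queue set begins coalescing once txq->in_use reaches 512 outstanding
 * descriptors and only stops after it drains to 256 or fewer with an empty
 * buf_ring, which is the hysteresis described in the comment above.
 */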
  284 
  285 #ifdef __LP64__
  286 static void
  287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  288 {
  289         uint64_t wr_hilo;
  290 #if _BYTE_ORDER == _LITTLE_ENDIAN
  291         wr_hilo = wr_hi;
  292         wr_hilo |= (((uint64_t)wr_lo)<<32);
  293 #else
  294         wr_hilo = wr_lo;
  295         wr_hilo |= (((uint64_t)wr_hi)<<32);
  296 #endif  
  297         wrp->wrh_hilo = wr_hilo;
  298 }
  299 #else
  300 static void
  301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  302 {
  303 
  304         wrp->wrh_hi = wr_hi;
  305         wmb();
  306         wrp->wrh_lo = wr_lo;
  307 }
  308 #endif
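/*
 * Editor's illustrative note, not part of the original driver: on LP64 the
 * two 32-bit header words are merged so that a single 64-bit store publishes
 * them atomically.  For example, with wr_hi == 0x11111111 and
 * wr_lo == 0x22222222 on a little-endian machine, wrh_hilo becomes
 * 0x2222222211111111, leaving wr_hi in the first four bytes of the header
 * and wr_lo in the last four, i.e. the same memory layout the 32-bit variant
 * produces with its two stores separated by wmb().
 */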
  309 
  310 struct coalesce_info {
  311         int count;
  312         int nbytes;
  313 };
  314 
  315 static int
  316 coalesce_check(struct mbuf *m, void *arg)
  317 {
  318         struct coalesce_info *ci = arg;
  319         int *count = &ci->count;
  320         int *nbytes = &ci->nbytes;
  321 
  322         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  323                 (*count < 7) && (m->m_next == NULL))) {
  324                 *count += 1;
  325                 *nbytes += m->m_len;
  326                 return (1);
  327         }
  328         return (0);
  329 }
  330 
  331 static struct mbuf *
  332 cxgb_dequeue(struct sge_qset *qs)
  333 {
  334         struct mbuf *m, *m_head, *m_tail;
  335         struct coalesce_info ci;
  336 
  337         
  338         if (check_pkt_coalesce(qs) == 0) 
  339                 return TXQ_RING_DEQUEUE(qs);
  340 
  341         m_head = m_tail = NULL;
  342         ci.count = ci.nbytes = 0;
  343         do {
  344                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  345                 if (m_head == NULL) {
  346                         m_tail = m_head = m;
  347                 } else if (m != NULL) {
  348                         m_tail->m_nextpkt = m;
  349                         m_tail = m;
  350                 }
  351         } while (m != NULL);
  352         if (ci.count > 7)
  353                 panic("trying to coalesce %d packets in to one WR", ci.count);
  354         return (m_head);
  355 }
  356         
  357 /**
  358  *      reclaim_completed_tx - reclaims completed Tx descriptors
   359  *      @qs: the queue set owning the Tx queue
   360  *      @reclaim_min, @queue: reclaim threshold and index of the Tx queue
  361  *
  362  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  363  *      and frees the associated buffers if possible.  Called with the Tx
  364  *      queue's lock held.
  365  */
  366 static __inline int
  367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  368 {
  369         struct sge_txq *q = &qs->txq[queue];
  370         int reclaim = desc_reclaimable(q);
  371 
  372         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  373             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  374                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  375 
  376         if (reclaim < reclaim_min)
  377                 return (0);
  378 
  379         mtx_assert(&qs->lock, MA_OWNED);
  380         if (reclaim > 0) {
  381                 t3_free_tx_desc(qs, reclaim, queue);
  382                 q->cleaned += reclaim;
  383                 q->in_use -= reclaim;
  384         }
  385         if (isset(&qs->txq_stopped, TXQ_ETH))
  386                 clrbit(&qs->txq_stopped, TXQ_ETH);
  387 
  388         return (reclaim);
  389 }
  390 
  391 /**
  392  *      should_restart_tx - are there enough resources to restart a Tx queue?
  393  *      @q: the Tx queue
  394  *
  395  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  396  */
  397 static __inline int
  398 should_restart_tx(const struct sge_txq *q)
  399 {
  400         unsigned int r = q->processed - q->cleaned;
  401 
  402         return q->in_use - r < (q->size >> 1);
  403 }
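/*
 * Editor's illustrative note, not part of the original driver: for a
 * 1024-entry queue with in_use == 600 and 200 descriptors processed but not
 * yet cleaned, the check above evaluates 600 - 200 = 400 < 512, so a
 * suspended queue in that state would be restarted.
 */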
  404 
  405 /**
  406  *      t3_sge_init - initialize SGE
  407  *      @adap: the adapter
  408  *      @p: the SGE parameters
  409  *
  410  *      Performs SGE initialization needed every time after a chip reset.
   411  *      We do not initialize any of the queue sets here; instead, the driver
  412  *      top-level must request those individually.  We also do not enable DMA
  413  *      here, that should be done after the queues have been set up.
  414  */
  415 void
  416 t3_sge_init(adapter_t *adap, struct sge_params *p)
  417 {
  418         u_int ctrl, ups;
  419 
  420         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  421 
  422         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  423                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  424                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  425                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  426 #if SGE_NUM_GENBITS == 1
  427         ctrl |= F_EGRGENCTRL;
  428 #endif
  429         if (adap->params.rev > 0) {
  430                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  431                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  432         }
  433         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  434         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  435                      V_LORCQDRBTHRSH(512));
  436         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  437         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  438                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  439         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  440                      adap->params.rev < T3_REV_C ? 1000 : 500);
  441         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  442         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  443         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  444         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  445         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  446 }
  447 
  448 
  449 /**
  450  *      sgl_len - calculates the size of an SGL of the given capacity
  451  *      @n: the number of SGL entries
  452  *
  453  *      Calculates the number of flits needed for a scatter/gather list that
  454  *      can hold the given number of entries.
  455  */
  456 static __inline unsigned int
  457 sgl_len(unsigned int n)
  458 {
  459         return ((3 * n) / 2 + (n & 1));
  460 }
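/*
 * Editor's illustrative note, not part of the original driver: each pair of
 * SGL entries packs into three 8-byte flits (two 64-bit addresses plus two
 * 32-bit lengths), and a trailing odd entry takes two flits.  For example,
 * sgl_len(3) = (3 * 3) / 2 + (3 & 1) = 4 + 1 = 5 flits.
 */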
  461 
  462 /**
  463  *      get_imm_packet - return the next ingress packet buffer from a response
  464  *      @resp: the response descriptor containing the packet data
  465  *
  466  *      Return a packet containing the immediate data of the given response.
  467  */
  468 static int
  469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  470 {
  471 
  472         m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
  473         m->m_ext.ext_buf = NULL;
  474         m->m_ext.ext_type = 0;
  475         memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
  476         return (0);     
  477 }
  478 
  479 static __inline u_int
  480 flits_to_desc(u_int n)
  481 {
  482         return (flit_desc_map[n]);
  483 }
  484 
  485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  486                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  487                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  488                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  489                     F_HIRCQPARITYERROR)
  490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  492                       F_RSPQDISABLED)
  493 
  494 /**
  495  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  496  *      @adapter: the adapter
  497  *
  498  *      Interrupt handler for SGE asynchronous (non-data) events.
  499  */
  500 void
  501 t3_sge_err_intr_handler(adapter_t *adapter)
  502 {
  503         unsigned int v, status;
  504 
  505         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  506         if (status & SGE_PARERR)
  507                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  508                          status & SGE_PARERR);
  509         if (status & SGE_FRAMINGERR)
  510                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  511                          status & SGE_FRAMINGERR);
  512         if (status & F_RSPQCREDITOVERFOW)
  513                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  514 
  515         if (status & F_RSPQDISABLED) {
  516                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  517 
  518                 CH_ALERT(adapter,
  519                          "packet delivered to disabled response queue (0x%x)\n",
  520                          (v >> S_RSPQ0DISABLED) & 0xff);
  521         }
  522 
  523         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  524         if (status & SGE_FATALERR)
  525                 t3_fatal_err(adapter);
  526 }
  527 
  528 void
  529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  530 {
  531         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  532 
  533         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  534         nqsets *= adap->params.nports;
  535 
  536         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  537 
  538         while (!powerof2(fl_q_size))
  539                 fl_q_size--;
  540 
  541         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  542             is_offload(adap);
  543 
  544 #if __FreeBSD_version >= 700111
  545         if (use_16k) {
  546                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  547                 jumbo_buf_size = MJUM16BYTES;
  548         } else {
  549                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  550                 jumbo_buf_size = MJUM9BYTES;
  551         }
  552 #else
  553         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  554         jumbo_buf_size = MJUMPAGESIZE;
  555 #endif
  556         while (!powerof2(jumbo_q_size))
  557                 jumbo_q_size--;
  558 
  559         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  560                 device_printf(adap->dev,
  561                     "Insufficient clusters and/or jumbo buffers.\n");
  562 
  563         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  564 
  565         for (i = 0; i < SGE_QSETS; ++i) {
  566                 struct qset_params *q = p->qset + i;
  567 
  568                 if (adap->params.nports > 2) {
  569                         q->coalesce_usecs = 50;
  570                 } else {
  571 #ifdef INVARIANTS                       
  572                         q->coalesce_usecs = 10;
  573 #else
  574                         q->coalesce_usecs = 5;
  575 #endif                  
  576                 }
  577                 q->polling = 0;
  578                 q->rspq_size = RSPQ_Q_SIZE;
  579                 q->fl_size = fl_q_size;
  580                 q->jumbo_size = jumbo_q_size;
  581                 q->jumbo_buf_size = jumbo_buf_size;
  582                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  583                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  584                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  585                 q->cong_thres = 0;
  586         }
  587 }
  588 
  589 int
  590 t3_sge_alloc(adapter_t *sc)
  591 {
  592 
  593         /* The parent tag. */
  594         if (bus_dma_tag_create( NULL,                   /* parent */
  595                                 1, 0,                   /* algnmnt, boundary */
  596                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  597                                 BUS_SPACE_MAXADDR,      /* highaddr */
  598                                 NULL, NULL,             /* filter, filterarg */
  599                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  600                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  601                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  602                                 0,                      /* flags */
  603                                 NULL, NULL,             /* lock, lockarg */
  604                                 &sc->parent_dmat)) {
  605                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  606                 return (ENOMEM);
  607         }
  608 
  609         /*
  610          * DMA tag for normal sized RX frames
  611          */
  612         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  613                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  614                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  615                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  616                 return (ENOMEM);
  617         }
  618 
  619         /* 
  620          * DMA tag for jumbo sized RX frames.
  621          */
  622         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  623                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  624                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  625                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  626                 return (ENOMEM);
  627         }
  628 
  629         /* 
  630          * DMA tag for TX frames.
  631          */
  632         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  633                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  634                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  635                 NULL, NULL, &sc->tx_dmat)) {
  636                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  637                 return (ENOMEM);
  638         }
  639 
  640         return (0);
  641 }
  642 
  643 int
  644 t3_sge_free(struct adapter * sc)
  645 {
  646 
  647         if (sc->tx_dmat != NULL)
  648                 bus_dma_tag_destroy(sc->tx_dmat);
  649 
  650         if (sc->rx_jumbo_dmat != NULL)
  651                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  652 
  653         if (sc->rx_dmat != NULL)
  654                 bus_dma_tag_destroy(sc->rx_dmat);
  655 
  656         if (sc->parent_dmat != NULL)
  657                 bus_dma_tag_destroy(sc->parent_dmat);
  658 
  659         return (0);
  660 }
  661 
  662 void
  663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  664 {
  665 
  666         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  667         qs->rspq.polling = 0 /* p->polling */;
  668 }
  669 
  670 #if !defined(__i386__) && !defined(__amd64__)
  671 static void
  672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  673 {
  674         struct refill_fl_cb_arg *cb_arg = arg;
  675         
  676         cb_arg->error = error;
  677         cb_arg->seg = segs[0];
  678         cb_arg->nseg = nseg;
  679 
  680 }
  681 #endif
  682 /**
  683  *      refill_fl - refill an SGE free-buffer list
  684  *      @sc: the controller softc
  685  *      @q: the free-list to refill
  686  *      @n: the number of new buffers to allocate
  687  *
  688  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
   689  *      The caller must ensure that @n does not exceed the queue's capacity.
  690  */
  691 static void
  692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  693 {
  694         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  695         struct rx_desc *d = &q->desc[q->pidx];
  696         struct refill_fl_cb_arg cb_arg;
  697         struct mbuf *m;
  698         caddr_t cl;
  699         int err;
  700         
  701         cb_arg.error = 0;
  702         while (n--) {
  703                 /*
   704                  * We only allocate a cluster; mbuf allocation happens after rx
  705                  */
  706                 if (q->zone == zone_pack) {
  707                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  708                                 break;
  709                         cl = m->m_ext.ext_buf;                  
  710                 } else {
  711                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  712                                 break;
  713                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  714                                 uma_zfree(q->zone, cl);
  715                                 break;
  716                         }
  717                 }
  718                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  719                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  720                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  721                                 uma_zfree(q->zone, cl);
  722                                 goto done;
  723                         }
  724                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  725                 }
  726 #if !defined(__i386__) && !defined(__amd64__)
  727                 err = bus_dmamap_load(q->entry_tag, sd->map,
  728                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  729                 
  730                 if (err != 0 || cb_arg.error) {
  731                         if (q->zone == zone_pack)
  732                                 uma_zfree(q->zone, cl);
  733                         m_free(m);
  734                         goto done;
  735                 }
  736 #else
  737                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  738 #endif          
  739                 sd->flags |= RX_SW_DESC_INUSE;
  740                 sd->rxsd_cl = cl;
  741                 sd->m = m;
  742                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  743                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  744                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  745                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  746 
  747                 d++;
  748                 sd++;
  749 
  750                 if (++q->pidx == q->size) {
  751                         q->pidx = 0;
  752                         q->gen ^= 1;
  753                         sd = q->sdesc;
  754                         d = q->desc;
  755                 }
  756                 q->credits++;
  757                 q->db_pending++;
  758         }
  759 
  760 done:
  761         if (q->db_pending >= 32) {
  762                 q->db_pending = 0;
  763                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  764         }
  765 }
  766 
  767 
  768 /**
  769  *      free_rx_bufs - free the Rx buffers on an SGE free list
   770  *      @sc: the controller softc
  771  *      @q: the SGE free list to clean up
  772  *
  773  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  774  *      this queue should be stopped before calling this function.
  775  */
  776 static void
  777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  778 {
  779         u_int cidx = q->cidx;
  780 
  781         while (q->credits--) {
  782                 struct rx_sw_desc *d = &q->sdesc[cidx];
  783 
  784                 if (d->flags & RX_SW_DESC_INUSE) {
  785                         bus_dmamap_unload(q->entry_tag, d->map);
  786                         bus_dmamap_destroy(q->entry_tag, d->map);
  787                         if (q->zone == zone_pack) {
  788                                 m_init(d->m, zone_pack, MCLBYTES,
  789                                     M_NOWAIT, MT_DATA, M_EXT);
  790                                 uma_zfree(zone_pack, d->m);
  791                         } else {
  792                                 m_init(d->m, zone_mbuf, MLEN,
  793                                     M_NOWAIT, MT_DATA, 0);
  794                                 uma_zfree(zone_mbuf, d->m);
  795                                 uma_zfree(q->zone, d->rxsd_cl);
  796                         }                       
  797                 }
  798                 
  799                 d->rxsd_cl = NULL;
  800                 d->m = NULL;
  801                 if (++cidx == q->size)
  802                         cidx = 0;
  803         }
  804 }
  805 
  806 static __inline void
  807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  808 {
  809         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  810 }
  811 
  812 static __inline void
  813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  814 {
  815         uint32_t reclaimable = fl->size - fl->credits;
  816 
  817         if (reclaimable > 0)
  818                 refill_fl(adap, fl, min(max, reclaimable));
  819 }
  820 
  821 /**
  822  *      recycle_rx_buf - recycle a receive buffer
  823  *      @adapter: the adapter
  824  *      @q: the SGE free list
  825  *      @idx: index of buffer to recycle
  826  *
  827  *      Recycles the specified buffer on the given free list by adding it at
  828  *      the next available slot on the list.
  829  */
  830 static void
  831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  832 {
  833         struct rx_desc *from = &q->desc[idx];
  834         struct rx_desc *to   = &q->desc[q->pidx];
  835 
  836         q->sdesc[q->pidx] = q->sdesc[idx];
  837         to->addr_lo = from->addr_lo;        // already big endian
  838         to->addr_hi = from->addr_hi;        // likewise
  839         wmb();  /* necessary ? */
  840         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  841         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  842         q->credits++;
  843 
  844         if (++q->pidx == q->size) {
  845                 q->pidx = 0;
  846                 q->gen ^= 1;
  847         }
  848         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  849 }
  850 
  851 static void
  852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  853 {
  854         uint32_t *addr;
  855 
  856         addr = arg;
  857         *addr = segs[0].ds_addr;
  858 }
  859 
  860 static int
  861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  862     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  863     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  864 {
  865         size_t len = nelem * elem_size;
  866         void *s = NULL;
  867         void *p = NULL;
  868         int err;
  869 
  870         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  871                                       BUS_SPACE_MAXADDR_32BIT,
  872                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  873                                       len, 0, NULL, NULL, tag)) != 0) {
  874                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  875                 return (ENOMEM);
  876         }
  877 
  878         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  879                                     map)) != 0) {
  880                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  881                 return (ENOMEM);
  882         }
  883 
  884         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  885         bzero(p, len);
  886         *(void **)desc = p;
  887 
  888         if (sw_size) {
  889                 len = nelem * sw_size;
  890                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  891                 *(void **)sdesc = s;
  892         }
  893         if (parent_entry_tag == NULL)
  894                 return (0);
  895             
  896         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  897                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  898                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  899                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  900                                       NULL, NULL, entry_tag)) != 0) {
  901                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  902                 return (ENOMEM);
  903         }
  904         return (0);
  905 }
  906 
  907 static void
  908 sge_slow_intr_handler(void *arg, int ncount)
  909 {
  910         adapter_t *sc = arg;
  911 
  912         t3_slow_intr_handler(sc);
  913         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  914         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  915 }
  916 
  917 /**
   918  *      sge_timer_cb - perform periodic maintenance of the SGE queue sets
   919  *      @arg: the adapter whose queue sets are maintained
  920  *
  921  *      Runs periodically from a timer to perform maintenance of an SGE queue
   922  *      set.  It performs the following tasks:
  923  *
  924  *      a) Cleans up any completed Tx descriptors that may still be pending.
  925  *      Normal descriptor cleanup happens when new packets are added to a Tx
  926  *      queue so this timer is relatively infrequent and does any cleanup only
  927  *      if the Tx queue has not seen any new packets in a while.  We make a
  928  *      best effort attempt to reclaim descriptors, in that we don't wait
  929  *      around if we cannot get a queue's lock (which most likely is because
  930  *      someone else is queueing new packets and so will also handle the clean
  931  *      up).  Since control queues use immediate data exclusively we don't
  932  *      bother cleaning them up here.
  933  *
  934  *      b) Replenishes Rx queues that have run out due to memory shortage.
  935  *      Normally new Rx buffers are added when existing ones are consumed but
  936  *      when out of memory a queue can become empty.  We try to add only a few
   937  *      buffers here; the queue will be replenished fully as these new buffers
   938  *      are used up, once the memory shortage has subsided.
  939  *      
  940  *      c) Return coalesced response queue credits in case a response queue is
  941  *      starved.
  942  *
  943  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  944  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  945  */
  946 static void
  947 sge_timer_cb(void *arg)
  948 {
  949         adapter_t *sc = arg;
  950         if ((sc->flags & USING_MSIX) == 0) {
  951                 
  952                 struct port_info *pi;
  953                 struct sge_qset *qs;
  954                 struct sge_txq  *txq;
  955                 int i, j;
  956                 int reclaim_ofl, refill_rx;
  957 
  958                 if (sc->open_device_map == 0) 
  959                         return;
  960 
  961                 for (i = 0; i < sc->params.nports; i++) {
  962                         pi = &sc->port[i];
  963                         for (j = 0; j < pi->nqsets; j++) {
  964                                 qs = &sc->sge.qs[pi->first_qset + j];
  965                                 txq = &qs->txq[0];
  966                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  967                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  968                                     (qs->fl[1].credits < qs->fl[1].size));
  969                                 if (reclaim_ofl || refill_rx) {
  970                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  971                                         break;
  972                                 }
  973                         }
  974                 }
  975         }
  976         
  977         if (sc->params.nports > 2) {
  978                 int i;
  979 
  980                 for_each_port(sc, i) {
  981                         struct port_info *pi = &sc->port[i];
  982 
  983                         t3_write_reg(sc, A_SG_KDOORBELL, 
  984                                      F_SELEGRCNTX | 
  985                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  986                 }
  987         }       
  988         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
  989             sc->open_device_map != 0)
  990                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  991 }
  992 
  993 /*
  994  * This is meant to be a catch-all function to keep sge state private
  995  * to sge.c
  996  *
  997  */
  998 int
  999 t3_sge_init_adapter(adapter_t *sc)
 1000 {
 1001         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1002         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1003         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1004         return (0);
 1005 }
 1006 
 1007 int
 1008 t3_sge_reset_adapter(adapter_t *sc)
 1009 {
 1010         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1011         return (0);
 1012 }
 1013 
 1014 int
 1015 t3_sge_init_port(struct port_info *pi)
 1016 {
 1017         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1018         return (0);
 1019 }
 1020 
 1021 /**
 1022  *      refill_rspq - replenish an SGE response queue
 1023  *      @adapter: the adapter
 1024  *      @q: the response queue to replenish
 1025  *      @credits: how many new responses to make available
 1026  *
 1027  *      Replenishes a response queue by making the supplied number of responses
 1028  *      available to HW.
 1029  */
 1030 static __inline void
 1031 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1032 {
 1033 
 1034         /* mbufs are allocated on demand when a rspq entry is processed. */
 1035         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1036                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1037 }
 1038 
 1039 static void
 1040 sge_txq_reclaim_handler(void *arg, int ncount)
 1041 {
 1042         struct sge_qset *qs = arg;
 1043         int i;
 1044 
 1045         for (i = 0; i < 3; i++)
 1046                 reclaim_completed_tx(qs, 16, i);
 1047 }
 1048 
 1049 static void
 1050 sge_timer_reclaim(void *arg, int ncount)
 1051 {
 1052         struct port_info *pi = arg;
 1053         int i, nqsets = pi->nqsets;
 1054         adapter_t *sc = pi->adapter;
 1055         struct sge_qset *qs;
 1056         struct mtx *lock;
 1057         
 1058         KASSERT((sc->flags & USING_MSIX) == 0,
 1059             ("can't call timer reclaim for msi-x"));
 1060 
 1061         for (i = 0; i < nqsets; i++) {
 1062                 qs = &sc->sge.qs[pi->first_qset + i];
 1063 
 1064                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1065                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1066                             &sc->sge.qs[0].rspq.lock;
 1067 
 1068                 if (mtx_trylock(lock)) {
 1069                         /* XXX currently assume that we are *NOT* polling */
 1070                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1071 
 1072                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1073                                 __refill_fl(sc, &qs->fl[0]);
 1074                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1075                                 __refill_fl(sc, &qs->fl[1]);
 1076                         
 1077                         if (status & (1 << qs->rspq.cntxt_id)) {
 1078                                 if (qs->rspq.credits) {
 1079                                         refill_rspq(sc, &qs->rspq, 1);
 1080                                         qs->rspq.credits--;
 1081                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1082                                             1 << qs->rspq.cntxt_id);
 1083                                 }
 1084                         }
 1085                         mtx_unlock(lock);
 1086                 }
 1087         }
 1088 }
 1089 
 1090 /**
 1091  *      init_qset_cntxt - initialize an SGE queue set context info
 1092  *      @qs: the queue set
 1093  *      @id: the queue set id
 1094  *
 1095  *      Initializes the TIDs and context ids for the queues of a queue set.
 1096  */
 1097 static void
 1098 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1099 {
 1100 
 1101         qs->rspq.cntxt_id = id;
 1102         qs->fl[0].cntxt_id = 2 * id;
 1103         qs->fl[1].cntxt_id = 2 * id + 1;
 1104         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1105         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1106         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1107         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1108         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1109 
 1110         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1111         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1112         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1113 }
 1114 
 1115 
 1116 static void
 1117 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1118 {
 1119         txq->in_use += ndesc;
 1120         /*
 1121          * XXX we don't handle stopping of queue
 1122          * presumably start handles this when we bump against the end
 1123          */
 1124         txqs->gen = txq->gen;
 1125         txq->unacked += ndesc;
 1126         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1127         txq->unacked &= 31;
 1128         txqs->pidx = txq->pidx;
 1129         txq->pidx += ndesc;
 1130 #ifdef INVARIANTS
 1131         if (((txqs->pidx > txq->cidx) &&
 1132                 (txq->pidx < txqs->pidx) &&
 1133                 (txq->pidx >= txq->cidx)) ||
 1134             ((txqs->pidx < txq->cidx) &&
 1135                 (txq->pidx >= txq-> cidx)) ||
 1136             ((txqs->pidx < txq->cidx) &&
 1137                 (txq->cidx < txqs->pidx)))
 1138                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1139                     txqs->pidx, txq->pidx, txq->cidx);
 1140 #endif
 1141         if (txq->pidx >= txq->size) {
 1142                 txq->pidx -= txq->size;
 1143                 txq->gen ^= 1;
 1144         }
 1145 
 1146 }
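/*
 * Editor's illustrative note, not part of the original driver: txq_prod()
 * advances the producer state and flips the generation bit when pidx wraps.
 * For example, with txq->size == 1024, txq->pidx == 1022 and ndesc == 4,
 * the new pidx becomes 1026 - 1024 = 2 and txq->gen is inverted.  The
 * unacked bookkeeping sets the completion-request bit in txqs->compl roughly
 * once every 32 descriptors, so the hardware need not acknowledge every
 * single work request.
 */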
 1147 
 1148 /**
 1149  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1150  *      @m: the packet mbufs
 1151  *      @nsegs: the number of segments 
 1152  *
 1153  *      Returns the number of Tx descriptors needed for the given Ethernet
 1154  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1155  */
 1156 static __inline unsigned int
 1157 calc_tx_descs(const struct mbuf *m, int nsegs)
 1158 {
 1159         unsigned int flits;
 1160 
 1161         if (m->m_pkthdr.len <= PIO_LEN)
 1162                 return 1;
 1163 
 1164         flits = sgl_len(nsegs) + 2;
 1165         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1166                 flits++;
 1167 
 1168         return flits_to_desc(flits);
 1169 }
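/*
 * Editor's illustrative note, not part of the original driver: a packet
 * short enough for PIO (m_pkthdr.len <= PIO_LEN) always takes a single
 * descriptor.  Otherwise a 3-segment non-TSO packet needs
 * sgl_len(3) + 2 = 5 + 2 = 7 flits, and flits_to_desc(7) == 1, so the WR
 * header, CPL header and SGL still fit in one Tx descriptor.
 */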
 1170 
 1171 static unsigned int
 1172 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
 1173     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
 1174 {
 1175         struct mbuf *m0;
 1176         int err, pktlen, pass = 0;
 1177         bus_dma_tag_t tag = txq->entry_tag;
 1178 
 1179 retry:
 1180         err = 0;
 1181         m0 = *m;
 1182         pktlen = m0->m_pkthdr.len;
 1183 #if defined(__i386__) || defined(__amd64__)
 1184         if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
 1185                 goto done;
 1186         } else
 1187 #endif
 1188                 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
 1189 
 1190         if (err == 0) {
 1191                 goto done;
 1192         }
 1193         if (err == EFBIG && pass == 0) {
 1194                 pass = 1;
 1195                 /* Too many segments, try to defrag */
 1196                 m0 = m_defrag(m0, M_DONTWAIT);
 1197                 if (m0 == NULL) {
 1198                         m_freem(*m);
 1199                         *m = NULL;
 1200                         return (ENOBUFS);
 1201                 }
 1202                 *m = m0;
 1203                 goto retry;
 1204         } else if (err == ENOMEM) {
 1205                 return (err);
  1206         } else if (err) {
 1207                 if (cxgb_debug)
 1208                         printf("map failure err=%d pktlen=%d\n", err, pktlen);
 1209                 m_freem(m0);
 1210                 *m = NULL;
 1211                 return (err);
 1212         }
 1213 done:
 1214 #if !defined(__i386__) && !defined(__amd64__)
 1215         bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
 1216 #endif  
 1217         txsd->flags |= TX_SW_DESC_MAPPED;
 1218 
 1219         return (0);
 1220 }
 1221 
 1222 /**
 1223  *      make_sgl - populate a scatter/gather list for a packet
 1224  *      @sgp: the SGL to populate
 1225  *      @segs: the packet dma segments
 1226  *      @nsegs: the number of segments
 1227  *
  1228  *      Generates a scatter/gather list for the buffers that make up a packet.
  1229  *      The caller must size the SGL appropriately; segments of zero length
  1230  *      are skipped.
 1231  */
 1232 static __inline void
 1233 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1234 {
 1235         int i, idx;
 1236         
 1237         for (idx = 0, i = 0; i < nsegs; i++) {
 1238                 /*
 1239                  * firmware doesn't like empty segments
 1240                  */
 1241                 if (segs[i].ds_len == 0)
 1242                         continue;
 1243                 if (i && idx == 0) 
 1244                         ++sgp;
 1245                 
 1246                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1247                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1248                 idx ^= 1;
 1249         }
 1250         
 1251         if (idx) {
 1252                 sgp->len[idx] = 0;
 1253                 sgp->addr[idx] = 0;
 1254         }
 1255 }
 1256         
 1257 /**
 1258  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1259  *      @adap: the adapter
 1260  *      @q: the Tx queue
 1261  *
  1262  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
  1263  *      where the HW goes to sleep just after we check; in that case the
  1264  *      interrupt handler will detect the outstanding TX packet and ring
  1265  *      the doorbell for us.
 1266  *
 1267  *      When GTS is disabled we unconditionally ring the doorbell.
 1268  */
 1269 static __inline void
 1270 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1271 {
 1272 #if USE_GTS
 1273         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1274         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1275                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1276 #ifdef T3_TRACE
 1277                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1278                           q->cntxt_id);
 1279 #endif
 1280                 t3_write_reg(adap, A_SG_KDOORBELL,
 1281                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1282         }
 1283 #else
 1284         if (mustring || ++q->db_pending >= 32) {
 1285                 wmb();            /* write descriptors before telling HW */
 1286                 t3_write_reg(adap, A_SG_KDOORBELL,
 1287                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1288                 q->db_pending = 0;
 1289         }
 1290 #endif
 1291 }
 1292 
 1293 static __inline void
 1294 wr_gen2(struct tx_desc *d, unsigned int gen)
 1295 {
 1296 #if SGE_NUM_GENBITS == 2
 1297         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1298 #endif
 1299 }
 1300 
 1301 /**
 1302  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1303  *      @ndesc: number of Tx descriptors spanned by the SGL
 1304  *      @txd: first Tx descriptor to be written
 1305  *      @txqs: txq state (generation and producer index)
 1306  *      @txq: the SGE Tx queue
 1307  *      @sgl: the SGL
 1308  *      @flits: number of flits to the start of the SGL in the first descriptor
 1309  *      @sgl_flits: the SGL size in flits
 1310  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1311  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1312  *
 1313  *      Write a work request header and an associated SGL.  If the SGL is
 1314  *      small enough to fit into one Tx descriptor it has already been written
 1315  *      and we just need to write the WR header.  Otherwise we distribute the
 1316  *      SGL across the number of descriptors it spans.
 1317  */
 1318 static void
 1319 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1320     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1321     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1322 {
 1323 
 1324         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1325         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1326         
 1327         if (__predict_true(ndesc == 1)) {
 1328                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1329                         V_WR_SGLSFLT(flits)) | wr_hi,
 1330                     htonl(V_WR_LEN(flits + sgl_flits) |
 1331                         V_WR_GEN(txqs->gen)) | wr_lo);
 1332                 /* XXX gen? */
 1333                 wr_gen2(txd, txqs->gen);
 1334                 
 1335         } else {
 1336                 unsigned int ogen = txqs->gen;
 1337                 const uint64_t *fp = (const uint64_t *)sgl;
 1338                 struct work_request_hdr *wp = wrp;
 1339                 
 1340                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1341                     V_WR_SGLSFLT(flits)) | wr_hi;
 1342                 
 1343                 while (sgl_flits) {
 1344                         unsigned int avail = WR_FLITS - flits;
 1345 
 1346                         if (avail > sgl_flits)
 1347                                 avail = sgl_flits;
 1348                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1349                         sgl_flits -= avail;
 1350                         ndesc--;
 1351                         if (!sgl_flits)
 1352                                 break;
 1353                         
 1354                         fp += avail;
 1355                         txd++;
 1356                         txsd++;
 1357                         if (++txqs->pidx == txq->size) {
 1358                                 txqs->pidx = 0;
 1359                                 txqs->gen ^= 1;
 1360                                 txd = txq->desc;
 1361                                 txsd = txq->sdesc;
 1362                         }
 1363 
 1364                         /*
 1365                          * when the head of the mbuf chain
 1366                          * is freed all clusters will be freed
 1367                          * with it
 1368                          */
 1369                         wrp = (struct work_request_hdr *)txd;
 1370                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1371                             V_WR_SGLSFLT(1)) | wr_hi;
 1372                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1373                                     sgl_flits + 1)) |
 1374                             V_WR_GEN(txqs->gen)) | wr_lo;
 1375                         wr_gen2(txd, txqs->gen);
 1376                         flits = 1;
 1377                 }
 1378                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1379                 wmb();
 1380                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1381                 wr_gen2((struct tx_desc *)wp, ogen);
 1382         }
 1383 }
 1384 
 1385 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1386 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1387 
 1388 #define GET_VTAG(cntrl, m) \
 1389 do { \
 1390         if ((m)->m_flags & M_VLANTAG)                                               \
 1391                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1392 } while (0)
 1393 
 1394 static int
 1395 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1396 {
 1397         adapter_t *sc;
 1398         struct mbuf *m0;
 1399         struct sge_txq *txq;
 1400         struct txq_state txqs;
 1401         struct port_info *pi;
 1402         unsigned int ndesc, flits, cntrl, mlen;
 1403         int err, nsegs, tso_info = 0;
 1404 
 1405         struct work_request_hdr *wrp;
 1406         struct tx_sw_desc *txsd;
 1407         struct sg_ent *sgp, *sgl;
 1408         uint32_t wr_hi, wr_lo, sgl_flits; 
 1409         bus_dma_segment_t segs[TX_MAX_SEGS];
 1410 
 1411         struct tx_desc *txd;
 1412                 
 1413         pi = qs->port;
 1414         sc = pi->adapter;
 1415         txq = &qs->txq[TXQ_ETH];
 1416         txd = &txq->desc[txq->pidx];
 1417         txsd = &txq->sdesc[txq->pidx];
 1418         sgl = txq->txq_sgl;
 1419 
 1420         prefetch(txd);
 1421         m0 = *m;
 1422 
 1423         mtx_assert(&qs->lock, MA_OWNED);
 1424         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1425         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1426         
 1427         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1428             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1429                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1430 
 1431         if (m0->m_nextpkt != NULL) {
 1432                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1433                 ndesc = 1;
 1434                 mlen = 0;
 1435         } else {
 1436                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1437                     &m0, segs, &nsegs))) {
 1438                         if (cxgb_debug)
 1439                                 printf("failed ... err=%d\n", err);
 1440                         return (err);
 1441                 }
 1442                 mlen = m0->m_pkthdr.len;
 1443                 ndesc = calc_tx_descs(m0, nsegs);
 1444         }
 1445         txq_prod(txq, ndesc, &txqs);
 1446 
 1447         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1448         txsd->m = m0;
 1449 
 1450         if (m0->m_nextpkt != NULL) {
 1451                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1452                 int i, fidx;
 1453 
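                      /*
                       * Coalesced path: the packets chained through m_nextpkt
                       * are packed into a single CPL_TX_PKT_BATCH work request,
                       * two flits per packet (control/length plus bus address)
                       * after the one-flit WR header.
                       */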
 1454                 if (nsegs > 7)
 1455                         panic("trying to coalesce %d packets into one WR", nsegs);
 1456                 txq->txq_coalesced += nsegs;
 1457                 wrp = (struct work_request_hdr *)txd;
 1458                 flits = nsegs*2 + 1;
 1459 
 1460                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1461                         struct cpl_tx_pkt_batch_entry *cbe;
 1462                         uint64_t flit;
 1463                         uint32_t *hflit = (uint32_t *)&flit;
 1464                         int cflags = m0->m_pkthdr.csum_flags;
 1465 
 1466                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1467                         GET_VTAG(cntrl, m0);
 1468                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1469                         if (__predict_false(!(cflags & CSUM_IP)))
 1470                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1471                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
 1472                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1473 
 1474                         hflit[0] = htonl(cntrl);
 1475                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1476                         flit |= htobe64(1 << 24);
 1477                         cbe = &cpl_batch->pkt_entry[i];
 1478                         cbe->cntrl = hflit[0];
 1479                         cbe->len = hflit[1];
 1480                         cbe->addr = htobe64(segs[i].ds_addr);
 1481                 }
 1482 
 1483                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1484                     V_WR_SGLSFLT(flits)) |
 1485                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1486                 wr_lo = htonl(V_WR_LEN(flits) |
 1487                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1488                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1489                 wmb();
 1490                 ETHER_BPF_MTAP(pi->ifp, m0);
 1491                 wr_gen2(txd, txqs.gen);
 1492                 check_ring_tx_db(sc, txq, 0);
 1493                 return (0);             
 1494         } else if (tso_info) {
 1495                 int eth_type;
 1496                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1497                 struct ether_header *eh;
 1498                 struct ip *ip;
 1499                 struct tcphdr *tcp;
 1500 
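                      /*
                       * TSO path: build a CPL_TX_PKT_LSO header.  The Ethernet,
                       * IP, and TCP headers are pulled up into the first mbuf
                       * below so that their sizes can be encoded in lso_info.
                       */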
 1501                 txd->flit[2] = 0;
 1502                 GET_VTAG(cntrl, m0);
 1503                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1504                 hdr->cntrl = htonl(cntrl);
 1505                 hdr->len = htonl(mlen | 0x80000000);
 1506 
 1507                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1508                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1509                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1510                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1511                         panic("tx tso packet too small");
 1512                 }
 1513 
 1514                 /* Make sure the Ethernet, IP, and TCP headers are all in m0 */
 1515                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1516                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1517                         if (__predict_false(m0 == NULL)) {
 1518                                 /* XXX panic probably an overreaction */
 1519                                 panic("couldn't fit header into mbuf");
 1520                         }
 1521                 }
 1522 
 1523                 eh = mtod(m0, struct ether_header *);
 1524                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 1525                         eth_type = CPL_ETH_II_VLAN;
 1526                         ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
 1527                 } else {
 1528                         eth_type = CPL_ETH_II;
 1529                         ip = (struct ip *)(eh + 1);
 1530                 }
 1531                 tcp = (struct tcphdr *)(ip + 1);
 1532 
 1533                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
 1534                             V_LSO_IPHDR_WORDS(ip->ip_hl) |
 1535                             V_LSO_TCPHDR_WORDS(tcp->th_off);
 1536                 hdr->lso_info = htonl(tso_info);
 1537 
 1538                 if (__predict_false(mlen <= PIO_LEN)) {
 1539                         /*
 1540                          * The packet is not undersized, yet it fits within PIO_LEN;
 1541                          * a TSO packet this small indicates a bug at the higher levels.
 1542                          */
 1543                         txsd->m = NULL;
 1544                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1545                         flits = (mlen + 7) / 8 + 3;
 1546                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1547                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1548                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1549                         wr_lo = htonl(V_WR_LEN(flits) |
 1550                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1551                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1552                         wmb();
 1553                         ETHER_BPF_MTAP(pi->ifp, m0);
 1554                         wr_gen2(txd, txqs.gen);
 1555                         check_ring_tx_db(sc, txq, 0);
 1556                         m_freem(m0);
 1557                         return (0);
 1558                 }
 1559                 flits = 3;      
 1560         } else {
 1561                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1562                 
 1563                 GET_VTAG(cntrl, m0);
 1564                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1565                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1566                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1567                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
 1568                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1569                 cpl->cntrl = htonl(cntrl);
 1570                 cpl->len = htonl(mlen | 0x80000000);
 1571 
 1572                 if (mlen <= PIO_LEN) {
 1573                         txsd->m = NULL;
 1574                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1575                         flits = (mlen + 7) / 8 + 2;
 1576                         
 1577                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1578                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1579                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1580                         wr_lo = htonl(V_WR_LEN(flits) |
 1581                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1582                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1583                         wmb();
 1584                         ETHER_BPF_MTAP(pi->ifp, m0);
 1585                         wr_gen2(txd, txqs.gen);
 1586                         check_ring_tx_db(sc, txq, 0);
 1587                         m_freem(m0);
 1588                         return (0);
 1589                 }
 1590                 flits = 2;
 1591         }
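              /*
               * Non-immediate case: the CPL header occupies the first 'flits'
               * flits of the descriptor.  Append a scatter/gather list for the
               * DMA segments and let write_wr_hdr_sgl() fill in the work
               * request header(s).
               */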
 1592         wrp = (struct work_request_hdr *)txd;
 1593         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1594         make_sgl(sgp, segs, nsegs);
 1595 
 1596         sgl_flits = sgl_len(nsegs);
 1597 
 1598         ETHER_BPF_MTAP(pi->ifp, m0);
 1599 
 1600         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1601         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1602         wr_lo = htonl(V_WR_TID(txq->token));
 1603         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1604             sgl_flits, wr_hi, wr_lo);
 1605         check_ring_tx_db(sc, txq, 0);
 1606 
 1607         return (0);
 1608 }
 1609 
 1610 void
 1611 cxgb_tx_watchdog(void *arg)
 1612 {
 1613         struct sge_qset *qs = arg;
 1614         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1615 
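              /*
               * Hysteresis for Tx coalescing: disable it once the hardware
               * ring has drained to the stop threshold and the software ring
               * is empty; re-enable it when usage reaches the start threshold.
               */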
 1616         if (qs->coalescing != 0 &&
 1617             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1618             TXQ_RING_EMPTY(qs))
 1619                 qs->coalescing = 0; 
 1620         else if (qs->coalescing == 0 &&
 1621             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1622                 qs->coalescing = 1;
 1623         if (TXQ_TRYLOCK(qs)) {
 1624                 qs->qs_flags |= QS_FLUSHING;
 1625                 cxgb_start_locked(qs);
 1626                 qs->qs_flags &= ~QS_FLUSHING;
 1627                 TXQ_UNLOCK(qs);
 1628         }
 1629         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1630                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1631                     qs, txq->txq_watchdog.c_cpu);
 1632 }
 1633 
 1634 static void
 1635 cxgb_tx_timeout(void *arg)
 1636 {
 1637         struct sge_qset *qs = arg;
 1638         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1639 
 1640         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1641                 qs->coalescing = 1;     
 1642         if (TXQ_TRYLOCK(qs)) {
 1643                 qs->qs_flags |= QS_TIMEOUT;
 1644                 cxgb_start_locked(qs);
 1645                 qs->qs_flags &= ~QS_TIMEOUT;
 1646                 TXQ_UNLOCK(qs);
 1647         }
 1648 }
 1649 
 1650 static void
 1651 cxgb_start_locked(struct sge_qset *qs)
 1652 {
 1653         struct mbuf *m_head = NULL;
 1654         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1655         struct port_info *pi = qs->port;
 1656         struct ifnet *ifp = pi->ifp;
 1657 
 1658         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1659                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1660 
 1661         if (!pi->link_config.link_ok) {
 1662                 TXQ_RING_FLUSH(qs);
 1663                 return;
 1664         }
 1665         TXQ_LOCK_ASSERT(qs);
 1666         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1667             pi->link_config.link_ok) {
 1668                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1669 
 1670                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1671                         break;
 1672 
 1673                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1674                         break;
 1675                 /*
 1676                  *  Encapsulation can modify our pointer, and/or make it
 1677                  *  NULL on failure.  In that event, we can't requeue.
 1678                  */
 1679                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1680                         break;
 1681 
 1682                 m_head = NULL;
 1683         }
 1684 
 1685         if (txq->db_pending)
 1686                 check_ring_tx_db(pi->adapter, txq, 1);
 1687 
 1688         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1689             pi->link_config.link_ok)
 1690                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1691                     qs, txq->txq_timer.c_cpu);
 1692         if (m_head != NULL)
 1693                 m_freem(m_head);
 1694 }
 1695 
 1696 static int
 1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1698 {
 1699         struct port_info *pi = qs->port;
 1700         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1701         struct buf_ring *br = txq->txq_mr;
 1702         int error, avail;
 1703 
 1704         avail = txq->size - txq->in_use;
 1705         TXQ_LOCK_ASSERT(qs);
 1706 
 1707         /*
 1708          * We can only do a direct transmit if the following are true:
 1709          * - we aren't coalescing (ring < 3/4 full)
 1710          * - the link is up -- checked in caller
 1711          * - there are no packets enqueued already
 1712          * - there is space in the hardware transmit queue
 1713          */
 1714         if (check_pkt_coalesce(qs) == 0 &&
 1715             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1716                 if (t3_encap(qs, &m)) {
 1717                         if (m != NULL &&
 1718                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1719                                 return (error);
 1720                 } else {
 1721                         if (txq->db_pending)
 1722                                 check_ring_tx_db(pi->adapter, txq, 1);
 1723 
 1724                         /*
 1725                          * We've bypassed the buf ring so we need to update
 1726                          * the stats directly
 1727                          */
 1728                         txq->txq_direct_packets++;
 1729                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1730                 }
 1731         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1732                 return (error);
 1733 
 1734         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1735         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1736             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1737                 cxgb_start_locked(qs);
 1738         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1739                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1740                     qs, txq->txq_timer.c_cpu);
 1741         return (0);
 1742 }
 1743 
 1744 int
 1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1746 {
 1747         struct sge_qset *qs;
 1748         struct port_info *pi = ifp->if_softc;
 1749         int error, qidx = pi->first_qset;
 1750 
 1751         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1752             ||(!pi->link_config.link_ok)) {
 1753                 m_freem(m);
 1754                 return (0);
 1755         }
 1756         
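              /*
               * Use the packet's flow ID, when present, to spread traffic
               * across this port's queue sets; otherwise use the port's
               * first queue set.
               */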
 1757         if (m->m_flags & M_FLOWID)
 1758                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1759 
 1760         qs = &pi->adapter->sge.qs[qidx];
 1761         
 1762         if (TXQ_TRYLOCK(qs)) {
 1763                 /* XXX running */
 1764                 error = cxgb_transmit_locked(ifp, qs, m);
 1765                 TXQ_UNLOCK(qs);
 1766         } else
 1767                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1768         return (error);
 1769 }
 1770 void
 1771 cxgb_start(struct ifnet *ifp)
 1772 {
 1773         struct port_info *pi = ifp->if_softc;
 1774         struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
 1775         
 1776         if (!pi->link_config.link_ok)
 1777                 return;
 1778 
 1779         TXQ_LOCK(qs);
 1780         cxgb_start_locked(qs);
 1781         TXQ_UNLOCK(qs);
 1782 }
 1783 
 1784 void
 1785 cxgb_qflush(struct ifnet *ifp)
 1786 {
 1787         /*
 1788          * Flush any mbufs enqueued in the buf_rings
 1789          * and in the transmit queues.
 1790          * Currently a no-op.
 1791          */
 1792         return;
 1793 }
 1794 
 1795 /**
 1796  *      write_imm - write a packet into a Tx descriptor as immediate data
 1797  *      @d: the Tx descriptor to write
 1798  *      @m: the packet
 1799  *      @len: the length of packet data to write as immediate data
 1800  *      @gen: the generation bit value to write
 1801  *
 1802  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1803  *      contains a work request at its beginning.  We must write the packet
 1804  *      carefully so the SGE doesn't accidentally read it before it's written in
 1805  *      its entirety.
 1806  */
 1807 static __inline void
 1808 write_imm(struct tx_desc *d, struct mbuf *m,
 1809           unsigned int len, unsigned int gen)
 1810 {
 1811         struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
 1812         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1813         uint32_t wr_hi, wr_lo;
 1814 
 1815         if (len > WR_LEN)
 1816                 panic("len too big %d\n", len);
 1817         if (len < sizeof(*from))
 1818                 panic("len too small %d", len);
 1819         
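              /*
               * Copy the body of the work request first; the header is
               * written afterwards and the generation bit last, so the SGE
               * cannot pick up a half-written descriptor.
               */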
 1820         memcpy(&to[1], &from[1], len - sizeof(*from));
 1821         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1822                                         V_WR_BCNTLFLT(len & 7));
 1823         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
 1824                                         V_WR_LEN((len + 7) / 8));
 1825         set_wr_hdr(to, wr_hi, wr_lo);
 1826         wmb();
 1827         wr_gen2(d, gen);
 1828 
 1829         /*
 1830          * This check is a hack; we should really fix the logic so
 1831          * that this can't happen.
 1832          */
 1833         if (m->m_type != MT_DONTFREE)
 1834                 m_freem(m);
 1835         
 1836 }
 1837 
 1838 /**
 1839  *      check_desc_avail - check descriptor availability on a send queue
 1840  *      @adap: the adapter
 1841  *      @q: the TX queue
 1842  *      @m: the packet needing the descriptors
 1843  *      @ndesc: the number of Tx descriptors needed
 1844  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1845  *
 1846  *      Checks if the requested number of Tx descriptors is available on an
 1847  *      SGE send queue.  If the queue is already suspended or not enough
 1848  *      descriptors are available, the packet is queued for later transmission.
 1849  *      Must be called with the Tx queue locked.
 1850  *
 1851  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1852  *      enough descriptors and the packet has been queued, and 2 if the caller
 1853  *      needs to retry because there weren't enough descriptors at the
 1854  *      beginning of the call but some freed up in the meantime.
 1855  */
 1856 static __inline int
 1857 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1858                  struct mbuf *m, unsigned int ndesc,
 1859                  unsigned int qid)
 1860 {
 1861         /* 
 1862          * XXX We currently only use this for checking the control queue.
 1863          * The control queue is only used for binding qsets, which happens
 1864          * at init time, so we are guaranteed enough descriptors.
 1865          */
 1866         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1867 addq_exit:      mbufq_tail(&q->sendq, m);
 1868                 return 1;
 1869         }
 1870         if (__predict_false(q->size - q->in_use < ndesc)) {
 1871 
 1872                 struct sge_qset *qs = txq_to_qset(q, qid);
 1873 
 1874                 setbit(&qs->txq_stopped, qid);
 1875                 if (should_restart_tx(q) &&
 1876                     test_and_clear_bit(qid, &qs->txq_stopped))
 1877                         return 2;
 1878 
 1879                 q->stops++;
 1880                 goto addq_exit;
 1881         }
 1882         return 0;
 1883 }
 1884 
 1885 
 1886 /**
 1887  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1888  *      @q: the SGE control Tx queue
 1889  *
 1890  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1891  *      that send only immediate data (presently just the control queues) and
 1892  *      thus do not have any mbufs.
 1893  */
 1894 static __inline void
 1895 reclaim_completed_tx_imm(struct sge_txq *q)
 1896 {
 1897         unsigned int reclaim = q->processed - q->cleaned;
 1898 
 1899         q->in_use -= reclaim;
 1900         q->cleaned += reclaim;
 1901 }
 1902 
 1903 static __inline int
 1904 immediate(const struct mbuf *m)
 1905 {
 1906         return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
 1907 }
 1908 
 1909 /**
 1910  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1911  *      @adap: the adapter
 1912  *      @q: the control queue
 1913  *      @m: the packet
 1914  *
 1915  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1916  *      a control queue must fit entirely as immediate data in a single Tx
 1917  *      descriptor and have no page fragments.
 1918  */
 1919 static int
 1920 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1921 {
 1922         int ret;
 1923         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1924         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1925         
 1926         if (__predict_false(!immediate(m))) {
 1927                 m_freem(m);
 1928                 return 0;
 1929         }
 1930         
 1931         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1932         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1933 
 1934         TXQ_LOCK(qs);
 1935 again:  reclaim_completed_tx_imm(q);
 1936 
 1937         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1938         if (__predict_false(ret)) {
 1939                 if (ret == 1) {
 1940                         TXQ_UNLOCK(qs);
 1941                         return (ENOSPC);
 1942                 }
 1943                 goto again;
 1944         }
 1945         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1946         
 1947         q->in_use++;
 1948         if (++q->pidx >= q->size) {
 1949                 q->pidx = 0;
 1950                 q->gen ^= 1;
 1951         }
 1952         TXQ_UNLOCK(qs);
 1953         wmb();
 1954         t3_write_reg(adap, A_SG_KDOORBELL,
 1955                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1956         return (0);
 1957 }
 1958 
 1959 
 1960 /**
 1961  *      restart_ctrlq - restart a suspended control queue
 1962  *      @qs: the queue set containing the control queue
 1963  *
 1964  *      Resumes transmission on a suspended Tx control queue.
 1965  */
 1966 static void
 1967 restart_ctrlq(void *data, int npending)
 1968 {
 1969         struct mbuf *m;
 1970         struct sge_qset *qs = (struct sge_qset *)data;
 1971         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1972         adapter_t *adap = qs->port->adapter;
 1973 
 1974         TXQ_LOCK(qs);
 1975 again:  reclaim_completed_tx_imm(q);
 1976 
 1977         while (q->in_use < q->size &&
 1978                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1979 
 1980                 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1981 
 1982                 if (++q->pidx >= q->size) {
 1983                         q->pidx = 0;
 1984                         q->gen ^= 1;
 1985                 }
 1986                 q->in_use++;
 1987         }
 1988         if (!mbufq_empty(&q->sendq)) {
 1989                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1990 
 1991                 if (should_restart_tx(q) &&
 1992                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1993                         goto again;
 1994                 q->stops++;
 1995         }
 1996         TXQ_UNLOCK(qs);
 1997         t3_write_reg(adap, A_SG_KDOORBELL,
 1998                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1999 }
 2000 
 2001 
 2002 /*
 2003  * Send a management message through control queue 0
 2004  */
 2005 int
 2006 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 2007 {
 2008         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 2009 }
 2010 
 2011 /**
 2012  *      free_qset - free the resources of an SGE queue set
 2013  *      @sc: the controller owning the queue set
 2014  *      @q: the queue set
 2015  *
 2016  *      Release the HW and SW resources associated with an SGE queue set, such
 2017  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 2018  *      queue set must be quiesced prior to calling this.
 2019  */
 2020 static void
 2021 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 2022 {
 2023         int i;
 2024         
 2025         reclaim_completed_tx(q, 0, TXQ_ETH);
 2026         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2027                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2028         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2029                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2030                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2031         }
 2032 
 2033         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2034                 if (q->fl[i].desc) {
 2035                         mtx_lock_spin(&sc->sge.reg_lock);
 2036                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2037                         mtx_unlock_spin(&sc->sge.reg_lock);
 2038                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2039                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2040                                         q->fl[i].desc_map);
 2041                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2042                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2043                 }
 2044                 if (q->fl[i].sdesc) {
 2045                         free_rx_bufs(sc, &q->fl[i]);
 2046                         free(q->fl[i].sdesc, M_DEVBUF);
 2047                 }
 2048         }
 2049 
 2050         mtx_unlock(&q->lock);
 2051         MTX_DESTROY(&q->lock);
 2052         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2053                 if (q->txq[i].desc) {
 2054                         mtx_lock_spin(&sc->sge.reg_lock);
 2055                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2056                         mtx_unlock_spin(&sc->sge.reg_lock);
 2057                         bus_dmamap_unload(q->txq[i].desc_tag,
 2058                                         q->txq[i].desc_map);
 2059                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2060                                         q->txq[i].desc_map);
 2061                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2062                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2063                 }
 2064                 if (q->txq[i].sdesc) {
 2065                         free(q->txq[i].sdesc, M_DEVBUF);
 2066                 }
 2067         }
 2068 
 2069         if (q->rspq.desc) {
 2070                 mtx_lock_spin(&sc->sge.reg_lock);
 2071                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2072                 mtx_unlock_spin(&sc->sge.reg_lock);
 2073                 
 2074                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2075                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2076                                 q->rspq.desc_map);
 2077                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2078                 MTX_DESTROY(&q->rspq.lock);
 2079         }
 2080 
 2081 #ifdef INET
 2082         tcp_lro_free(&q->lro.ctrl);
 2083 #endif
 2084 
 2085         bzero(q, sizeof(*q));
 2086 }
 2087 
 2088 /**
 2089  *      t3_free_sge_resources - free SGE resources
 2090  *      @sc: the adapter softc
 2091  *
 2092  *      Frees resources used by the SGE queue sets.
 2093  */
 2094 void
 2095 t3_free_sge_resources(adapter_t *sc)
 2096 {
 2097         int i, nqsets;
 2098         
 2099         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2100                 nqsets += sc->port[i].nqsets;
 2101 
 2102         for (i = 0; i < nqsets; ++i) {
 2103                 TXQ_LOCK(&sc->sge.qs[i]);
 2104                 t3_free_qset(sc, &sc->sge.qs[i]);
 2105         }
 2106         
 2107 }
 2108 
 2109 /**
 2110  *      t3_sge_start - enable SGE
 2111  *      @sc: the controller softc
 2112  *
 2113  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2114  *      transfers.
 2115  */
 2116 void
 2117 t3_sge_start(adapter_t *sc)
 2118 {
 2119         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2120 }
 2121 
 2122 /**
 2123  *      t3_sge_stop - disable SGE operation
 2124  *      @sc: the adapter
 2125  *
 2126  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2127  *      from error interrupts) or from normal process context.  In the latter
 2128  *      case it also disables any pending queue restart tasklets.  Note that
 2129  *      if it is called in interrupt context it cannot disable the restart
 2130  *      tasklets as it cannot wait, however the tasklets will have no effect
 2131  *      since the doorbells are disabled and the driver will call this again
 2132  *      later from process context, at which time the tasklets will be stopped
 2133  *      if they are still running.
 2134  */
 2135 void
 2136 t3_sge_stop(adapter_t *sc)
 2137 {
 2138         int i, nqsets;
 2139         
 2140         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2141 
 2142         if (sc->tq == NULL)
 2143                 return;
 2144         
 2145         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2146                 nqsets += sc->port[i].nqsets;
 2147 #ifdef notyet
 2148         /*
 2149          * 
 2150          * XXX
 2151          */
 2152         for (i = 0; i < nqsets; ++i) {
 2153                 struct sge_qset *qs = &sc->sge.qs[i];
 2154                 
 2155                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2156                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2157         }
 2158 #endif
 2159 }
 2160 
 2161 /**
 2162  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2163  *      @qs: the queue set containing the Tx queue
 2164  *      @reclaimable: the number of descriptors to reclaim
 2165  *      @queue: the Tx queue index within the queue set
 2166  *
 2167  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2168  *      Tx buffers, unloading their DMA maps and freeing their mbuf chains.
 2169  *      Skipped descriptors (those with no mbuf attached) are counted in
 2170  *      txq_skipped.
 2171  *
 2172  *      Called with the Tx queue lock held.
 2173  */
 2174 void
 2175 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2176 {
 2177         struct tx_sw_desc *txsd;
 2178         unsigned int cidx, mask;
 2179         struct sge_txq *q = &qs->txq[queue];
 2180 
 2181 #ifdef T3_TRACE
 2182         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2183                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2184 #endif
 2185         cidx = q->cidx;
 2186         mask = q->size - 1;
 2187         txsd = &q->sdesc[cidx];
 2188 
 2189         mtx_assert(&qs->lock, MA_OWNED);
 2190         while (reclaimable--) {
 2191                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2192                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2193 
 2194                 if (txsd->m != NULL) {
 2195                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2196                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2197                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2198                         }
 2199                         m_freem_list(txsd->m);
 2200                         txsd->m = NULL;
 2201                 } else
 2202                         q->txq_skipped++;
 2203                 
 2204                 ++txsd;
 2205                 if (++cidx == q->size) {
 2206                         cidx = 0;
 2207                         txsd = q->sdesc;
 2208                 }
 2209         }
 2210         q->cidx = cidx;
 2211 
 2212 }
 2213 
 2214 /**
 2215  *      is_new_response - check if a response is newly written
 2216  *      @r: the response descriptor
 2217  *      @q: the response queue
 2218  *
 2219  *      Returns true if a response descriptor contains a yet unprocessed
 2220  *      response.
 2221  */
 2222 static __inline int
 2223 is_new_response(const struct rsp_desc *r,
 2224     const struct sge_rspq *q)
 2225 {
 2226         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2227 }
 2228 
 2229 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2230 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2231                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2232                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2233                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2234 
 2235 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2236 #define NOMEM_INTR_DELAY 2500
 2237 
 2238 /**
 2239  *      write_ofld_wr - write an offload work request
 2240  *      @adap: the adapter
 2241  *      @m: the packet to send
 2242  *      @q: the Tx queue
 2243  *      @pidx: index of the first Tx descriptor to write
 2244  *      @gen: the generation value to use
 2245  *      @ndesc: number of descriptors the packet will occupy
 2246  *
 2247  *      Write an offload work request to send the supplied packet.  The packet
 2248  *      data already carry the work request with most fields populated.
 2249  */
 2250 static void
 2251 write_ofld_wr(adapter_t *adap, struct mbuf *m,
 2252     struct sge_txq *q, unsigned int pidx,
 2253     unsigned int gen, unsigned int ndesc,
 2254     bus_dma_segment_t *segs, unsigned int nsegs)
 2255 {
 2256         unsigned int sgl_flits, flits;
 2257         struct work_request_hdr *from;
 2258         struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 2259         struct tx_desc *d = &q->desc[pidx];
 2260         struct txq_state txqs;
 2261         
 2262         if (immediate(m) && nsegs == 0) {
 2263                 write_imm(d, m, m->m_len, gen);
 2264                 return;
 2265         }
 2266 
 2267         /* Only TX_DATA builds SGLs */
 2268         from = mtod(m, struct work_request_hdr *);
 2269         memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
 2270 
 2271         flits = m->m_len / 8;
 2272         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
 2273 
 2274         make_sgl(sgp, segs, nsegs);
 2275         sgl_flits = sgl_len(nsegs);
 2276 
 2277         txqs.gen = gen;
 2278         txqs.pidx = pidx;
 2279         txqs.compl = 0;
 2280 
 2281         write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
 2282             from->wrh_hi, from->wrh_lo);
 2283 }
 2284 
 2285 /**
 2286  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 2287  *      @m: the packet
 2288  *
 2289  *      Returns the number of Tx descriptors needed for the given offload
 2290  *      packet.  These packets are already fully constructed.
 2291  */
 2292 static __inline unsigned int
 2293 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 2294 {
 2295         unsigned int flits, cnt = 0;
 2296         int ndescs;
 2297 
 2298         if (m->m_len <= WR_LEN && nsegs == 0)
 2299                 return (1);                 /* packet fits as immediate data */
 2300 
 2301         /*
 2302          * This needs to be revisited for TOE.
 2303          */
 2304 
 2305         cnt = nsegs;
 2306                 
 2307         /* headers */
 2308         flits = m->m_len / 8;
 2309 
 2310         ndescs = flits_to_desc(flits + sgl_len(cnt));
 2311 
 2312         return (ndescs);
 2313 }
 2314 
 2315 /**
 2316  *      ofld_xmit - send a packet through an offload queue
 2317  *      @adap: the adapter
 2318  *      @q: the Tx offload queue
 2319  *      @m: the packet
 2320  *
 2321  *      Send an offload packet through an SGE offload queue.
 2322  */
 2323 static int
 2324 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2325 {
 2326         int ret, nsegs;
 2327         unsigned int ndesc;
 2328         unsigned int pidx, gen;
 2329         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2330         bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
 2331         struct tx_sw_desc *stx;
 2332 
 2333         nsegs = m_get_sgllen(m);
 2334         vsegs = m_get_sgl(m);
 2335         ndesc = calc_tx_descs_ofld(m, nsegs);
 2336         busdma_map_sgl(vsegs, segs, nsegs);
 2337 
 2338         stx = &q->sdesc[q->pidx];
 2339         
 2340         TXQ_LOCK(qs);
 2341 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2342         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2343         if (__predict_false(ret)) {
 2344                 if (ret == 1) {
 2345                         printf("no ofld desc avail\n");
 2346                         
 2347                         m_set_priority(m, ndesc);     /* save for restart */
 2348                         TXQ_UNLOCK(qs);
 2349                         return (EINTR);
 2350                 }
 2351                 goto again;
 2352         }
 2353 
 2354         gen = q->gen;
 2355         q->in_use += ndesc;
 2356         pidx = q->pidx;
 2357         q->pidx += ndesc;
 2358         if (q->pidx >= q->size) {
 2359                 q->pidx -= q->size;
 2360                 q->gen ^= 1;
 2361         }
 2362 #ifdef T3_TRACE
 2363         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 2364                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 2365                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 2366                   skb_shinfo(skb)->nr_frags);
 2367 #endif
 2368         TXQ_UNLOCK(qs);
 2369 
 2370         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2371         check_ring_tx_db(adap, q, 1);
 2372         return (0);
 2373 }
 2374 
 2375 /**
 2376  *      restart_offloadq - restart a suspended offload queue
 2377  *      @qs: the queue set containing the offload queue
 2378  *
 2379  *      Resumes transmission on a suspended Tx offload queue.
 2380  */
 2381 static void
 2382 restart_offloadq(void *data, int npending)
 2383 {
 2384         struct mbuf *m;
 2385         struct sge_qset *qs = data;
 2386         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2387         adapter_t *adap = qs->port->adapter;
 2388         bus_dma_segment_t segs[TX_MAX_SEGS];
 2389         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 2390         int nsegs, cleaned;
 2391                 
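              /*
               * Drain the deferred send queue: re-check descriptor
               * availability for each queued work request, advance the
               * producer state, and drop the queue lock while the WR is
               * written out.
               */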
 2392         TXQ_LOCK(qs);
 2393 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2394 
 2395         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2396                 unsigned int gen, pidx;
 2397                 unsigned int ndesc = m_get_priority(m);
 2398 
 2399                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2400                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2401                         if (should_restart_tx(q) &&
 2402                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2403                                 goto again;
 2404                         q->stops++;
 2405                         break;
 2406                 }
 2407 
 2408                 gen = q->gen;
 2409                 q->in_use += ndesc;
 2410                 pidx = q->pidx;
 2411                 q->pidx += ndesc;
 2412                 if (q->pidx >= q->size) {
 2413                         q->pidx -= q->size;
 2414                         q->gen ^= 1;
 2415                 }
 2416                 
 2417                 (void)mbufq_dequeue(&q->sendq);
 2418                 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 2419                 TXQ_UNLOCK(qs);
 2420                 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2421                 TXQ_LOCK(qs);
 2422         }
 2423 #if USE_GTS
 2424         set_bit(TXQ_RUNNING, &q->flags);
 2425         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2426 #endif
 2427         TXQ_UNLOCK(qs);
 2428         wmb();
 2429         t3_write_reg(adap, A_SG_KDOORBELL,
 2430                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2431 }
 2432 
 2433 /**
 2434  *      queue_set - return the queue set a packet should use
 2435  *      @m: the packet
 2436  *
 2437  *      Maps a packet to the SGE queue set it should use.  The desired queue
 2438  *      set is carried in bits 1-3 in the packet's priority.
 2439  */
 2440 static __inline int
 2441 queue_set(const struct mbuf *m)
 2442 {
 2443         return m_get_priority(m) >> 1;
 2444 }
 2445 
 2446 /**
 2447  *      is_ctrl_pkt - return whether an offload packet is a control packet
 2448  *      @m: the packet
 2449  *
 2450  *      Determines whether an offload packet should use an OFLD or a CTRL
 2451  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 2452  */
 2453 static __inline int
 2454 is_ctrl_pkt(const struct mbuf *m)
 2455 {
 2456         return m_get_priority(m) & 1;
 2457 }
 2458 
 2459 /**
 2460  *      t3_offload_tx - send an offload packet
 2461  *      @tdev: the offload device to send to
 2462  *      @m: the packet
 2463  *
 2464  *      Sends an offload packet.  We use the packet priority to select the
 2465  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2466  *      should be sent as regular or control, bits 1-3 select the queue set.
 2467  */
 2468 int
 2469 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
 2470 {
 2471         adapter_t *adap = tdev2adap(tdev);
 2472         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2473 
 2474         if (__predict_false(is_ctrl_pkt(m))) 
 2475                 return ctrl_xmit(adap, qs, m);
 2476 
 2477         return ofld_xmit(adap, qs, m);
 2478 }
 2479 
 2480 /**
 2481  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2482  *      @tdev: the offload device that will be receiving the packets
 2483  *      @q: the SGE response queue that assembled the bundle
 2484  *      @m: the partial bundle
 2485  *      @n: the number of packets in the bundle
 2486  *
 2487  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2488  */
 2489 static __inline void
 2490 deliver_partial_bundle(struct t3cdev *tdev,
 2491                         struct sge_rspq *q,
 2492                         struct mbuf *mbufs[], int n)
 2493 {
 2494         if (n) {
 2495                 q->offload_bundles++;
 2496                 cxgb_ofld_recv(tdev, mbufs, n);
 2497         }
 2498 }
 2499 
 2500 static __inline int
 2501 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 2502     struct mbuf *m, struct mbuf *rx_gather[],
 2503     unsigned int gather_idx)
 2504 {
 2505         
 2506         rq->offload_pkts++;
 2507         m->m_pkthdr.header = mtod(m, void *);
 2508         rx_gather[gather_idx++] = m;
 2509         if (gather_idx == RX_BUNDLE_SIZE) {
 2510                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2511                 gather_idx = 0;
 2512                 rq->offload_bundles++;
 2513         }
 2514         return (gather_idx);
 2515 }
 2516 
 2517 static void
 2518 restart_tx(struct sge_qset *qs)
 2519 {
 2520         struct adapter *sc = qs->port->adapter;
 2521         
 2522         
 2523         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2524             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2525             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2526                 qs->txq[TXQ_OFLD].restarts++;
 2527                 DPRINTF("restarting TXQ_OFLD\n");
 2528                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2529         }
 2530         DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
 2531             qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
 2532             qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
 2533             qs->txq[TXQ_CTRL].in_use);
 2534         
 2535         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2536             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2537             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2538                 qs->txq[TXQ_CTRL].restarts++;
 2539                 DPRINTF("restarting TXQ_CTRL\n");
 2540                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2541         }
 2542 }
 2543 
 2544 /**
 2545  *      t3_sge_alloc_qset - initialize an SGE queue set
 2546  *      @sc: the controller softc
 2547  *      @id: the queue set id
 2548  *      @nports: how many Ethernet ports will be using this queue set
 2549  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2550  *      @p: configuration parameters for this queue set
 2551  *      @ntxq: number of Tx queues for the queue set
 2552  *      @pi: port info for queue set
 2553  *
 2554  *      Allocate resources and initialize an SGE queue set.  A queue set
 2555  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2556  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2557  *      queue, offload queue, and control queue.
 2558  */
 2559 int
 2560 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2561                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2562 {
 2563         struct sge_qset *q = &sc->sge.qs[id];
 2564         int i, ret = 0;
 2565 
 2566         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2567         q->port = pi;
 2568 
 2569         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2570             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2571                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2572                 goto err;
 2573         }
 2574         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2575             M_NOWAIT | M_ZERO)) == NULL) {
 2576                 device_printf(sc->dev, "failed to allocate ifq\n");
 2577                 goto err;
 2578         }
 2579         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2580         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2581         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2582         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2583         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2584 
 2585         init_qset_cntxt(q, id);
 2586         q->idx = id;
 2587         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2588                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2589                     &q->fl[0].desc, &q->fl[0].sdesc,
 2590                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2591                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2592                 printf("error %d from alloc ring fl0\n", ret);
 2593                 goto err;
 2594         }
 2595 
 2596         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2597                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2598                     &q->fl[1].desc, &q->fl[1].sdesc,
 2599                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2600                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2601                 printf("error %d from alloc ring fl1\n", ret);
 2602                 goto err;
 2603         }
 2604 
 2605         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2606                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2607                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2608                     NULL, NULL)) != 0) {
 2609                 printf("error %d from alloc ring rspq\n", ret);
 2610                 goto err;
 2611         }
 2612 
 2613         for (i = 0; i < ntxq; ++i) {
 2614                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2615 
 2616                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2617                             sizeof(struct tx_desc), sz,
 2618                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2619                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2620                             &q->txq[i].desc_map,
 2621                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2622                         printf("error %d from alloc ring tx %i\n", ret, i);
 2623                         goto err;
 2624                 }
 2625                 mbufq_init(&q->txq[i].sendq);
 2626                 q->txq[i].gen = 1;
 2627                 q->txq[i].size = p->txq_size[i];
 2628         }
 2629         
 2630         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2631         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2632         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2633         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2634 
 2635         q->fl[0].gen = q->fl[1].gen = 1;
 2636         q->fl[0].size = p->fl_size;
 2637         q->fl[1].size = p->jumbo_size;
 2638 
 2639         q->rspq.gen = 1;
 2640         q->rspq.cidx = 0;
 2641         q->rspq.size = p->rspq_size;
 2642 
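              /*
               * stop_thres: enough descriptors for one maximum-sized
               * (TX_MAX_SEGS segments) work request per port.
               */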
 2643         q->txq[TXQ_ETH].stop_thres = nports *
 2644             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2645 
 2646         q->fl[0].buf_size = MCLBYTES;
 2647         q->fl[0].zone = zone_pack;
 2648         q->fl[0].type = EXT_PACKET;
 2649 
 2650         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2651                 q->fl[1].zone = zone_jumbo16;
 2652                 q->fl[1].type = EXT_JUMBO16;
 2653         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2654                 q->fl[1].zone = zone_jumbo9;
 2655                 q->fl[1].type = EXT_JUMBO9;             
 2656         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2657                 q->fl[1].zone = zone_jumbop;
 2658                 q->fl[1].type = EXT_JUMBOP;
 2659         } else {
 2660                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2661                 ret = EDOOFUS;
 2662                 goto err;
 2663         }
 2664         q->fl[1].buf_size = p->jumbo_buf_size;
 2665 
 2666         /* Allocate and setup the lro_ctrl structure */
 2667         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2668 #ifdef INET
 2669         ret = tcp_lro_init(&q->lro.ctrl);
 2670         if (ret) {
 2671                 printf("error %d from tcp_lro_init\n", ret);
 2672                 goto err;
 2673         }
 2674 #endif
 2675         q->lro.ctrl.ifp = pi->ifp;
 2676 
 2677         mtx_lock_spin(&sc->sge.reg_lock);
 2678         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2679                                    q->rspq.phys_addr, q->rspq.size,
 2680                                    q->fl[0].buf_size, 1, 0);
 2681         if (ret) {
 2682                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2683                 goto err_unlock;
 2684         }
 2685 
 2686         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2687                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2688                                           q->fl[i].phys_addr, q->fl[i].size,
 2689                                           q->fl[i].buf_size, p->cong_thres, 1,
 2690                                           0);
 2691                 if (ret) {
 2692                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2693                         goto err_unlock;
 2694                 }
 2695         }
 2696 
 2697         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2698                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2699                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2700                                  1, 0);
 2701         if (ret) {
 2702                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2703                 goto err_unlock;
 2704         }
 2705 
 2706         if (ntxq > 1) {
 2707                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2708                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2709                                          q->txq[TXQ_OFLD].phys_addr,
 2710                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2711                 if (ret) {
 2712                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2713                         goto err_unlock;
 2714                 }
 2715         }
 2716 
 2717         if (ntxq > 2) {
 2718                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2719                                          SGE_CNTXT_CTRL, id,
 2720                                          q->txq[TXQ_CTRL].phys_addr,
 2721                                          q->txq[TXQ_CTRL].size,
 2722                                          q->txq[TXQ_CTRL].token, 1, 0);
 2723                 if (ret) {
 2724                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2725                         goto err_unlock;
 2726                 }
 2727         }
 2728         
 2729         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2730             device_get_unit(sc->dev), irq_vec_idx);
 2731         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2732         
 2733         mtx_unlock_spin(&sc->sge.reg_lock);
 2734         t3_update_qset_coalesce(q, p);
 2735         q->port = pi;
 2736         
 2737         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2738         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2739         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2740 
 2741         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2742                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2743 
 2744         return (0);
 2745 
 2746 err_unlock:
 2747         mtx_unlock_spin(&sc->sge.reg_lock);
 2748 err:    
 2749         TXQ_LOCK(q);
 2750         t3_free_qset(sc, q);
 2751 
 2752         return (ret);
 2753 }
 2754 
 2755 /*
 2756  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2757  * Ethernet data.  Hardware assistance with various checksums and any VLAN tag
 2758  * will also be taken into account here.
 2759  */
 2760 void
 2761 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2762 {
 2763         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2764         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2765         struct ifnet *ifp = pi->ifp;
 2766         
 2767         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2768 
 2769         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2770             cpl->csum_valid && cpl->csum == 0xffff) {
 2771                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2772                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2773                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2774                 m->m_pkthdr.csum_data = 0xffff;
 2775         }
 2776 
 2777         if (cpl->vlan_valid) {
 2778                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2779                 m->m_flags |= M_VLANTAG;
 2780         } 
 2781 
 2782         m->m_pkthdr.rcvif = ifp;
 2783         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2784         /*
 2785          * adjust after conversion to mbuf chain
 2786          */
 2787         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2788         m->m_len -= (sizeof(*cpl) + ethpad);
 2789         m->m_data += (sizeof(*cpl) + ethpad);
 2790 }
 2791 
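/*
 * Editor's note: t3_rx_eth() above strips the CPL_RX_PKT header and pad by
 * advancing the mbuf's data pointer and shrinking both length fields by the
 * same amount.  Below is a minimal user-space sketch of that trim-from-the-
 * front adjustment on an invented buffer type; it is illustration only and
 * not the driver's mbuf handling.
 */
#include <stdio.h>

struct sample_buf {
        char    *data;          /* current start of valid data */
        size_t   len;           /* bytes valid at data */
};

/* Drop hdrlen bytes from the front, as the driver does with the CPL header. */
static void
sample_adj(struct sample_buf *b, size_t hdrlen)
{
        if (hdrlen > b->len)
                hdrlen = b->len;
        b->data += hdrlen;
        b->len -= hdrlen;
}

int
main(void)
{
        char raw[] = "HDRpayload";
        struct sample_buf b = { raw, sizeof(raw) - 1 };

        sample_adj(&b, 3);      /* strip the 3-byte pretend header */
        printf("%.*s (%zu bytes)\n", (int)b.len, b.data, b.len);
        return (0);
}
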
 2792 /**
 2793  *      get_packet - return the next ingress packet buffer from a free list
 2794  *      @adap: the adapter that received the packet
 2795  *      @drop_thres: # of remaining buffers before we start dropping packets
 2796  *      @qs: the qset that the SGE free list holding the packet belongs to
 2797  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2798  *      @r: response descriptor 
 2799  *
 2800  *      Get the next packet from a free list and complete setup of the
 2801  *      mbuf.  If the packet is small we make a copy and recycle the
 2802  *      original buffer, otherwise we use the original buffer itself.  If a
 2803  *      positive drop threshold is supplied packets are dropped and their
 2804  *      buffers recycled if (a) the number of remaining buffers is under the
 2805  *      threshold and the packet is too big to copy, or (b) the packet should
 2806  *      be copied but there is no memory for the copy.
 2807  */
 2808 static int
 2809 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2810     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2811 {
 2812 
 2813         unsigned int len_cq =  ntohl(r->len_cq);
 2814         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2815         int mask, cidx = fl->cidx;
 2816         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2817         uint32_t len = G_RSPD_LEN(len_cq);
 2818         uint32_t flags = M_EXT;
 2819         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2820         caddr_t cl;
 2821         struct mbuf *m;
 2822         int ret = 0;
 2823 
 2824         mask = fl->size - 1;
 2825         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2826         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2827         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2828         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2829 
 2830         fl->credits--;
 2831         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2832         
 2833         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2834             sopeop == RSPQ_SOP_EOP) {
 2835                 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 2836                         goto skip_recycle;
 2837                 cl = mtod(m, void *);
 2838                 memcpy(cl, sd->rxsd_cl, len);
 2839                 recycle_rx_buf(adap, fl, fl->cidx);
 2840                 m->m_pkthdr.len = m->m_len = len;
 2841                 m->m_flags = 0;
 2842                 mh->mh_head = mh->mh_tail = m;
 2843                 ret = 1;
 2844                 goto done;
 2845         } else {
 2846         skip_recycle:
 2847                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2848                 cl = sd->rxsd_cl;
 2849                 m = sd->m;
 2850 
 2851                 if ((sopeop == RSPQ_SOP_EOP) ||
 2852                     (sopeop == RSPQ_SOP))
 2853                         flags |= M_PKTHDR;
 2854                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2855                 if (fl->zone == zone_pack) {
 2856                         /*
 2857                          * restore clobbered data pointer
 2858                          */
 2859                         m->m_data = m->m_ext.ext_buf;
 2860                 } else {
 2861                         m_cljset(m, cl, fl->type);
 2862                 }
 2863                 m->m_len = len;
 2864         }               
 2865         switch(sopeop) {
 2866         case RSPQ_SOP_EOP:
 2867                 ret = 1;
 2868                 /* FALLTHROUGH */
 2869         case RSPQ_SOP:
 2870                 mh->mh_head = mh->mh_tail = m;
 2871                 m->m_pkthdr.len = len;
 2872                 break;
 2873         case RSPQ_EOP:
 2874                 ret = 1;
 2875                 /* FALLTHROUGH */
 2876         case RSPQ_NSOP_NEOP:
 2877                 if (mh->mh_tail == NULL) {
 2878                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2879                         m_freem(m);
 2880                         break;
 2881                 }
 2882                 mh->mh_tail->m_next = m;
 2883                 mh->mh_tail = m;
 2884                 mh->mh_head->m_pkthdr.len += len;
 2885                 break;
 2886         }
 2887         if (cxgb_debug)
 2888                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2889 done:
 2890         if (++fl->cidx == fl->size)
 2891                 fl->cidx = 0;
 2892 
 2893         return (ret);
 2894 }
 2895 
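/*
 * Editor's note: get_packet() above prefetches upcoming free-list entries
 * with "(cidx + n) & (size - 1)", which wraps correctly only because the
 * free-list size is a power of two.  A minimal stand-alone sketch of that
 * masked ring index:
 */
#include <stdio.h>

int
main(void)
{
        const unsigned int size = 8;            /* must be a power of two */
        const unsigned int mask = size - 1;
        unsigned int cidx;

        for (cidx = 5; cidx < 5 + size; cidx++)
                printf("cidx=%2u -> slot %u\n", cidx, cidx & mask);
        return (0);
}
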
 2896 /**
 2897  *      handle_rsp_cntrl_info - handles control information in a response
 2898  *      @qs: the queue set corresponding to the response
 2899  *      @flags: the response control flags
 2900  *
 2901  *      Handles the control information of an SGE response, such as GTS
 2902  *      indications and completion credits for the queue set's Tx queues.
 2903  *      HW coalesces credits; we don't do any extra SW coalescing.
 2904  */
 2905 static __inline void
 2906 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2907 {
 2908         unsigned int credits;
 2909 
 2910 #if USE_GTS
 2911         if (flags & F_RSPD_TXQ0_GTS)
 2912                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2913 #endif
 2914         credits = G_RSPD_TXQ0_CR(flags);
 2915         if (credits) 
 2916                 qs->txq[TXQ_ETH].processed += credits;
 2917 
 2918         credits = G_RSPD_TXQ2_CR(flags);
 2919         if (credits)
 2920                 qs->txq[TXQ_CTRL].processed += credits;
 2921 
 2922 # if USE_GTS
 2923         if (flags & F_RSPD_TXQ1_GTS)
 2924                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2925 # endif
 2926         credits = G_RSPD_TXQ1_CR(flags);
 2927         if (credits)
 2928                 qs->txq[TXQ_OFLD].processed += credits;
 2929 
 2930 }
 2931 
 2932 static void
 2933 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2934     unsigned int sleeping)
 2935 {
 2936         ;
 2937 }
 2938 
 2939 /**
 2940  *      process_responses - process responses from an SGE response queue
 2941  *      @adap: the adapter
 2942  *      @qs: the queue set to which the response queue belongs
 2943  *      @budget: how many responses can be processed in this round
 2944  *
 2945  *      Process responses from an SGE response queue up to the supplied budget.
 2946  *      Responses include received packets as well as credits and other events
 2947  *      for the queues that belong to the response queue's queue set.
 2948  *      A negative budget is effectively unlimited.
 2949  *
 2950  *      Additionally choose the interrupt holdoff time for the next interrupt
 2951  *      on this queue.  If the system is under memory shortage use a fairly
 2952  *      long delay to help recovery.
 2953  */
 2954 static int
 2955 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2956 {
 2957         struct sge_rspq *rspq = &qs->rspq;
 2958         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2959         int budget_left = budget;
 2960         unsigned int sleeping = 0;
 2961         int lro_enabled = qs->lro.enabled;
 2962         int skip_lro;
 2963         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2964         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2965         int ngathered = 0;
 2966         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2967 #ifdef DEBUG    
 2968         static int last_holdoff = 0;
 2969         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2970                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2971                 last_holdoff = rspq->holdoff_tmr;
 2972         }
 2973 #endif
 2974         rspq->next_holdoff = rspq->holdoff_tmr;
 2975 
 2976         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2977                 int eth, eop = 0, ethpad = 0;
 2978                 uint32_t flags = ntohl(r->flags);
 2979                 uint32_t rss_csum = *(const uint32_t *)r;
 2980                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2981                 
 2982                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2983                 
 2984                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2985                         struct mbuf *m;
 2986 
 2987                         if (cxgb_debug)
 2988                                 printf("async notification\n");
 2989 
 2990                         if (mh->mh_head == NULL) {
 2991                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 2992                                 m = mh->mh_head;
 2993                         } else {
 2994                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2995                         }
 2996                         if (m == NULL)
 2997                                 goto no_mem;
 2998 
 2999                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 3000                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 3001                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 3002                         rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
 3003                         eop = 1;
 3004                         rspq->async_notif++;
 3005                         goto skip;
 3006                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 3007                         struct mbuf *m = NULL;
 3008 
 3009                         DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
 3010                             r->rss_hdr.opcode, rspq->cidx);
 3011                         if (mh->mh_head == NULL)
 3012                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 3013                         else 
 3014                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 3015 
 3016                         if (mh->mh_head == NULL &&  m == NULL) {        
 3017                 no_mem:
 3018                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 3019                                 budget_left--;
 3020                                 break;
 3021                         }
 3022                         get_imm_packet(adap, r, mh->mh_head);
 3023                         eop = 1;
 3024                         rspq->imm_data++;
 3025                 } else if (r->len_cq) {
 3026                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 3027                         
 3028                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 3029                         if (eop) {
 3030                                 if (r->rss_hdr.hash_type && !adap->timestamp)
 3031                                         mh->mh_head->m_flags |= M_FLOWID;
 3032                                 mh->mh_head->m_pkthdr.flowid = rss_hash;
 3033                         }
 3034                         
 3035                         ethpad = 2;
 3036                 } else {
 3037                         rspq->pure_rsps++;
 3038                 }
 3039         skip:
 3040                 if (flags & RSPD_CTRL_MASK) {
 3041                         sleeping |= flags & RSPD_GTS_MASK;
 3042                         handle_rsp_cntrl_info(qs, flags);
 3043                 }
 3044 
 3045                 r++;
 3046                 if (__predict_false(++rspq->cidx == rspq->size)) {
 3047                         rspq->cidx = 0;
 3048                         rspq->gen ^= 1;
 3049                         r = rspq->desc;
 3050                 }
 3051 
 3052                 if (++rspq->credits >= 64) {
 3053                         refill_rspq(adap, rspq, rspq->credits);
 3054                         rspq->credits = 0;
 3055                 }
 3056                 if (!eth && eop) {
 3057                         mh->mh_head->m_pkthdr.csum_data = rss_csum;
 3058                         /*
 3059                          * XXX size mismatch
 3060                          */
 3061                         m_set_priority(mh->mh_head, rss_hash);
 3062 
 3063                         
 3064                         ngathered = rx_offload(&adap->tdev, rspq,
 3065                             mh->mh_head, offload_mbufs, ngathered);
 3066                         mh->mh_head = NULL;
 3067                         DPRINTF("received offload packet\n");
 3068                         
 3069                 } else if (eth && eop) {
 3070                         struct mbuf *m = mh->mh_head;
 3071 
 3072                         t3_rx_eth(adap, rspq, m, ethpad);
 3073 
 3074                         /*
 3075                          * The T304 sends incoming packets on any qset.  If LRO
 3076                          * is also enabled, we could end up sending the packet up
 3077                          * lro_ctrl->ifp's input.  That is incorrect.
 3078                          *
 3079                          * The mbuf's rcvif was derived from the cpl header and
 3080                          * is accurate.  Skip LRO and just use that.
 3081                          */
 3082                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 3083 
 3084                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 3085 #ifdef INET
 3086                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 3087 #endif
 3088                             ) {
 3089                                 /* successfully queued for LRO */
 3090                         } else {
 3091                                 /*
 3092                                  * LRO not enabled, packet unsuitable for LRO,
 3093                                  * or unable to queue.  Pass it up right now in
 3094                                  * either case.
 3095                                  */
 3096                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 3097                                 (*ifp->if_input)(ifp, m);
 3098                         }
 3099                         mh->mh_head = NULL;
 3100 
 3101                 }
 3102                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3103                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3104                 --budget_left;
 3105         }
 3106 
 3107         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 3108 
 3109 #ifdef INET
 3110         /* Flush LRO */
 3111         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 3112                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 3113                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 3114                 tcp_lro_flush(lro_ctrl, queued);
 3115         }
 3116 #endif
 3117 
 3118         if (sleeping)
 3119                 check_ring_db(adap, qs, sleeping);
 3120 
 3121         mb();  /* commit Tx queue processed updates */
 3122         if (__predict_false(qs->txq_stopped > 1))
 3123                 restart_tx(qs);
 3124 
 3125         __refill_fl_lt(adap, &qs->fl[0], 512);
 3126         __refill_fl_lt(adap, &qs->fl[1], 512);
 3127         budget -= budget_left;
 3128         return (budget);
 3129 }
 3130 
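/*
 * Editor's note: process_responses() above flips rspq->gen every time the
 * consumer index wraps, and the is_new_response() check in its loop only
 * accepts a descriptor while its generation bit matches the queue's expected
 * value (the helper is defined earlier in this file).  The sketch below
 * models that one-bit ownership scheme in user space with invented types; it
 * is not the driver's actual descriptor layout.
 */
#include <stdio.h>

#define SAMPLE_Q_SIZE   4

struct sample_desc {
        int     data;
        int     gen;            /* written by the "producer" */
};

struct sample_queue {
        struct sample_desc      desc[SAMPLE_Q_SIZE];
        int                     cidx;
        int                     gen;    /* generation the consumer expects */
};

static int
sample_is_new(const struct sample_queue *q)
{
        return (q->desc[q->cidx].gen == q->gen);
}

int
main(void)
{
        struct sample_queue q = { .gen = 1 };
        int i;

        /* Producer fills one full ring's worth of entries with gen == 1. */
        for (i = 0; i < SAMPLE_Q_SIZE; i++)
                q.desc[i] = (struct sample_desc){ .data = i, .gen = 1 };

        /* Consumer drains until the generation bit stops matching. */
        while (sample_is_new(&q)) {
                printf("consumed %d\n", q.desc[q.cidx].data);
                if (++q.cidx == SAMPLE_Q_SIZE) {
                        q.cidx = 0;
                        q.gen ^= 1;     /* expect the opposite gen next lap */
                }
        }
        printf("ring empty\n");
        return (0);
}
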
 3131 /*
 3132  * A helper function that processes responses and issues GTS.
 3133  */
 3134 static __inline int
 3135 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3136 {
 3137         int work;
 3138         static int last_holdoff = 0;
 3139         
 3140         work = process_responses(adap, rspq_to_qset(rq), -1);
 3141 
 3142         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3143                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3144                 last_holdoff = rq->next_holdoff;
 3145         }
 3146         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3147             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3148         
 3149         return (work);
 3150 }
 3151 
 3152 
 3153 /*
 3154  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3155  * Handles data events from SGE response queues as well as error and other
 3156  * async events as they all use the same interrupt pin.  We use one SGE
 3157  * response queue per port in this mode and protect all response queues with
 3158  * queue 0's lock.
 3159  */
 3160 void
 3161 t3b_intr(void *data)
 3162 {
 3163         uint32_t i, map;
 3164         adapter_t *adap = data;
 3165         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3166         
 3167         t3_write_reg(adap, A_PL_CLI, 0);
 3168         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3169 
 3170         if (!map) 
 3171                 return;
 3172 
 3173         if (__predict_false(map & F_ERRINTR)) {
 3174                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3175                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3176                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3177         }
 3178 
 3179         mtx_lock(&q0->lock);
 3180         for_each_port(adap, i)
 3181             if (map & (1 << i))
 3182                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3183         mtx_unlock(&q0->lock);
 3184 }
 3185 
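/*
 * Editor's note: t3b_intr() above reads a per-port bitmap from
 * A_SG_DATA_INTR and services every response queue whose bit is set.  A
 * minimal stand-alone sketch of that test-each-bit loop (the port count and
 * bitmap value here are invented):
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        const int nports = 4;
        uint32_t map = 0x5;     /* pretend ports 0 and 2 raised interrupts */
        int i;

        for (i = 0; i < nports; i++)
                if (map & (1U << i))
                        printf("service response queue for port %d\n", i);
        return (0);
}
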
 3186 /*
 3187  * The MSI interrupt handler.  This needs to handle data events from SGE
 3188  * response queues as well as error and other async events as they all use
 3189  * the same MSI vector.  We use one SGE response queue per port in this mode
 3190  * and protect all response queues with queue 0's lock.
 3191  */
 3192 void
 3193 t3_intr_msi(void *data)
 3194 {
 3195         adapter_t *adap = data;
 3196         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3197         int i, new_packets = 0;
 3198 
 3199         mtx_lock(&q0->lock);
 3200 
 3201         for_each_port(adap, i)
 3202             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3203                     new_packets = 1;
 3204         mtx_unlock(&q0->lock);
 3205         if (new_packets == 0) {
 3206                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3207                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3208                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3209         }
 3210 }
 3211 
 3212 void
 3213 t3_intr_msix(void *data)
 3214 {
 3215         struct sge_qset *qs = data;
 3216         adapter_t *adap = qs->port->adapter;
 3217         struct sge_rspq *rspq = &qs->rspq;
 3218 
 3219         if (process_responses_gts(adap, rspq) == 0)
 3220                 rspq->unhandled_irqs++;
 3221 }
 3222 
 3223 #define QDUMP_SBUF_SIZE         (32 * 400)
 3224 static int
 3225 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3226 {
 3227         struct sge_rspq *rspq;
 3228         struct sge_qset *qs;
 3229         int i, err, dump_end, idx;
 3230         static int multiplier = 1;
 3231         struct sbuf *sb;
 3232         struct rsp_desc *rspd;
 3233         uint32_t data[4];
 3234         
 3235         rspq = arg1;
 3236         qs = rspq_to_qset(rspq);
 3237         if (rspq->rspq_dump_count == 0) 
 3238                 return (0);
 3239         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3240                 log(LOG_WARNING,
 3241                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3242                 rspq->rspq_dump_count = 0;
 3243                 return (EINVAL);
 3244         }
 3245         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3246                 log(LOG_WARNING,
 3247                     "dump start of %d is greater than queue size\n",
 3248                     rspq->rspq_dump_start);
 3249                 rspq->rspq_dump_start = 0;
 3250                 return (EINVAL);
 3251         }
 3252         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3253         if (err)
 3254                 return (err);
 3255 retry_sbufops:
 3256         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3257 
 3258         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3259             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3260             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3261         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3262             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3263         
 3264         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3265             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3266         
 3267         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3268         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3269                 idx = i & (RSPQ_Q_SIZE-1);
 3270                 
 3271                 rspd = &rspq->desc[idx];
 3272                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3273                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3274                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3275                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3276                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3277                     be32toh(rspd->len_cq), rspd->intr_gen);
 3278         }
 3279         if (sbuf_overflowed(sb)) {
 3280                 sbuf_delete(sb);
 3281                 multiplier++;
 3282                 goto retry_sbufops;
 3283         }
 3284         sbuf_finish(sb);
 3285         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3286         sbuf_delete(sb);
 3287         return (err);
 3288 }       
 3289 
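/*
 * Editor's note: t3_dump_rspq() above formats into a fixed-size sbuf and, if
 * sbuf_overflowed() reports truncation, deletes the buffer, bumps a size
 * multiplier and retries from scratch.  Below is a minimal user-space sketch
 * of the same retry-with-a-larger-buffer idea using snprintf(); the names
 * and sizes are invented for the example.
 */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        const size_t base = 8;  /* deliberately too small at first */
        int multiplier = 1;
        size_t size;
        char *buf;
        int need;

        for (;;) {
                size = base * multiplier;
                if ((buf = malloc(size)) == NULL)
                        return (1);
                need = snprintf(buf, size, "idx=%u flags=%08x len=%u\n",
                    42, 0xdeadbeefU, 1500);
                if (need >= 0 && (size_t)need < size)
                        break;          /* it fit; keep this buffer */
                free(buf);              /* truncated: retry with more room */
                multiplier++;
        }
        fputs(buf, stdout);
        free(buf);
        return (0);
}
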
 3290 static int
 3291 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3292 {
 3293         struct sge_txq *txq;
 3294         struct sge_qset *qs;
 3295         int i, j, err, dump_end;
 3296         static int multiplier = 1;
 3297         struct sbuf *sb;
 3298         struct tx_desc *txd;
 3299         uint32_t *WR, wr_hi, wr_lo, gen;
 3300         uint32_t data[4];
 3301         
 3302         txq = arg1;
 3303         qs = txq_to_qset(txq, TXQ_ETH);
 3304         if (txq->txq_dump_count == 0) {
 3305                 return (0);
 3306         }
 3307         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3308                 log(LOG_WARNING,
 3309                     "dump count is too large %d\n", txq->txq_dump_count);
 3310                 txq->txq_dump_count = 1;
 3311                 return (EINVAL);
 3312         }
 3313         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3314                 log(LOG_WARNING,
 3315                     "dump start of %d is greater than queue size\n",
 3316                     txq->txq_dump_start);
 3317                 txq->txq_dump_start = 0;
 3318                 return (EINVAL);
 3319         }
 3320         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3321         if (err)
 3322                 return (err);
 3323         
 3324             
 3325 retry_sbufops:
 3326         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3327 
 3328         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3329             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3330             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3331         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3332             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3333             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3334         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3335             txq->txq_dump_start,
 3336             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3337 
 3338         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3339         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3340                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3341                 WR = (uint32_t *)txd->flit;
 3342                 wr_hi = ntohl(WR[0]);
 3343                 wr_lo = ntohl(WR[1]);           
 3344                 gen = G_WR_GEN(wr_lo);
 3345                 
 3346                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3347                     wr_hi, wr_lo, gen);
 3348                 for (j = 2; j < 30; j += 4) 
 3349                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3350                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3351 
 3352         }
 3353         if (sbuf_overflowed(sb)) {
 3354                 sbuf_delete(sb);
 3355                 multiplier++;
 3356                 goto retry_sbufops;
 3357         }
 3358         sbuf_finish(sb);
 3359         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3360         sbuf_delete(sb);
 3361         return (err);
 3362 }
 3363 
 3364 static int
 3365 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3366 {
 3367         struct sge_txq *txq;
 3368         struct sge_qset *qs;
 3369         int i, j, err, dump_end;
 3370         static int multiplier = 1;
 3371         struct sbuf *sb;
 3372         struct tx_desc *txd;
 3373         uint32_t *WR, wr_hi, wr_lo, gen;
 3374         
 3375         txq = arg1;
 3376         qs = txq_to_qset(txq, TXQ_CTRL);
 3377         if (txq->txq_dump_count == 0) {
 3378                 return (0);
 3379         }
 3380         if (txq->txq_dump_count > 256) {
 3381                 log(LOG_WARNING,
 3382                     "dump count is too large %d\n", txq->txq_dump_count);
 3383                 txq->txq_dump_count = 1;
 3384                 return (EINVAL);
 3385         }
 3386         if (txq->txq_dump_start > 255) {
 3387                 log(LOG_WARNING,
 3388                     "dump start of %d is greater than queue size\n",
 3389                     txq->txq_dump_start);
 3390                 txq->txq_dump_start = 0;
 3391                 return (EINVAL);
 3392         }
 3393 
 3394 retry_sbufops:
 3395         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3396         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3397             txq->txq_dump_start,
 3398             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3399 
 3400         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3401         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3402                 txd = &txq->desc[i & (255)];
 3403                 WR = (uint32_t *)txd->flit;
 3404                 wr_hi = ntohl(WR[0]);
 3405                 wr_lo = ntohl(WR[1]);           
 3406                 gen = G_WR_GEN(wr_lo);
 3407                 
 3408                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3409                     wr_hi, wr_lo, gen);
 3410                 for (j = 2; j < 30; j += 4) 
 3411                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3412                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3413 
 3414         }
 3415         if (sbuf_overflowed(sb)) {
 3416                 sbuf_delete(sb);
 3417                 multiplier++;
 3418                 goto retry_sbufops;
 3419         }
 3420         sbuf_finish(sb);
 3421         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3422         sbuf_delete(sb);
 3423         return (err);
 3424 }
 3425 
 3426 static int
 3427 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3428 {
 3429         adapter_t *sc = arg1;
 3430         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3431         int coalesce_usecs;     
 3432         struct sge_qset *qs;
 3433         int i, j, err, nqsets = 0;
 3434         struct mtx *lock;
 3435 
 3436         if ((sc->flags & FULL_INIT_DONE) == 0)
 3437                 return (ENXIO);
 3438                 
 3439         coalesce_usecs = qsp->coalesce_usecs;
 3440         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3441 
 3442         if (err != 0) {
 3443                 return (err);
 3444         }
 3445         if (coalesce_usecs == qsp->coalesce_usecs)
 3446                 return (0);
 3447 
 3448         for (i = 0; i < sc->params.nports; i++) 
 3449                 for (j = 0; j < sc->port[i].nqsets; j++)
 3450                         nqsets++;
 3451 
 3452         coalesce_usecs = max(1, coalesce_usecs);
 3453 
 3454         for (i = 0; i < nqsets; i++) {
 3455                 qs = &sc->sge.qs[i];
 3456                 qsp = &sc->params.sge.qset[i];
 3457                 qsp->coalesce_usecs = coalesce_usecs;
 3458                 
 3459                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3460                             &sc->sge.qs[0].rspq.lock;
 3461 
 3462                 mtx_lock(lock);
 3463                 t3_update_qset_coalesce(qs, qsp);
 3464                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3465                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3466                 mtx_unlock(lock);
 3467         }
 3468 
 3469         return (0);
 3470 }
 3471 
 3472 static int
 3473 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3474 {
 3475         adapter_t *sc = arg1;
 3476         int rc, timestamp;
 3477 
 3478         if ((sc->flags & FULL_INIT_DONE) == 0)
 3479                 return (ENXIO);
 3480 
 3481         timestamp = sc->timestamp;
 3482         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3483 
 3484         if (rc != 0)
 3485                 return (rc);
 3486 
 3487         if (timestamp != sc->timestamp) {
 3488                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3489                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3490                 sc->timestamp = timestamp;
 3491         }
 3492 
 3493         return (0);
 3494 }
 3495 
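/*
 * Editor's note: t3_pkt_timestamp() above toggles one field of
 * A_TP_PC_CONFIG2 through t3_set_reg_field(), i.e. a read-modify-write that
 * clears the field mask and ORs in the new value.  A minimal sketch of that
 * pattern on a plain variable (the flag name here is invented):
 */
#include <stdint.h>
#include <stdio.h>

#define F_SAMPLE_TSTAMP 0x00000010U     /* pretend single-bit field */

static uint32_t
sample_set_field(uint32_t reg, uint32_t mask, uint32_t val)
{
        return ((reg & ~mask) | (val & mask));
}

int
main(void)
{
        uint32_t reg = 0x00000003U;

        reg = sample_set_field(reg, F_SAMPLE_TSTAMP, F_SAMPLE_TSTAMP);
        printf("enabled:  %08x\n", reg);
        reg = sample_set_field(reg, F_SAMPLE_TSTAMP, 0);
        printf("disabled: %08x\n", reg);
        return (0);
}
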
 3496 void
 3497 t3_add_attach_sysctls(adapter_t *sc)
 3498 {
 3499         struct sysctl_ctx_list *ctx;
 3500         struct sysctl_oid_list *children;
 3501 
 3502         ctx = device_get_sysctl_ctx(sc->dev);
 3503         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3504 
 3505         /* random information */
 3506         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3507             "firmware_version",
 3508             CTLFLAG_RD, &sc->fw_version,
 3509             0, "firmware version");
 3510         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3511             "hw_revision",
 3512             CTLFLAG_RD, &sc->params.rev,
 3513             0, "chip model");
 3514         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3515             "port_types",
 3516             CTLFLAG_RD, &sc->port_types,
 3517             0, "type of ports");
 3518         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3519             "enable_debug",
 3520             CTLFLAG_RW, &cxgb_debug,
 3521             0, "enable verbose debugging output");
 3522         SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3523             CTLFLAG_RD, &sc->tunq_coalesce,
 3524             "#tunneled packets freed");
 3525         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3526             "txq_overrun",
 3527             CTLFLAG_RD, &txq_fills,
 3528             0, "#times txq overrun");
 3529         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3530             "core_clock",
 3531             CTLFLAG_RD, &sc->params.vpd.cclk,
 3532             0, "core clock frequency (in kHz)");
 3533 }
 3534 
 3535 
 3536 static const char *rspq_name = "rspq";
 3537 static const char *txq_names[] =
 3538 {
 3539         "txq_eth",
 3540         "txq_ofld",
 3541         "txq_ctrl"      
 3542 };
 3543 
 3544 static int
 3545 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3546 {
 3547         struct port_info *p = arg1;
 3548         uint64_t *parg;
 3549 
 3550         if (!p)
 3551                 return (EINVAL);
 3552 
 3553         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3554         PORT_LOCK(p);
 3555         t3_mac_update_stats(&p->mac);
 3556         PORT_UNLOCK(p);
 3557 
 3558         return (sysctl_handle_quad(oidp, parg, 0, req));
 3559 }
 3560 
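/*
 * Editor's note: sysctl_handle_macstat() above receives a byte offset into
 * struct mac_stats (passed via arg2 by the CXGB_SYSCTL_ADD_QUAD macro later
 * in this file) and adds it to the base of the stats structure to locate the
 * requested counter.  A minimal user-space sketch with an invented stats
 * struct:
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sample_stats {
        uint64_t tx_frames;
        uint64_t rx_frames;
        uint64_t rx_fcs_errs;
};

/* Return a pointer to the counter that lives 'off' bytes into the struct. */
static uint64_t *
sample_stat(struct sample_stats *s, size_t off)
{
        return ((uint64_t *)((uint8_t *)s + off));
}

int
main(void)
{
        struct sample_stats st = { 100, 200, 3 };

        printf("rx_frames   = %llu\n", (unsigned long long)
            *sample_stat(&st, offsetof(struct sample_stats, rx_frames)));
        printf("rx_fcs_errs = %llu\n", (unsigned long long)
            *sample_stat(&st, offsetof(struct sample_stats, rx_fcs_errs)));
        return (0);
}
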
 3561 void
 3562 t3_add_configured_sysctls(adapter_t *sc)
 3563 {
 3564         struct sysctl_ctx_list *ctx;
 3565         struct sysctl_oid_list *children;
 3566         int i, j;
 3567         
 3568         ctx = device_get_sysctl_ctx(sc->dev);
 3569         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3570 
 3571         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3572             "intr_coal",
 3573             CTLTYPE_INT|CTLFLAG_RW, sc,
 3574             0, t3_set_coalesce_usecs,
 3575             "I", "interrupt coalescing timer (us)");
 3576 
 3577         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3578             "pkt_timestamp",
 3579             CTLTYPE_INT | CTLFLAG_RW, sc,
 3580             0, t3_pkt_timestamp,
 3581             "I", "provide packet timestamp instead of connection hash");
 3582 
 3583         for (i = 0; i < sc->params.nports; i++) {
 3584                 struct port_info *pi = &sc->port[i];
 3585                 struct sysctl_oid *poid;
 3586                 struct sysctl_oid_list *poidlist;
 3587                 struct mac_stats *mstats = &pi->mac.stats;
 3588                 
 3589                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3590                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3591                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3592                 poidlist = SYSCTL_CHILDREN(poid);
 3593                 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 
 3594                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3595                     0, "#queue sets");
 3596 
 3597                 for (j = 0; j < pi->nqsets; j++) {
 3598                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3599                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3600                                           *ctrlqpoid, *lropoid;
 3601                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3602                                                *txqpoidlist, *ctrlqpoidlist,
 3603                                                *lropoidlist;
 3604                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3605                         
 3606                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3607                         
 3608                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3609                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3610                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3611 
 3612                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3613                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3614                                         "freelist #0 empty");
 3615                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3616                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3617                                         "freelist #1 empty");
 3618 
 3619                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3620                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3621                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3622 
 3623                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3624                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3625                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3626 
 3627                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3628                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3629                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3630 
 3631                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3632                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3633                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3634 
 3635                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3636                             CTLFLAG_RD, &qs->rspq.size,
 3637                             0, "#entries in response queue");
 3638                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3639                             CTLFLAG_RD, &qs->rspq.cidx,
 3640                             0, "consumer index");
 3641                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3642                             CTLFLAG_RD, &qs->rspq.credits,
 3643                             0, "#credits");
 3644                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3645                             CTLFLAG_RD, &qs->rspq.starved,
 3646                             0, "#times starved");
 3647                         SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3648                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3649                             "physical address of the queue");
 3650                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3651                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3652                             0, "start rspq dump entry");
 3653                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3654                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3655                             0, "#rspq entries to dump");
 3656                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3657                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3658                             0, t3_dump_rspq, "A", "dump of the response queue");
 3659 
 3660                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3661                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3662                             "#tunneled packets dropped");
 3663                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3664                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3665                             0, "#tunneled packets waiting to be sent");
 3666 #if 0                   
 3667                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3668                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3669                             0, "#tunneled packets queue producer index");
 3670                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3671                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3672                             0, "#tunneled packets queue consumer index");
 3673 #endif                  
 3674                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
 3675                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3676                             0, "#tunneled packets processed by the card");
 3677                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3678                             CTLFLAG_RD, &txq->cleaned,
 3679                             0, "#tunneled packets cleaned");
 3680                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3681                             CTLFLAG_RD, &txq->in_use,
 3682                             0, "#tunneled packet slots in use");
 3683                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3684                             CTLFLAG_RD, &txq->txq_frees,
 3685                             "#tunneled packets freed");
 3686                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3687                             CTLFLAG_RD, &txq->txq_skipped,
 3688                             0, "#tunneled packet descriptors skipped");
 3689                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3690                             CTLFLAG_RD, &txq->txq_coalesced,
 3691                             "#tunneled packets coalesced");
 3692                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3693                             CTLFLAG_RD, &txq->txq_enqueued,
 3694                             0, "#tunneled packets enqueued to hardware");
 3695                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3696                             CTLFLAG_RD, &qs->txq_stopped,
 3697                             0, "tx queues stopped");
 3698                         SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3699                             CTLFLAG_RD, &txq->phys_addr,
 3700                             "physical address of the queue");
 3701                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3702                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3703                             0, "txq generation");
 3704                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3705                             CTLFLAG_RD, &txq->cidx,
 3706                             0, "hardware queue cidx");                  
 3707                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3708                             CTLFLAG_RD, &txq->pidx,
 3709                             0, "hardware queue pidx");
 3710                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3711                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3712                             0, "txq start idx for dump");
 3713                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3714                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3715                             0, "txq #entries to dump");                 
 3716                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3717                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3718                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3719 
 3720                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3721                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3722                             0, "ctrlq start idx for dump");
 3723                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3724                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3725                             0, "ctrl #entries to dump");                        
 3726                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3727                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3728                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3729 
 3730                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3731                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3732                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3733                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3734                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3735                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3736                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3737                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3738                 }
 3739 
 3740                 /* Now add a node for mac stats. */
 3741                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3742                     CTLFLAG_RD, NULL, "MAC statistics");
 3743                 poidlist = SYSCTL_CHILDREN(poid);
 3744 
 3745                 /*
 3746                  * We (ab)use the length argument (arg2) to pass on the offset
 3747                  * of the data that we are interested in.  This is only required
 3748                  * for the quad counters that are updated from the hardware (we
 3749                  * make sure that we return the latest value).
 3750                  * sysctl_handle_macstat first updates *all* the counters from
 3751                  * the hardware, and then returns the latest value of the
 3752                  * requested counter.  Best would be to update only the
 3753                  * requested counter from hardware, but t3_mac_update_stats()
 3754                  * hides all the register details and we don't want to dive into
 3755                  * all that here.
 3756                  */
 3757 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3758     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3759     sysctl_handle_macstat, "QU", 0)
 3760                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3761                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3762                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3763                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3764                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3765                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3766                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3767                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3768                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3769                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3770                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3771                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3772                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3773                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3774                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3775                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3776                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3777                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3778                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3779                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3780                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3781                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3782                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3783                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3784                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3785                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3786                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3787                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3788                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3789                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3790                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3791                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3792                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3793                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3794                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3795                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3796                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3797                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3798                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3799                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3800                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3801                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3802                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3803                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3804                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3805                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3806 #undef CXGB_SYSCTL_ADD_QUAD
 3807 
 3808 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3809     CTLFLAG_RD, &mstats->a, 0)
 3810                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3811                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3812                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3813                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3814                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3815                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3816                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3817                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3818                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3819                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3820 #undef CXGB_SYSCTL_ADD_ULONG
 3821         }
 3822 }
 3823         
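/*
 * Editor's note: the CXGB_SYSCTL_ADD_QUAD/CXGB_SYSCTL_ADD_ULONG macros above
 * rely on the preprocessor's "#" stringizing operator to reuse a structure
 * field name as the sysctl name, and on offsetof() to locate the field.  A
 * minimal stand-alone sketch of both tricks (names invented):
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sample_stats {
        uint64_t tx_pause;
        uint64_t rx_pause;
};

/* Print "<field name> lives at byte offset <n>" for a given field. */
#define SAMPLE_DESCRIBE(field)                                          \
        printf("%-8s offset=%zu\n", #field,                             \
            offsetof(struct sample_stats, field))

int
main(void)
{
        SAMPLE_DESCRIBE(tx_pause);
        SAMPLE_DESCRIBE(rx_pause);
        return (0);
}
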
 3824 /**
 3825  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3826  *      @qs: the queue set
 3827  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3828  *      @idx: the descriptor index in the queue
 3829  *      @data: where to dump the descriptor contents
 3830  *
 3831  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3832  *      size of the descriptor.
 3833  */
 3834 int
 3835 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3836                 unsigned char *data)
 3837 {
 3838         if (qnum >= 6)
 3839                 return (EINVAL);
 3840 
 3841         if (qnum < 3) {
 3842                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3843                         return (EINVAL);
 3844                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3845                 return sizeof(struct tx_desc);
 3846         }
 3847 
 3848         if (qnum == 3) {
 3849                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3850                         return (EINVAL);
 3851                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3852                 return sizeof(struct rsp_desc);
 3853         }
 3854 
 3855         qnum -= 4;
 3856         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3857                 return (EINVAL);
 3858         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3859         return sizeof(struct rx_desc);
 3860 }
