FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/9.0/sys/dev/cxgb/cxgb_sge.c 219946 2011-03-24 01:16:48Z np $");
   32 
   33 #include "opt_inet.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/module.h>
   39 #include <sys/bus.h>
   40 #include <sys/conf.h>
   41 #include <machine/bus.h>
   42 #include <machine/resource.h>
   43 #include <sys/bus_dma.h>
   44 #include <sys/rman.h>
   45 #include <sys/queue.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/taskqueue.h>
   48 
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/systm.h>
   54 #include <sys/syslog.h>
   55 #include <sys/socket.h>
   56 
   57 #include <net/bpf.h>    
   58 #include <net/ethernet.h>
   59 #include <net/if.h>
   60 #include <net/if_vlan_var.h>
   61 
   62 #include <netinet/in_systm.h>
   63 #include <netinet/in.h>
   64 #include <netinet/ip.h>
   65 #include <netinet/tcp.h>
   66 
   67 #include <dev/pci/pcireg.h>
   68 #include <dev/pci/pcivar.h>
   69 
   70 #include <vm/vm.h>
   71 #include <vm/pmap.h>
   72 
   73 #include <cxgb_include.h>
   74 #include <sys/mvec.h>
   75 
   76 int     txq_fills = 0;
   77 int     multiq_tx_enable = 1;
   78 
   79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   82 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   83     "size of per-queue mbuf ring");
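      /*
       * A note on these knobs: CTLFLAG_RDTUN values such as txq_mr_size are
       * read once at boot from the kernel environment (for example a
       * hw.cxgb.txq_mr_size="2048" line in /boot/loader.conf), while the
       * CTLFLAG_RW values below can also be changed at runtime via sysctl(8),
       * e.g. "sysctl hw.cxgb.tx_coalesce_force=1".
       */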
   84 
   85 static int cxgb_tx_coalesce_force = 0;
   86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   87 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   88     &cxgb_tx_coalesce_force, 0,
   89     "coalesce small packets into a single work request regardless of ring state");
   90 
   91 #define COALESCE_START_DEFAULT          (TX_ETH_Q_SIZE>>1)
   92 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   93 #define COALESCE_STOP_DEFAULT           (TX_ETH_Q_SIZE>>2)
   94 #define COALESCE_STOP_MIN               (TX_ETH_Q_SIZE>>5)
   95 #define TX_RECLAIM_DEFAULT              (TX_ETH_Q_SIZE>>5)
   96 #define TX_RECLAIM_MAX                  (TX_ETH_Q_SIZE>>2)
   97 #define TX_RECLAIM_MIN                  (TX_ETH_Q_SIZE>>6)
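      /*
       * For illustration, assuming the usual TX_ETH_Q_SIZE of 1024 these work
       * out to: coalescing starts once 512 descriptors are in use (capped at
       * 896), stops again at 256 (floor of 32), and the reclaim threshold
       * defaults to 32 descriptors (values tuned outside [16, 256] are pushed
       * back to that default).
       */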
   98 
   99 
  100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  102     &cxgb_tx_coalesce_enable_start);
  103 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  104     &cxgb_tx_coalesce_enable_start, 0,
  105     "coalesce enable threshold");
  106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  108 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  109     &cxgb_tx_coalesce_enable_stop, 0,
  110     "coalesce disable threshold");
  111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  113 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  114     &cxgb_tx_reclaim_threshold, 0,
  115     "tx cleaning minimum threshold");
  116 
  117 /*
  118  * XXX don't re-enable this until TOE stops assuming
  119  * we have an m_ext
  120  */
  121 static int recycle_enable = 0;
  122 
  123 extern int cxgb_use_16k_clusters;
  124 extern int nmbjumbop;
  125 extern int nmbjumbo9;
  126 extern int nmbjumbo16;
  127 
  128 #define USE_GTS 0
  129 
  130 #define SGE_RX_SM_BUF_SIZE      1536
  131 #define SGE_RX_DROP_THRES       16
  132 #define SGE_RX_COPY_THRES       128
  133 
  134 /*
  135  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  136  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  137  */
  138 #define TX_RECLAIM_PERIOD       (hz >> 1)
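      /* hz is ticks per second, so this is roughly half a second. */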
  139 
  140 /* 
  141  * Values for sge_txq.flags
  142  */
  143 enum {
  144         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  145         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  146 };
  147 
  148 struct tx_desc {
  149         uint64_t        flit[TX_DESC_FLITS];
  150 } __packed;
  151 
  152 struct rx_desc {
  153         uint32_t        addr_lo;
  154         uint32_t        len_gen;
  155         uint32_t        gen2;
  156         uint32_t        addr_hi;
  157 } __packed;
  158 
  159 struct rsp_desc {               /* response queue descriptor */
  160         struct rss_header       rss_hdr;
  161         uint32_t                flags;
  162         uint32_t                len_cq;
  163         uint8_t                 imm_data[47];
  164         uint8_t                 intr_gen;
  165 } __packed;
  166 
  167 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  168 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  169 #define RX_SW_DESC_INUSE        (1 << 3)
  170 #define TX_SW_DESC_MAPPED       (1 << 4)
  171 
  172 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  173 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  174 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  175 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  176 
  177 struct tx_sw_desc {                /* SW state per Tx descriptor */
  178         struct mbuf     *m;
  179         bus_dmamap_t    map;
  180         int             flags;
  181 };
  182 
  183 struct rx_sw_desc {                /* SW state per Rx descriptor */
  184         caddr_t         rxsd_cl;
  185         struct mbuf     *m;
  186         bus_dmamap_t    map;
  187         int             flags;
  188 };
  189 
  190 struct txq_state {
  191         unsigned int    compl;
  192         unsigned int    gen;
  193         unsigned int    pidx;
  194 };
  195 
  196 struct refill_fl_cb_arg {
  197         int               error;
  198         bus_dma_segment_t seg;
  199         int               nseg;
  200 };
  201 
  202 
  203 /*
  204  * Maps a number of flits to the number of Tx descriptors that can hold them.
  205  * The formula is
  206  *
  207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  208  *
  209  * HW allows up to 4 descriptors to be combined into a WR.
  210  */
  211 static uint8_t flit_desc_map[] = {
  212         0,
  213 #if SGE_NUM_GENBITS == 1
  214         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  215         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  216         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  217         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  218 #elif SGE_NUM_GENBITS == 2
  219         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  220         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  221         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  222         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  223 #else
  224 # error "SGE_NUM_GENBITS must be 1 or 2"
  225 #endif
  226 };
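      /*
       * Reading the table directly: with two generation bits a work request
       * of up to 15 flits fits in a single descriptor (flit_desc_map[7] == 1,
       * for instance), while flit_desc_map[16] == 2 because a 16-flit WR
       * spills into a second descriptor.
       */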
  227 
  228 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  229 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  230 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  231 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  232 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  233 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  234         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  237         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  238 #define TXQ_RING_DEQUEUE(qs) \
  239         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 
  241 int cxgb_debug = 0;
  242 
  243 static void sge_timer_cb(void *arg);
  244 static void sge_timer_reclaim(void *arg, int ncount);
  245 static void sge_txq_reclaim_handler(void *arg, int ncount);
  246 static void cxgb_start_locked(struct sge_qset *qs);
  247 
  248 /*
  249  * XXX need to cope with bursty scheduling by looking at a wider
  250  * window than we are now for determining the need for coalescing
  251  *
  252  */
  253 static __inline uint64_t
  254 check_pkt_coalesce(struct sge_qset *qs) 
  255 { 
  256         struct adapter *sc; 
  257         struct sge_txq *txq; 
  258         uint8_t *fill;
  259 
  260         if (__predict_false(cxgb_tx_coalesce_force))
  261                 return (1);
  262         txq = &qs->txq[TXQ_ETH]; 
  263         sc = qs->port->adapter; 
  264         fill = &sc->tunq_fill[qs->idx];
  265 
  266         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  267                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  268         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  269                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
  270         /*
  271          * Once the hardware transmit queue fills past the start threshold
  272          * we mark it as coalescing.  We drop back out of coalescing when
  273          * it drains to the stop threshold and no packets remain queued in
  274          * the software ring; this provides some degree of hysteresis.
  275          */
  276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  277             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  278                 *fill = 0; 
  279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  280                 *fill = 1; 
  281 
  282         return (sc->tunq_coalesce);
  283 } 
  284 
  285 #ifdef __LP64__
  286 static void
  287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  288 {
  289         uint64_t wr_hilo;
  290 #if _BYTE_ORDER == _LITTLE_ENDIAN
  291         wr_hilo = wr_hi;
  292         wr_hilo |= (((uint64_t)wr_lo)<<32);
  293 #else
  294         wr_hilo = wr_lo;
  295         wr_hilo |= (((uint64_t)wr_hi)<<32);
  296 #endif  
  297         wrp->wrh_hilo = wr_hilo;
  298 }
  299 #else
  300 static void
  301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  302 {
  303 
  304         wrp->wrh_hi = wr_hi;
  305         wmb();
  306         wrp->wrh_lo = wr_lo;
  307 }
  308 #endif
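      /*
       * The ordering above matters because wr_lo carries the generation bit
       * the SGE checks to decide whether a descriptor is valid.  On 64-bit
       * platforms both halves are written with a single store; on 32-bit
       * platforms the wmb() keeps the wr_hi store visible before wr_lo, so
       * the hardware never sees a fresh generation bit with a stale header.
       */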
  309 
  310 struct coalesce_info {
  311         int count;
  312         int nbytes;
  313 };
  314 
  315 static int
  316 coalesce_check(struct mbuf *m, void *arg)
  317 {
  318         struct coalesce_info *ci = arg;
  319         int *count = &ci->count;
  320         int *nbytes = &ci->nbytes;
  321 
  322         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  323                 (*count < 7) && (m->m_next == NULL))) {
  324                 *count += 1;
  325                 *nbytes += m->m_len;
  326                 return (1);
  327         }
  328         return (0);
  329 }
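      /*
       * These limits mirror what a single coalesced work request can carry in
       * t3_encap() below: at most 7 packets (one cpl_tx_pkt_batch entry each),
       * roughly 10500 bytes in total, and only packets that already sit in a
       * single mbuf (m_next == NULL).
       */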
  330 
  331 static struct mbuf *
  332 cxgb_dequeue(struct sge_qset *qs)
  333 {
  334         struct mbuf *m, *m_head, *m_tail;
  335         struct coalesce_info ci;
  336 
  337         
  338         if (check_pkt_coalesce(qs) == 0) 
  339                 return TXQ_RING_DEQUEUE(qs);
  340 
  341         m_head = m_tail = NULL;
  342         ci.count = ci.nbytes = 0;
  343         do {
  344                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  345                 if (m_head == NULL) {
  346                         m_tail = m_head = m;
  347                 } else if (m != NULL) {
  348                         m_tail->m_nextpkt = m;
  349                         m_tail = m;
  350                 }
  351         } while (m != NULL);
  352         if (ci.count > 7)
  353                 panic("trying to coalesce %d packets into one WR", ci.count);
  354         return (m_head);
  355 }
  356         
  357 /**
  358  *      reclaim_completed_tx - reclaims completed Tx descriptors
  359  *      @adapter: the adapter
  360  *      @q: the Tx queue to reclaim completed descriptors from
  361  *
  362  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  363  *      and frees the associated buffers if possible.  Called with the Tx
  364  *      queue's lock held.
  365  */
  366 static __inline int
  367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  368 {
  369         struct sge_txq *q = &qs->txq[queue];
  370         int reclaim = desc_reclaimable(q);
  371 
  372         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  373             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  374                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  375 
  376         if (reclaim < reclaim_min)
  377                 return (0);
  378 
  379         mtx_assert(&qs->lock, MA_OWNED);
  380         if (reclaim > 0) {
  381                 t3_free_tx_desc(qs, reclaim, queue);
  382                 q->cleaned += reclaim;
  383                 q->in_use -= reclaim;
  384         }
  385         if (isset(&qs->txq_stopped, TXQ_ETH))
  386                 clrbit(&qs->txq_stopped, TXQ_ETH);
  387 
  388         return (reclaim);
  389 }
  390 
  391 /**
  392  *      should_restart_tx - are there enough resources to restart a Tx queue?
  393  *      @q: the Tx queue
  394  *
  395  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  396  */
  397 static __inline int
  398 should_restart_tx(const struct sge_txq *q)
  399 {
  400         unsigned int r = q->processed - q->cleaned;
  401 
  402         return q->in_use - r < (q->size >> 1);
  403 }
  404 
  405 /**
  406  *      t3_sge_init - initialize SGE
  407  *      @adap: the adapter
  408  *      @p: the SGE parameters
  409  *
  410  *      Performs SGE initialization needed every time after a chip reset.
  411  *      We do not initialize any of the queue sets here, instead the driver
  412  *      top-level must request those individually.  We also do not enable DMA
  413  *      here, that should be done after the queues have been set up.
  414  */
  415 void
  416 t3_sge_init(adapter_t *adap, struct sge_params *p)
  417 {
  418         u_int ctrl, ups;
  419 
  420         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  421 
  422         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  423                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  424                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  425                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  426 #if SGE_NUM_GENBITS == 1
  427         ctrl |= F_EGRGENCTRL;
  428 #endif
  429         if (adap->params.rev > 0) {
  430                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  431                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  432         }
  433         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  434         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  435                      V_LORCQDRBTHRSH(512));
  436         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  437         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  438                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  439         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  440                      adap->params.rev < T3_REV_C ? 1000 : 500);
  441         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  442         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  443         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  444         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  445         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  446 }
  447 
  448 
  449 /**
  450  *      sgl_len - calculates the size of an SGL of the given capacity
  451  *      @n: the number of SGL entries
  452  *
  453  *      Calculates the number of flits needed for a scatter/gather list that
  454  *      can hold the given number of entries.
  455  */
  456 static __inline unsigned int
  457 sgl_len(unsigned int n)
  458 {
  459         return ((3 * n) / 2 + (n & 1));
  460 }
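      /*
       * Each sg_ent packs two address/length pairs into three flits (two
       * 4-byte lengths plus two 8-byte addresses), and a lone trailing entry
       * needs two flits, hence 3*n/2 plus one extra for odd n; for example,
       * sgl_len(3) == 5.
       */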
  461 
  462 /**
  463  *      get_imm_packet - return the next ingress packet buffer from a response
  464  *      @resp: the response descriptor containing the packet data
  465  *
  466  *      Return a packet containing the immediate data of the given response.
  467  */
  468 static int
  469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  470 {
  471 
  472         m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
  473         m->m_ext.ext_buf = NULL;
  474         m->m_ext.ext_type = 0;
  475         memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
  476         return (0);     
  477 }
  478 
  479 static __inline u_int
  480 flits_to_desc(u_int n)
  481 {
  482         return (flit_desc_map[n]);
  483 }
  484 
  485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  486                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  487                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  488                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  489                     F_HIRCQPARITYERROR)
  490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  492                       F_RSPQDISABLED)
  493 
  494 /**
  495  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  496  *      @adapter: the adapter
  497  *
  498  *      Interrupt handler for SGE asynchronous (non-data) events.
  499  */
  500 void
  501 t3_sge_err_intr_handler(adapter_t *adapter)
  502 {
  503         unsigned int v, status;
  504 
  505         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  506         if (status & SGE_PARERR)
  507                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  508                          status & SGE_PARERR);
  509         if (status & SGE_FRAMINGERR)
  510                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  511                          status & SGE_FRAMINGERR);
  512         if (status & F_RSPQCREDITOVERFOW)
  513                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  514 
  515         if (status & F_RSPQDISABLED) {
  516                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  517 
  518                 CH_ALERT(adapter,
  519                          "packet delivered to disabled response queue (0x%x)\n",
  520                          (v >> S_RSPQ0DISABLED) & 0xff);
  521         }
  522 
  523         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  524         if (status & SGE_FATALERR)
  525                 t3_fatal_err(adapter);
  526 }
  527 
  528 void
  529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  530 {
  531         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  532 
  533         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  534         nqsets *= adap->params.nports;
  535 
  536         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  537 
  538         while (!powerof2(fl_q_size))
  539                 fl_q_size--;
  540 
  541         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  542             is_offload(adap);
  543 
  544 #if __FreeBSD_version >= 700111
  545         if (use_16k) {
  546                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  547                 jumbo_buf_size = MJUM16BYTES;
  548         } else {
  549                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  550                 jumbo_buf_size = MJUM9BYTES;
  551         }
  552 #else
  553         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  554         jumbo_buf_size = MJUMPAGESIZE;
  555 #endif
  556         while (!powerof2(jumbo_q_size))
  557                 jumbo_q_size--;
  558 
  559         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  560                 device_printf(adap->dev,
  561                     "Insufficient clusters and/or jumbo buffers.\n");
  562 
  563         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  564 
  565         for (i = 0; i < SGE_QSETS; ++i) {
  566                 struct qset_params *q = p->qset + i;
  567 
  568                 if (adap->params.nports > 2) {
  569                         q->coalesce_usecs = 50;
  570                 } else {
  571 #ifdef INVARIANTS                       
  572                         q->coalesce_usecs = 10;
  573 #else
  574                         q->coalesce_usecs = 5;
  575 #endif                  
  576                 }
  577                 q->polling = 0;
  578                 q->rspq_size = RSPQ_Q_SIZE;
  579                 q->fl_size = fl_q_size;
  580                 q->jumbo_size = jumbo_q_size;
  581                 q->jumbo_buf_size = jumbo_buf_size;
  582                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  583                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  584                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  585                 q->cong_thres = 0;
  586         }
  587 }
  588 
  589 int
  590 t3_sge_alloc(adapter_t *sc)
  591 {
  592 
  593         /* The parent tag. */
  594         if (bus_dma_tag_create( NULL,                   /* parent */
  595                                 1, 0,                   /* algnmnt, boundary */
  596                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  597                                 BUS_SPACE_MAXADDR,      /* highaddr */
  598                                 NULL, NULL,             /* filter, filterarg */
  599                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  600                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  601                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  602                                 0,                      /* flags */
  603                                 NULL, NULL,             /* lock, lockarg */
  604                                 &sc->parent_dmat)) {
  605                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  606                 return (ENOMEM);
  607         }
  608 
  609         /*
  610          * DMA tag for normal sized RX frames
  611          */
  612         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  613                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  614                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  615                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  616                 return (ENOMEM);
  617         }
  618 
  619         /* 
  620          * DMA tag for jumbo sized RX frames.
  621          */
  622         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  623                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  624                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  625                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  626                 return (ENOMEM);
  627         }
  628 
  629         /* 
  630          * DMA tag for TX frames.
  631          */
  632         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  633                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  634                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  635                 NULL, NULL, &sc->tx_dmat)) {
  636                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  637                 return (ENOMEM);
  638         }
  639 
  640         return (0);
  641 }
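      /*
       * To summarize the tag hierarchy built above: parent_dmat is the
       * umbrella tag, with child tags sized for their consumers - single
       * MCLBYTES segments for normal receive, single MJUM16BYTES segments
       * for jumbo receive, and up to TX_MAX_SEGS segments totalling
       * TX_MAX_SIZE for transmit.
       */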
  642 
  643 int
  644 t3_sge_free(struct adapter * sc)
  645 {
  646 
  647         if (sc->tx_dmat != NULL)
  648                 bus_dma_tag_destroy(sc->tx_dmat);
  649 
  650         if (sc->rx_jumbo_dmat != NULL)
  651                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  652 
  653         if (sc->rx_dmat != NULL)
  654                 bus_dma_tag_destroy(sc->rx_dmat);
  655 
  656         if (sc->parent_dmat != NULL)
  657                 bus_dma_tag_destroy(sc->parent_dmat);
  658 
  659         return (0);
  660 }
  661 
  662 void
  663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  664 {
  665 
  666         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  667         qs->rspq.polling = 0 /* p->polling */;
  668 }
  669 
  670 #if !defined(__i386__) && !defined(__amd64__)
  671 static void
  672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  673 {
  674         struct refill_fl_cb_arg *cb_arg = arg;
  675         
  676         cb_arg->error = error;
  677         cb_arg->seg = segs[0];
  678         cb_arg->nseg = nseg;
  679 
  680 }
  681 #endif
  682 /**
  683  *      refill_fl - refill an SGE free-buffer list
  684  *      @sc: the controller softc
  685  *      @q: the free-list to refill
  686  *      @n: the number of new buffers to allocate
  687  *
  688  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  689  *      The caller must ensure that @n does not exceed the queue's capacity.
  690  */
  691 static void
  692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  693 {
  694         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  695         struct rx_desc *d = &q->desc[q->pidx];
  696         struct refill_fl_cb_arg cb_arg;
  697         struct mbuf *m;
  698         caddr_t cl;
  699         int err;
  700         
  701         cb_arg.error = 0;
  702         while (n--) {
  703                 /*
  704                  * We only allocate a cluster; mbuf allocation happens after rx
  705                  */
  706                 if (q->zone == zone_pack) {
  707                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  708                                 break;
  709                         cl = m->m_ext.ext_buf;                  
  710                 } else {
  711                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  712                                 break;
  713                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  714                                 uma_zfree(q->zone, cl);
  715                                 break;
  716                         }
  717                 }
  718                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  719                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  720                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  721                                 uma_zfree(q->zone, cl);
  722                                 goto done;
  723                         }
  724                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  725                 }
  726 #if !defined(__i386__) && !defined(__amd64__)
  727                 err = bus_dmamap_load(q->entry_tag, sd->map,
  728                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  729                 
  730                 if (err != 0 || cb_arg.error) {
  731                         if (q->zone == zone_pack)
  732                                 uma_zfree(q->zone, cl);
  733                         m_free(m);
  734                         goto done;
  735                 }
  736 #else
  737                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  738 #endif          
  739                 sd->flags |= RX_SW_DESC_INUSE;
  740                 sd->rxsd_cl = cl;
  741                 sd->m = m;
  742                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  743                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  744                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  745                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  746 
  747                 d++;
  748                 sd++;
  749 
  750                 if (++q->pidx == q->size) {
  751                         q->pidx = 0;
  752                         q->gen ^= 1;
  753                         sd = q->sdesc;
  754                         d = q->desc;
  755                 }
  756                 q->credits++;
  757                 q->db_pending++;
  758         }
  759 
  760 done:
  761         if (q->db_pending >= 32) {
  762                 q->db_pending = 0;
  763                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  764         }
  765 }
  766 
  767 
  768 /**
  769  *      free_rx_bufs - free the Rx buffers on an SGE free list
  770  *      @sc: the controller softc
  771  *      @q: the SGE free list to clean up
  772  *
  773  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  774  *      this queue should be stopped before calling this function.
  775  */
  776 static void
  777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  778 {
  779         u_int cidx = q->cidx;
  780 
  781         while (q->credits--) {
  782                 struct rx_sw_desc *d = &q->sdesc[cidx];
  783 
  784                 if (d->flags & RX_SW_DESC_INUSE) {
  785                         bus_dmamap_unload(q->entry_tag, d->map);
  786                         bus_dmamap_destroy(q->entry_tag, d->map);
  787                         if (q->zone == zone_pack) {
  788                                 m_init(d->m, zone_pack, MCLBYTES,
  789                                     M_NOWAIT, MT_DATA, M_EXT);
  790                                 uma_zfree(zone_pack, d->m);
  791                         } else {
  792                                 m_init(d->m, zone_mbuf, MLEN,
  793                                     M_NOWAIT, MT_DATA, 0);
  794                                 uma_zfree(zone_mbuf, d->m);
  795                                 uma_zfree(q->zone, d->rxsd_cl);
  796                         }                       
  797                 }
  798                 
  799                 d->rxsd_cl = NULL;
  800                 d->m = NULL;
  801                 if (++cidx == q->size)
  802                         cidx = 0;
  803         }
  804 }
  805 
  806 static __inline void
  807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  808 {
  809         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  810 }
  811 
  812 static __inline void
  813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  814 {
  815         uint32_t reclaimable = fl->size - fl->credits;
  816 
  817         if (reclaimable > 0)
  818                 refill_fl(adap, fl, min(max, reclaimable));
  819 }
  820 
  821 /**
  822  *      recycle_rx_buf - recycle a receive buffer
  823  *      @adapter: the adapter
  824  *      @q: the SGE free list
  825  *      @idx: index of buffer to recycle
  826  *
  827  *      Recycles the specified buffer on the given free list by adding it at
  828  *      the next available slot on the list.
  829  */
  830 static void
  831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  832 {
  833         struct rx_desc *from = &q->desc[idx];
  834         struct rx_desc *to   = &q->desc[q->pidx];
  835 
  836         q->sdesc[q->pidx] = q->sdesc[idx];
  837         to->addr_lo = from->addr_lo;        // already big endian
  838         to->addr_hi = from->addr_hi;        // likewise
  839         wmb();  /* necessary ? */
  840         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  841         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  842         q->credits++;
  843 
  844         if (++q->pidx == q->size) {
  845                 q->pidx = 0;
  846                 q->gen ^= 1;
  847         }
  848         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  849 }
  850 
  851 static void
  852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  853 {
  854         uint32_t *addr;
  855 
  856         addr = arg;
  857         *addr = segs[0].ds_addr;
  858 }
  859 
  860 static int
  861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  862     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  863     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  864 {
  865         size_t len = nelem * elem_size;
  866         void *s = NULL;
  867         void *p = NULL;
  868         int err;
  869 
  870         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  871                                       BUS_SPACE_MAXADDR_32BIT,
  872                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  873                                       len, 0, NULL, NULL, tag)) != 0) {
  874                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  875                 return (ENOMEM);
  876         }
  877 
  878         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  879                                     map)) != 0) {
  880                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  881                 return (ENOMEM);
  882         }
  883 
  884         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  885         bzero(p, len);
  886         *(void **)desc = p;
  887 
  888         if (sw_size) {
  889                 len = nelem * sw_size;
  890                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  891                 *(void **)sdesc = s;
  892         }
  893         if (parent_entry_tag == NULL)
  894                 return (0);
  895             
  896         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  897                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  898                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  899                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  900                                       NULL, NULL, entry_tag)) != 0) {
  901                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  902                 return (ENOMEM);
  903         }
  904         return (0);
  905 }
  906 
  907 static void
  908 sge_slow_intr_handler(void *arg, int ncount)
  909 {
  910         adapter_t *sc = arg;
  911 
  912         t3_slow_intr_handler(sc);
  913         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  914         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  915 }
  916 
  917 /**
  918  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  919  *      @data: the SGE queue set to maintain
  920  *
  921  *      Runs periodically from a timer to perform maintenance of an SGE queue
  922  *      set.  It performs the following tasks:
  923  *
  924  *      a) Cleans up any completed Tx descriptors that may still be pending.
  925  *      Normal descriptor cleanup happens when new packets are added to a Tx
  926  *      queue so this timer is relatively infrequent and does any cleanup only
  927  *      if the Tx queue has not seen any new packets in a while.  We make a
  928  *      best effort attempt to reclaim descriptors, in that we don't wait
  929  *      around if we cannot get a queue's lock (which most likely is because
  930  *      someone else is queueing new packets and so will also handle the clean
  931  *      up).  Since control queues use immediate data exclusively we don't
  932  *      bother cleaning them up here.
  933  *
  934  *      b) Replenishes Rx queues that have run out due to memory shortage.
  935  *      Normally new Rx buffers are added when existing ones are consumed but
  936  *      when out of memory a queue can become empty.  We try to add only a few
  937  *      buffers here, the queue will be replenished fully as these new buffers
  938  *      are used up if memory shortage has subsided.
  939  *      
  940  *      c) Return coalesced response queue credits in case a response queue is
  941  *      starved.
  942  *
  943  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  944  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  945  */
  946 static void
  947 sge_timer_cb(void *arg)
  948 {
  949         adapter_t *sc = arg;
  950         if ((sc->flags & USING_MSIX) == 0) {
  951                 
  952                 struct port_info *pi;
  953                 struct sge_qset *qs;
  954                 struct sge_txq  *txq;
  955                 int i, j;
  956                 int reclaim_ofl, refill_rx;
  957 
  958                 if (sc->open_device_map == 0) 
  959                         return;
  960 
  961                 for (i = 0; i < sc->params.nports; i++) {
  962                         pi = &sc->port[i];
  963                         for (j = 0; j < pi->nqsets; j++) {
  964                                 qs = &sc->sge.qs[pi->first_qset + j];
  965                                 txq = &qs->txq[0];
  966                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  967                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  968                                     (qs->fl[1].credits < qs->fl[1].size));
  969                                 if (reclaim_ofl || refill_rx) {
  970                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  971                                         break;
  972                                 }
  973                         }
  974                 }
  975         }
  976         
  977         if (sc->params.nports > 2) {
  978                 int i;
  979 
  980                 for_each_port(sc, i) {
  981                         struct port_info *pi = &sc->port[i];
  982 
  983                         t3_write_reg(sc, A_SG_KDOORBELL, 
  984                                      F_SELEGRCNTX | 
  985                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  986                 }
  987         }       
  988         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
  989             sc->open_device_map != 0)
  990                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  991 }
  992 
  993 /*
  994  * This is meant to be a catch-all function to keep sge state private
  995  * to sge.c
  996  *
  997  */
  998 int
  999 t3_sge_init_adapter(adapter_t *sc)
 1000 {
 1001         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1002         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1003         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1004         return (0);
 1005 }
 1006 
 1007 int
 1008 t3_sge_reset_adapter(adapter_t *sc)
 1009 {
 1010         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1011         return (0);
 1012 }
 1013 
 1014 int
 1015 t3_sge_init_port(struct port_info *pi)
 1016 {
 1017         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1018         return (0);
 1019 }
 1020 
 1021 /**
 1022  *      refill_rspq - replenish an SGE response queue
 1023  *      @adapter: the adapter
 1024  *      @q: the response queue to replenish
 1025  *      @credits: how many new responses to make available
 1026  *
 1027  *      Replenishes a response queue by making the supplied number of responses
 1028  *      available to HW.
 1029  */
 1030 static __inline void
 1031 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1032 {
 1033 
 1034         /* mbufs are allocated on demand when a rspq entry is processed. */
 1035         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1036                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1037 }
 1038 
 1039 static void
 1040 sge_txq_reclaim_handler(void *arg, int ncount)
 1041 {
 1042         struct sge_qset *qs = arg;
 1043         int i;
 1044 
 1045         for (i = 0; i < 3; i++)
 1046                 reclaim_completed_tx(qs, 16, i);
 1047 }
 1048 
 1049 static void
 1050 sge_timer_reclaim(void *arg, int ncount)
 1051 {
 1052         struct port_info *pi = arg;
 1053         int i, nqsets = pi->nqsets;
 1054         adapter_t *sc = pi->adapter;
 1055         struct sge_qset *qs;
 1056         struct mtx *lock;
 1057         
 1058         KASSERT((sc->flags & USING_MSIX) == 0,
 1059             ("can't call timer reclaim for msi-x"));
 1060 
 1061         for (i = 0; i < nqsets; i++) {
 1062                 qs = &sc->sge.qs[pi->first_qset + i];
 1063 
 1064                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1065                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1066                             &sc->sge.qs[0].rspq.lock;
 1067 
 1068                 if (mtx_trylock(lock)) {
 1069                         /* XXX currently assume that we are *NOT* polling */
 1070                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1071 
 1072                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1073                                 __refill_fl(sc, &qs->fl[0]);
 1074                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1075                                 __refill_fl(sc, &qs->fl[1]);
 1076                         
 1077                         if (status & (1 << qs->rspq.cntxt_id)) {
 1078                                 if (qs->rspq.credits) {
 1079                                         refill_rspq(sc, &qs->rspq, 1);
 1080                                         qs->rspq.credits--;
 1081                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1082                                             1 << qs->rspq.cntxt_id);
 1083                                 }
 1084                         }
 1085                         mtx_unlock(lock);
 1086                 }
 1087         }
 1088 }
 1089 
 1090 /**
 1091  *      init_qset_cntxt - initialize an SGE queue set context info
 1092  *      @qs: the queue set
 1093  *      @id: the queue set id
 1094  *
 1095  *      Initializes the TIDs and context ids for the queues of a queue set.
 1096  */
 1097 static void
 1098 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1099 {
 1100 
 1101         qs->rspq.cntxt_id = id;
 1102         qs->fl[0].cntxt_id = 2 * id;
 1103         qs->fl[1].cntxt_id = 2 * id + 1;
 1104         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1105         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1106         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1107         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1108         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1109 
 1110         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1111         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1112         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1113 }
 1114 
 1115 
 1116 static void
 1117 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1118 {
 1119         txq->in_use += ndesc;
 1120         /*
 1121          * XXX we don't handle stopping of queue
 1122          * presumably start handles this when we bump against the end
 1123          */
 1124         txqs->gen = txq->gen;
 1125         txq->unacked += ndesc;
 1126         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1127         txq->unacked &= 31;
 1128         txqs->pidx = txq->pidx;
 1129         txq->pidx += ndesc;
 1130 #ifdef INVARIANTS
 1131         if (((txqs->pidx > txq->cidx) &&
 1132                 (txq->pidx < txqs->pidx) &&
 1133                 (txq->pidx >= txq->cidx)) ||
 1134             ((txqs->pidx < txq->cidx) &&
 1135                 (txq->pidx >= txq-> cidx)) ||
 1136             ((txqs->pidx < txq->cidx) &&
 1137                 (txq->cidx < txqs->pidx)))
 1138                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1139                     txqs->pidx, txq->pidx, txq->cidx);
 1140 #endif
 1141         if (txq->pidx >= txq->size) {
 1142                 txq->pidx -= txq->size;
 1143                 txq->gen ^= 1;
 1144         }
 1145 
 1146 }
 1147 
 1148 /**
 1149  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1150  *      @m: the packet mbufs
 1151  *      @nsegs: the number of segments 
 1152  *
 1153  *      Returns the number of Tx descriptors needed for the given Ethernet
 1154  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1155  */
 1156 static __inline unsigned int
 1157 calc_tx_descs(const struct mbuf *m, int nsegs)
 1158 {
 1159         unsigned int flits;
 1160 
 1161         if (m->m_pkthdr.len <= PIO_LEN)
 1162                 return 1;
 1163 
 1164         flits = sgl_len(nsegs) + 2;
 1165         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1166                 flits++;
 1167 
 1168         return flits_to_desc(flits);
 1169 }
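      /*
       * The two fixed flits account for the WR header and the CPL_TX_PKT
       * fields, and a TSO packet needs one more flit for the LSO information.
       * A non-TSO packet with three DMA segments therefore takes
       * sgl_len(3) + 2 = 7 flits, which still maps to a single descriptor,
       * while a packet no longer than PIO_LEN always takes one descriptor.
       */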
 1170 
 1171 static unsigned int
 1172 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
 1173     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
 1174 {
 1175         struct mbuf *m0;
 1176         int err, pktlen, pass = 0;
 1177         bus_dma_tag_t tag = txq->entry_tag;
 1178 
 1179 retry:
 1180         err = 0;
 1181         m0 = *m;
 1182         pktlen = m0->m_pkthdr.len;
 1183 #if defined(__i386__) || defined(__amd64__)
 1184         if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
 1185                 goto done;
 1186         } else
 1187 #endif
 1188                 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
 1189 
 1190         if (err == 0) {
 1191                 goto done;
 1192         }
 1193         if (err == EFBIG && pass == 0) {
 1194                 pass = 1;
 1195                 /* Too many segments, try to defrag */
 1196                 m0 = m_defrag(m0, M_DONTWAIT);
 1197                 if (m0 == NULL) {
 1198                         m_freem(*m);
 1199                         *m = NULL;
 1200                         return (ENOBUFS);
 1201                 }
 1202                 *m = m0;
 1203                 goto retry;
 1204         } else if (err == ENOMEM) {
 1205                 return (err);
 1206         } else if (err) {
 1207                 if (cxgb_debug)
 1208                         printf("map failure err=%d pktlen=%d\n", err, pktlen);
 1209                 m_freem(m0);
 1210                 *m = NULL;
 1211                 return (err);
 1212         }
 1213 done:
 1214 #if !defined(__i386__) && !defined(__amd64__)
 1215         bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
 1216 #endif  
 1217         txsd->flags |= TX_SW_DESC_MAPPED;
 1218 
 1219         return (0);
 1220 }
 1221 
 1222 /**
 1223  *      make_sgl - populate a scatter/gather list for a packet
 1224  *      @sgp: the SGL to populate
 1225  *      @segs: the packet dma segments
 1226  *      @nsegs: the number of segments
 1227  *
 1228  *      Generates a scatter/gather list for the buffers that make up a packet.
 1229  *      The caller must size the SGL appropriately beforehand (see sgl_len());
 1230  *      this routine only fills it in.
 1231  */
 1232 static __inline void
 1233 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1234 {
 1235         int i, idx;
 1236         
 1237         for (idx = 0, i = 0; i < nsegs; i++) {
 1238                 /*
 1239                  * firmware doesn't like empty segments
 1240                  */
 1241                 if (segs[i].ds_len == 0)
 1242                         continue;
 1243                 if (i && idx == 0) 
 1244                         ++sgp;
 1245                 
 1246                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1247                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1248                 idx ^= 1;
 1249         }
 1250         
 1251         if (idx) {
 1252                 sgp->len[idx] = 0;
 1253                 sgp->addr[idx] = 0;
 1254         }
 1255 }
 1256         
 1257 /**
 1258  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1259  *      @adap: the adapter
 1260  *      @q: the Tx queue
 1261  *
 1262  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
 1263  *      where the HW may go to sleep just after we check; in that case the
 1264  *      interrupt handler will detect the outstanding TX packet and ring the
 1265  *      doorbell for us.
 1266  *
 1267  *      When GTS is disabled the doorbell is rung when forced or after 32 pending WRs.
 1268  */
 1269 static __inline void
 1270 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1271 {
 1272 #if USE_GTS
 1273         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1274         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1275                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1276 #ifdef T3_TRACE
 1277                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1278                           q->cntxt_id);
 1279 #endif
 1280                 t3_write_reg(adap, A_SG_KDOORBELL,
 1281                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1282         }
 1283 #else
 1284         if (mustring || ++q->db_pending >= 32) {
 1285                 wmb();            /* write descriptors before telling HW */
 1286                 t3_write_reg(adap, A_SG_KDOORBELL,
 1287                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1288                 q->db_pending = 0;
 1289         }
 1290 #endif
 1291 }
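      /*
       * In the non-GTS case the MMIO doorbell write is batched: it is issued
       * only when the caller insists (mustring != 0) or once 32 descriptors
       * have accumulated, which cuts down on doorbell traffic for
       * back-to-back transmits.
       */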
 1292 
 1293 static __inline void
 1294 wr_gen2(struct tx_desc *d, unsigned int gen)
 1295 {
 1296 #if SGE_NUM_GENBITS == 2
 1297         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1298 #endif
 1299 }
 1300 
 1301 /**
 1302  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1303  *      @ndesc: number of Tx descriptors spanned by the SGL
 1304  *      @txd: first Tx descriptor to be written
 1305  *      @txqs: txq state (generation and producer index)
 1306  *      @txq: the SGE Tx queue
 1307  *      @sgl: the SGL
 1308  *      @flits: number of flits to the start of the SGL in the first descriptor
 1309  *      @sgl_flits: the SGL size in flits
 1310  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1311  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1312  *
 1313  *      Write a work request header and an associated SGL.  If the SGL is
 1314  *      small enough to fit into one Tx descriptor it has already been written
 1315  *      and we just need to write the WR header.  Otherwise we distribute the
 1316  *      SGL across the number of descriptors it spans.
 1317  */
 1318 static void
 1319 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1320     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1321     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1322 {
 1323 
 1324         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1325         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1326         
 1327         if (__predict_true(ndesc == 1)) {
 1328                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1329                         V_WR_SGLSFLT(flits)) | wr_hi,
 1330                     htonl(V_WR_LEN(flits + sgl_flits) |
 1331                         V_WR_GEN(txqs->gen)) | wr_lo);
 1332                 /* XXX gen? */
 1333                 wr_gen2(txd, txqs->gen);
 1334                 
 1335         } else {
 1336                 unsigned int ogen = txqs->gen;
 1337                 const uint64_t *fp = (const uint64_t *)sgl;
 1338                 struct work_request_hdr *wp = wrp;
 1339                 
 1340                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1341                     V_WR_SGLSFLT(flits)) | wr_hi;
 1342                 
 1343                 while (sgl_flits) {
 1344                         unsigned int avail = WR_FLITS - flits;
 1345 
 1346                         if (avail > sgl_flits)
 1347                                 avail = sgl_flits;
 1348                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1349                         sgl_flits -= avail;
 1350                         ndesc--;
 1351                         if (!sgl_flits)
 1352                                 break;
 1353                         
 1354                         fp += avail;
 1355                         txd++;
 1356                         txsd++;
 1357                         if (++txqs->pidx == txq->size) {
 1358                                 txqs->pidx = 0;
 1359                                 txqs->gen ^= 1;
 1360                                 txd = txq->desc;
 1361                                 txsd = txq->sdesc;
 1362                         }
 1363 
 1364                         /*
 1365                          * when the head of the mbuf chain
 1366                          * is freed all clusters will be freed
 1367                          * with it
 1368                          */
 1369                         wrp = (struct work_request_hdr *)txd;
 1370                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1371                             V_WR_SGLSFLT(1)) | wr_hi;
 1372                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1373                                     sgl_flits + 1)) |
 1374                             V_WR_GEN(txqs->gen)) | wr_lo;
 1375                         wr_gen2(txd, txqs->gen);
 1376                         flits = 1;
 1377                 }
 1378                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1379                 wmb();
 1380                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1381                 wr_gen2((struct tx_desc *)wp, ogen);
 1382         }
 1383 }
 1384 
 1385 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1386 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1387 
 1388 #define GET_VTAG(cntrl, m) \
 1389 do { \
 1390         if ((m)->m_flags & M_VLANTAG)                                               \
 1391                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1392 } while (0)
 1393 
 1394 static int
 1395 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1396 {
 1397         adapter_t *sc;
 1398         struct mbuf *m0;
 1399         struct sge_txq *txq;
 1400         struct txq_state txqs;
 1401         struct port_info *pi;
 1402         unsigned int ndesc, flits, cntrl, mlen;
 1403         int err, nsegs, tso_info = 0;
 1404 
 1405         struct work_request_hdr *wrp;
 1406         struct tx_sw_desc *txsd;
 1407         struct sg_ent *sgp, *sgl;
 1408         uint32_t wr_hi, wr_lo, sgl_flits; 
 1409         bus_dma_segment_t segs[TX_MAX_SEGS];
 1410 
 1411         struct tx_desc *txd;
 1412                 
 1413         pi = qs->port;
 1414         sc = pi->adapter;
 1415         txq = &qs->txq[TXQ_ETH];
 1416         txd = &txq->desc[txq->pidx];
 1417         txsd = &txq->sdesc[txq->pidx];
 1418         sgl = txq->txq_sgl;
 1419 
 1420         prefetch(txd);
 1421         m0 = *m;
 1422 
 1423         mtx_assert(&qs->lock, MA_OWNED);
 1424         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1425         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1426         
 1427         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1428             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1429                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1430 
 1431         if (m0->m_nextpkt != NULL) {
 1432                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1433                 ndesc = 1;
 1434                 mlen = 0;
 1435         } else {
 1436                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1437                     &m0, segs, &nsegs))) {
 1438                         if (cxgb_debug)
 1439                                 printf("failed ... err=%d\n", err);
 1440                         return (err);
 1441                 }
 1442                 mlen = m0->m_pkthdr.len;
 1443                 ndesc = calc_tx_descs(m0, nsegs);
 1444         }
 1445         txq_prod(txq, ndesc, &txqs);
 1446 
 1447         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1448         txsd->m = m0;
 1449 
 1450         if (m0->m_nextpkt != NULL) {
 1451                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1452                 int i, fidx;
 1453 
 1454                 if (nsegs > 7)
 1455                         panic("trying to coalesce %d packets into one WR", nsegs);
 1456                 txq->txq_coalesced += nsegs;
 1457                 wrp = (struct work_request_hdr *)txd;
 1458                 flits = nsegs*2 + 1;
 1459 
 1460                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1461                         struct cpl_tx_pkt_batch_entry *cbe;
 1462                         uint64_t flit;
 1463                         uint32_t *hflit = (uint32_t *)&flit;
 1464                         int cflags = m0->m_pkthdr.csum_flags;
 1465 
 1466                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1467                         GET_VTAG(cntrl, m0);
 1468                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1469                         if (__predict_false(!(cflags & CSUM_IP)))
 1470                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1471                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
 1472                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1473 
 1474                         hflit[0] = htonl(cntrl);
 1475                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1476                         flit |= htobe64(1 << 24);
 1477                         cbe = &cpl_batch->pkt_entry[i];
 1478                         cbe->cntrl = hflit[0];
 1479                         cbe->len = hflit[1];
 1480                         cbe->addr = htobe64(segs[i].ds_addr);
 1481                 }
 1482 
 1483                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1484                     V_WR_SGLSFLT(flits)) |
 1485                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1486                 wr_lo = htonl(V_WR_LEN(flits) |
 1487                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1488                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1489                 wmb();
 1490                 ETHER_BPF_MTAP(pi->ifp, m0);
 1491                 wr_gen2(txd, txqs.gen);
 1492                 check_ring_tx_db(sc, txq, 0);
 1493                 return (0);             
 1494         } else if (tso_info) {
 1495                 int eth_type;
 1496                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1497                 struct ether_header *eh;
 1498                 struct ip *ip;
 1499                 struct tcphdr *tcp;
 1500 
 1501                 txd->flit[2] = 0;
 1502                 GET_VTAG(cntrl, m0);
 1503                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1504                 hdr->cntrl = htonl(cntrl);
 1505                 hdr->len = htonl(mlen | 0x80000000);
 1506 
 1507                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1508                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1509                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1510                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1511                         panic("tx tso packet too small");
 1512                 }
 1513 
 1514                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1515                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1516                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1517                         if (__predict_false(m0 == NULL)) {
 1518                                 /* XXX panic probably an overreaction */
 1519                                 panic("couldn't fit header into mbuf");
 1520                         }
 1521                 }
 1522 
 1523                 eh = mtod(m0, struct ether_header *);
 1524                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 1525                         eth_type = CPL_ETH_II_VLAN;
 1526                         ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
 1527                 } else {
 1528                         eth_type = CPL_ETH_II;
 1529                         ip = (struct ip *)(eh + 1);
 1530                 }
 1531                 tcp = (struct tcphdr *)(ip + 1);
 1532 
 1533                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
 1534                             V_LSO_IPHDR_WORDS(ip->ip_hl) |
 1535                             V_LSO_TCPHDR_WORDS(tcp->th_off);
 1536                 hdr->lso_info = htonl(tso_info);
 1537 
 1538                 if (__predict_false(mlen <= PIO_LEN)) {
 1539                         /*
 1540                          * Packet is not undersized but still fits in PIO_LEN,
 1541                          * which indicates a TSO bug at the higher levels.
 1542                          */
 1543                         txsd->m = NULL;
 1544                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1545                         flits = (mlen + 7) / 8 + 3;
 1546                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1547                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1548                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1549                         wr_lo = htonl(V_WR_LEN(flits) |
 1550                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1551                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1552                         wmb();
 1553                         ETHER_BPF_MTAP(pi->ifp, m0);
 1554                         wr_gen2(txd, txqs.gen);
 1555                         check_ring_tx_db(sc, txq, 0);
 1556                         m_freem(m0);
 1557                         return (0);
 1558                 }
 1559                 flits = 3;      
 1560         } else {
 1561                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1562                 
 1563                 GET_VTAG(cntrl, m0);
 1564                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1565                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1566                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1567                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
 1568                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1569                 cpl->cntrl = htonl(cntrl);
 1570                 cpl->len = htonl(mlen | 0x80000000);
 1571 
 1572                 if (mlen <= PIO_LEN) {
 1573                         txsd->m = NULL;
 1574                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1575                         flits = (mlen + 7) / 8 + 2;
 1576                         
 1577                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1578                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1579                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1580                         wr_lo = htonl(V_WR_LEN(flits) |
 1581                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1582                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1583                         wmb();
 1584                         ETHER_BPF_MTAP(pi->ifp, m0);
 1585                         wr_gen2(txd, txqs.gen);
 1586                         check_ring_tx_db(sc, txq, 0);
 1587                         m_freem(m0);
 1588                         return (0);
 1589                 }
 1590                 flits = 2;
 1591         }
 1592         wrp = (struct work_request_hdr *)txd;
 1593         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1594         make_sgl(sgp, segs, nsegs);
 1595 
 1596         sgl_flits = sgl_len(nsegs);
 1597 
 1598         ETHER_BPF_MTAP(pi->ifp, m0);
 1599 
 1600         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1601         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1602         wr_lo = htonl(V_WR_TID(txq->token));
 1603         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1604             sgl_flits, wr_hi, wr_lo);
 1605         check_ring_tx_db(sc, txq, 0);
 1606 
 1607         return (0);
 1608 }
 1609 
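/*
 * Illustrative sketch, not part of the driver: how the plain (non-TSO)
 * CPL_TX_PKT path in t3_encap() above sizes its work request.  PIO_LEN and
 * the two-flit CPL header layout are taken as given from earlier in this
 * file; the helper below is hypothetical and only restates the arithmetic.
 */
#if 0
static unsigned int
example_tx_pkt_flits(unsigned int mlen)
{
        if (mlen <= PIO_LEN)    /* immediate data follows the 2 CPL flits */
                return ((mlen + 7) / 8 + 2);
        return (2);             /* CPL header only; payload goes in an SGL */
}
#endif
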
 1610 void
 1611 cxgb_tx_watchdog(void *arg)
 1612 {
 1613         struct sge_qset *qs = arg;
 1614         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1615 
 1616         if (qs->coalescing != 0 &&
 1617             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1618             TXQ_RING_EMPTY(qs))
 1619                 qs->coalescing = 0; 
 1620         else if (qs->coalescing == 0 &&
 1621             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1622                 qs->coalescing = 1;
 1623         if (TXQ_TRYLOCK(qs)) {
 1624                 qs->qs_flags |= QS_FLUSHING;
 1625                 cxgb_start_locked(qs);
 1626                 qs->qs_flags &= ~QS_FLUSHING;
 1627                 TXQ_UNLOCK(qs);
 1628         }
 1629         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1630                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1631                     qs, txq->txq_watchdog.c_cpu);
 1632 }
 1633 
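/*
 * Sketch of the coalescing hysteresis applied by cxgb_tx_watchdog() above
 * (hypothetical helper; cxgb_tx_coalesce_enable_start/stop are the tunables
 * referenced in the code): coalescing is switched off only once the queue
 * has drained below the stop threshold with an empty ring, and switched
 * back on once the queue fills past the start threshold.
 */
#if 0
static int
example_update_coalescing(int coalescing, int in_use, int ring_empty)
{
        if (coalescing != 0 && in_use <= cxgb_tx_coalesce_enable_stop &&
            ring_empty)
                return (0);
        if (coalescing == 0 && in_use >= cxgb_tx_coalesce_enable_start)
                return (1);
        return (coalescing);
}
#endif
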
 1634 static void
 1635 cxgb_tx_timeout(void *arg)
 1636 {
 1637         struct sge_qset *qs = arg;
 1638         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1639 
 1640         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1641                 qs->coalescing = 1;     
 1642         if (TXQ_TRYLOCK(qs)) {
 1643                 qs->qs_flags |= QS_TIMEOUT;
 1644                 cxgb_start_locked(qs);
 1645                 qs->qs_flags &= ~QS_TIMEOUT;
 1646                 TXQ_UNLOCK(qs);
 1647         }
 1648 }
 1649 
 1650 static void
 1651 cxgb_start_locked(struct sge_qset *qs)
 1652 {
 1653         struct mbuf *m_head = NULL;
 1654         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1655         struct port_info *pi = qs->port;
 1656         struct ifnet *ifp = pi->ifp;
 1657 
 1658         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1659                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1660 
 1661         if (!pi->link_config.link_ok) {
 1662                 TXQ_RING_FLUSH(qs);
 1663                 return;
 1664         }
 1665         TXQ_LOCK_ASSERT(qs);
 1666         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1667             pi->link_config.link_ok) {
 1668                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1669 
 1670                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1671                         break;
 1672 
 1673                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1674                         break;
 1675                 /*
 1676                  *  Encapsulation can modify our pointer, and/or make it
 1677                  *  NULL on failure.  In that event, we can't requeue.
 1678                  */
 1679                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1680                         break;
 1681 
 1682                 m_head = NULL;
 1683         }
 1684 
 1685         if (txq->db_pending)
 1686                 check_ring_tx_db(pi->adapter, txq, 1);
 1687 
 1688         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1689             pi->link_config.link_ok)
 1690                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1691                     qs, txq->txq_timer.c_cpu);
 1692         if (m_head != NULL)
 1693                 m_freem(m_head);
 1694 }
 1695 
 1696 static int
 1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1698 {
 1699         struct port_info *pi = qs->port;
 1700         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1701         struct buf_ring *br = txq->txq_mr;
 1702         int error, avail;
 1703 
 1704         avail = txq->size - txq->in_use;
 1705         TXQ_LOCK_ASSERT(qs);
 1706 
 1707         /*
 1708          * We can only do a direct transmit if the following are true:
 1709          * - we aren't coalescing (ring < 3/4 full)
 1710          * - the link is up -- checked in caller
 1711          * - there are no packets enqueued already
 1712          * - there is space in hardware transmit queue 
 1713          */
 1714         if (check_pkt_coalesce(qs) == 0 &&
 1715             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1716                 if (t3_encap(qs, &m)) {
 1717                         if (m != NULL &&
 1718                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1719                                 return (error);
 1720                 } else {
 1721                         if (txq->db_pending)
 1722                                 check_ring_tx_db(pi->adapter, txq, 1);
 1723 
 1724                         /*
 1725                          * We've bypassed the buf ring so we need to update
 1726                          * the stats directly
 1727                          */
 1728                         txq->txq_direct_packets++;
 1729                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1730                 }
 1731         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1732                 return (error);
 1733 
 1734         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1735         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1736             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1737                 cxgb_start_locked(qs);
 1738         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1739                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1740                     qs, txq->txq_timer.c_cpu);
 1741         return (0);
 1742 }
 1743 
 1744 int
 1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1746 {
 1747         struct sge_qset *qs;
 1748         struct port_info *pi = ifp->if_softc;
 1749         int error, qidx = pi->first_qset;
 1750 
 1751         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1752             || (!pi->link_config.link_ok)) {
 1753                 m_freem(m);
 1754                 return (0);
 1755         }
 1756         
 1757         if (m->m_flags & M_FLOWID)
 1758                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1759 
 1760         qs = &pi->adapter->sge.qs[qidx];
 1761         
 1762         if (TXQ_TRYLOCK(qs)) {
 1763                 /* XXX running */
 1764                 error = cxgb_transmit_locked(ifp, qs, m);
 1765                 TXQ_UNLOCK(qs);
 1766         } else
 1767                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1768         return (error);
 1769 }
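
/*
 * Worked example for the flow steering in cxgb_transmit() above
 * (hypothetical numbers): with pi->first_qset = 0 and pi->nqsets = 4, a
 * packet carrying m_pkthdr.flowid = 10 selects qidx = (10 % 4) + 0 = 2, so
 * every packet of that flow is serialized on the same Tx queue set.
 */
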
 1770 void
 1771 cxgb_start(struct ifnet *ifp)
 1772 {
 1773         struct port_info *pi = ifp->if_softc;
 1774         struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
 1775         
 1776         if (!pi->link_config.link_ok)
 1777                 return;
 1778 
 1779         TXQ_LOCK(qs);
 1780         cxgb_start_locked(qs);
 1781         TXQ_UNLOCK(qs);
 1782 }
 1783 
 1784 void
 1785 cxgb_qflush(struct ifnet *ifp)
 1786 {
 1787         /*
 1788          * flush any enqueued mbufs in the buf_rings
 1789          * and in the transmit queues
 1790          * no-op for now
 1791          */
 1792         return;
 1793 }
 1794 
 1795 /**
 1796  *      write_imm - write a packet into a Tx descriptor as immediate data
 1797  *      @d: the Tx descriptor to write
 1798  *      @m: the packet
 1799  *      @len: the length of packet data to write as immediate data
 1800  *      @gen: the generation bit value to write
 1801  *
 1802  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1803  *      contains a work request at its beginning.  We must write the packet
 1804  *      carefully so the SGE doesn't read accidentally before it's written in
 1805  *      its entirety.
 1806  */
 1807 static __inline void
 1808 write_imm(struct tx_desc *d, struct mbuf *m,
 1809           unsigned int len, unsigned int gen)
 1810 {
 1811         struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
 1812         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1813         uint32_t wr_hi, wr_lo;
 1814 
 1815         if (len > WR_LEN)
 1816                 panic("len too big %d\n", len);
 1817         if (len < sizeof(*from))
 1818                 panic("len too small %d", len);
 1819         
 1820         memcpy(&to[1], &from[1], len - sizeof(*from));
 1821         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1822                                         V_WR_BCNTLFLT(len & 7));
 1823         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
 1824                                         V_WR_LEN((len + 7) / 8));
 1825         set_wr_hdr(to, wr_hi, wr_lo);
 1826         wmb();
 1827         wr_gen2(d, gen);
 1828 
 1829         /*
 1830          * This check is a hack; we should really fix the logic so
 1831          * that this can't happen.
 1832          */
 1833         if (m->m_type != MT_DONTFREE)
 1834                 m_freem(m);
 1835         
 1836 }
 1837 
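/*
 * Worked example for write_imm() above (hypothetical length): a 44-byte
 * immediate work request is advertised with V_WR_LEN((44 + 7) / 8) = 6
 * flits and V_WR_BCNTLFLT(44 & 7) = 4, i.e. only 4 bytes of the last flit
 * are valid.
 */
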
 1838 /**
 1839  *      check_desc_avail - check descriptor availability on a send queue
 1840  *      @adap: the adapter
 1841  *      @q: the TX queue
 1842  *      @m: the packet needing the descriptors
 1843  *      @ndesc: the number of Tx descriptors needed
 1844  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1845  *
 1846  *      Checks if the requested number of Tx descriptors is available on an
 1847  *      SGE send queue.  If the queue is already suspended or not enough
 1848  *      descriptors are available the packet is queued for later transmission.
 1849  *      Must be called with the Tx queue locked.
 1850  *
 1851  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1852  *      enough descriptors and the packet has been queued, and 2 if the caller
 1853  *      needs to retry because there weren't enough descriptors at the
 1854  *      beginning of the call but some freed up in the meantime.
 1855  */
 1856 static __inline int
 1857 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1858                  struct mbuf *m, unsigned int ndesc,
 1859                  unsigned int qid)
 1860 {
 1861         /* 
 1862          * XXX We currently only use this for checking the control queue
 1863          * the control queue is only used for binding qsets which happens
 1864          * at init time so we are guaranteed enough descriptors
 1865          */
 1866         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1867 addq_exit:      mbufq_tail(&q->sendq, m);
 1868                 return 1;
 1869         }
 1870         if (__predict_false(q->size - q->in_use < ndesc)) {
 1871 
 1872                 struct sge_qset *qs = txq_to_qset(q, qid);
 1873 
 1874                 setbit(&qs->txq_stopped, qid);
 1875                 if (should_restart_tx(q) &&
 1876                     test_and_clear_bit(qid, &qs->txq_stopped))
 1877                         return 2;
 1878 
 1879                 q->stops++;
 1880                 goto addq_exit;
 1881         }
 1882         return 0;
 1883 }
 1884 
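/*
 * Illustrative caller pattern for check_desc_avail() above (a sketch only;
 * the real callers are ctrl_xmit() and ofld_xmit() below): 0 means write
 * the request, 1 means the packet was parked on q->sendq, and 2 means
 * descriptors freed up meanwhile so the caller should reclaim and retry.
 */
#if 0
        TXQ_LOCK(qs);
again:  reclaim_completed_tx_imm(q);
        ret = check_desc_avail(adap, q, m, ndesc, TXQ_CTRL);
        if (ret == 2)
                goto again;
        if (ret == 1) {
                TXQ_UNLOCK(qs);
                return (ENOSPC);        /* will be resent by restart_ctrlq() */
        }
        /* ret == 0: write the WR and advance pidx/gen. */
#endif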
 1885 
 1886 /**
 1887  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1888  *      @q: the SGE control Tx queue
 1889  *
 1890  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1891  *      that send only immediate data (presently just the control queues) and
 1892  *      thus do not have any mbufs.
 1893  */
 1894 static __inline void
 1895 reclaim_completed_tx_imm(struct sge_txq *q)
 1896 {
 1897         unsigned int reclaim = q->processed - q->cleaned;
 1898 
 1899         q->in_use -= reclaim;
 1900         q->cleaned += reclaim;
 1901 }
 1902 
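/*
 * Worked example for reclaim_completed_tx_imm() above (hypothetical
 * counters): with q->processed = 37 and q->cleaned = 30, seven descriptors
 * are reclaimed (in_use -= 7, cleaned becomes 37); control descriptors
 * carry only immediate data, so there are no mbufs or DMA maps to release.
 */
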
 1903 static __inline int
 1904 immediate(const struct mbuf *m)
 1905 {
 1906         return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
 1907 }
 1908 
 1909 /**
 1910  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1911  *      @adap: the adapter
 1912  *      @q: the control queue
 1913  *      @m: the packet
 1914  *
 1915  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1916  *      a control queue must fit entirely as immediate data in a single Tx
 1917  *      descriptor and have no page fragments.
 1918  */
 1919 static int
 1920 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1921 {
 1922         int ret;
 1923         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1924         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1925         
 1926         if (__predict_false(!immediate(m))) {
 1927                 m_freem(m);
 1928                 return 0;
 1929         }
 1930         
 1931         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1932         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1933 
 1934         TXQ_LOCK(qs);
 1935 again:  reclaim_completed_tx_imm(q);
 1936 
 1937         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1938         if (__predict_false(ret)) {
 1939                 if (ret == 1) {
 1940                         TXQ_UNLOCK(qs);
 1941                         return (ENOSPC);
 1942                 }
 1943                 goto again;
 1944         }
 1945         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1946         
 1947         q->in_use++;
 1948         if (++q->pidx >= q->size) {
 1949                 q->pidx = 0;
 1950                 q->gen ^= 1;
 1951         }
 1952         TXQ_UNLOCK(qs);
 1953         wmb();
 1954         t3_write_reg(adap, A_SG_KDOORBELL,
 1955                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1956         return (0);
 1957 }
 1958 
 1959 
 1960 /**
 1961  *      restart_ctrlq - restart a suspended control queue
 1962  *      @qs: the queue set containing the control queue
 1963  *
 1964  *      Resumes transmission on a suspended Tx control queue.
 1965  */
 1966 static void
 1967 restart_ctrlq(void *data, int npending)
 1968 {
 1969         struct mbuf *m;
 1970         struct sge_qset *qs = (struct sge_qset *)data;
 1971         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1972         adapter_t *adap = qs->port->adapter;
 1973 
 1974         TXQ_LOCK(qs);
 1975 again:  reclaim_completed_tx_imm(q);
 1976 
 1977         while (q->in_use < q->size &&
 1978                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1979 
 1980                 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1981 
 1982                 if (++q->pidx >= q->size) {
 1983                         q->pidx = 0;
 1984                         q->gen ^= 1;
 1985                 }
 1986                 q->in_use++;
 1987         }
 1988         if (!mbufq_empty(&q->sendq)) {
 1989                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1990 
 1991                 if (should_restart_tx(q) &&
 1992                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1993                         goto again;
 1994                 q->stops++;
 1995         }
 1996         TXQ_UNLOCK(qs);
 1997         t3_write_reg(adap, A_SG_KDOORBELL,
 1998                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1999 }
 2000 
 2001 
 2002 /*
 2003  * Send a management message through control queue 0
 2004  */
 2005 int
 2006 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 2007 {
 2008         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 2009 }
 2010 
 2011 /**
 2012  *      t3_free_qset - free the resources of an SGE queue set
 2013  *      @sc: the controller owning the queue set
 2014  *      @q: the queue set
 2015  *
 2016  *      Release the HW and SW resources associated with an SGE queue set, such
 2017  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 2018  *      queue set must be quiesced prior to calling this.
 2019  */
 2020 static void
 2021 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 2022 {
 2023         int i;
 2024         
 2025         reclaim_completed_tx(q, 0, TXQ_ETH);
 2026         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2027                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2028         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2029                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2030                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2031         }
 2032 
 2033         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2034                 if (q->fl[i].desc) {
 2035                         mtx_lock_spin(&sc->sge.reg_lock);
 2036                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2037                         mtx_unlock_spin(&sc->sge.reg_lock);
 2038                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2039                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2040                                         q->fl[i].desc_map);
 2041                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2042                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2043                 }
 2044                 if (q->fl[i].sdesc) {
 2045                         free_rx_bufs(sc, &q->fl[i]);
 2046                         free(q->fl[i].sdesc, M_DEVBUF);
 2047                 }
 2048         }
 2049 
 2050         mtx_unlock(&q->lock);
 2051         MTX_DESTROY(&q->lock);
 2052         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2053                 if (q->txq[i].desc) {
 2054                         mtx_lock_spin(&sc->sge.reg_lock);
 2055                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2056                         mtx_unlock_spin(&sc->sge.reg_lock);
 2057                         bus_dmamap_unload(q->txq[i].desc_tag,
 2058                                         q->txq[i].desc_map);
 2059                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2060                                         q->txq[i].desc_map);
 2061                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2062                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2063                 }
 2064                 if (q->txq[i].sdesc) {
 2065                         free(q->txq[i].sdesc, M_DEVBUF);
 2066                 }
 2067         }
 2068 
 2069         if (q->rspq.desc) {
 2070                 mtx_lock_spin(&sc->sge.reg_lock);
 2071                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2072                 mtx_unlock_spin(&sc->sge.reg_lock);
 2073                 
 2074                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2075                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2076                                 q->rspq.desc_map);
 2077                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2078                 MTX_DESTROY(&q->rspq.lock);
 2079         }
 2080 
 2081 #ifdef INET
 2082         tcp_lro_free(&q->lro.ctrl);
 2083 #endif
 2084 
 2085         bzero(q, sizeof(*q));
 2086 }
 2087 
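/*
 * Note on locking (as used by the callers of t3_free_qset() in this file):
 * the function is entered with the queue-set lock held -- both
 * t3_free_sge_resources() below and the error path of t3_sge_alloc_qset()
 * take TXQ_LOCK() first -- and it releases and destroys that lock itself
 * before zeroing the queue set.
 */
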
 2088 /**
 2089  *      t3_free_sge_resources - free SGE resources
 2090  *      @sc: the adapter softc
 2091  *
 2092  *      Frees resources used by the SGE queue sets.
 2093  */
 2094 void
 2095 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2096 {
 2097         int i;
 2098 
 2099         for (i = 0; i < nqsets; ++i) {
 2100                 TXQ_LOCK(&sc->sge.qs[i]);
 2101                 t3_free_qset(sc, &sc->sge.qs[i]);
 2102         }
 2103 }
 2104 
 2105 /**
 2106  *      t3_sge_start - enable SGE
 2107  *      @sc: the controller softc
 2108  *
 2109  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2110  *      transfers.
 2111  */
 2112 void
 2113 t3_sge_start(adapter_t *sc)
 2114 {
 2115         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2116 }
 2117 
 2118 /**
 2119  *      t3_sge_stop - disable SGE operation
 2120  *      @sc: the adapter
 2121  *
 2122  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2123  *      from error interrupts) or from normal process context.  In the latter
 2124  *      case it also disables any pending queue restart tasklets.  Note that
 2125  *      if it is called in interrupt context it cannot disable the restart
 2126  *      tasklets as it cannot wait, however the tasklets will have no effect
 2127  *      since the doorbells are disabled and the driver will call this again
 2128  *      later from process context, at which time the tasklets will be stopped
 2129  *      if they are still running.
 2130  */
 2131 void
 2132 t3_sge_stop(adapter_t *sc)
 2133 {
 2134         int i, nqsets;
 2135         
 2136         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2137 
 2138         if (sc->tq == NULL)
 2139                 return;
 2140         
 2141         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2142                 nqsets += sc->port[i].nqsets;
 2143 #ifdef notyet
 2144         /*
 2145          * 
 2146          * XXX
 2147          */
 2148         for (i = 0; i < nqsets; ++i) {
 2149                 struct sge_qset *qs = &sc->sge.qs[i];
 2150                 
 2151                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2152                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2153         }
 2154 #endif
 2155 }
 2156 
 2157 /**
 2158  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2159  *      @qs: the queue set owning the Tx queue
 2160  *      @reclaimable: the number of descriptors to reclaim
 2161  *      @queue: the index of the Tx queue within the queue set
 2162  *
 2163  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2164  *      Tx buffers.  Called with the Tx queue lock held.  Descriptors with no
 2165  *      mbuf attached are counted in txq_skipped; completed mbuf chains are
 2166  *      freed with m_freem_list() and their DMA maps are unloaded.
 2167  *
 2168  *      Returns nothing; the queue's cidx is advanced past the reclaimed range.
 2169  */
 2170 void
 2171 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2172 {
 2173         struct tx_sw_desc *txsd;
 2174         unsigned int cidx, mask;
 2175         struct sge_txq *q = &qs->txq[queue];
 2176 
 2177 #ifdef T3_TRACE
 2178         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2179                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2180 #endif
 2181         cidx = q->cidx;
 2182         mask = q->size - 1;
 2183         txsd = &q->sdesc[cidx];
 2184 
 2185         mtx_assert(&qs->lock, MA_OWNED);
 2186         while (reclaimable--) {
 2187                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2188                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2189 
 2190                 if (txsd->m != NULL) {
 2191                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2192                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2193                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2194                         }
 2195                         m_freem_list(txsd->m);
 2196                         txsd->m = NULL;
 2197                 } else
 2198                         q->txq_skipped++;
 2199                 
 2200                 ++txsd;
 2201                 if (++cidx == q->size) {
 2202                         cidx = 0;
 2203                         txsd = q->sdesc;
 2204                 }
 2205         }
 2206         q->cidx = cidx;
 2207 
 2208 }
 2209 
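/*
 * Note on the prefetch indexing in t3_free_tx_desc() above: the masking
 * assumes a power-of-two ring size (hypothetical example: q->size = 1024,
 * mask = 1023, so (cidx + 2) & mask wraps 1023 -> 1 without a branch),
 * while the consumer index itself wraps via the explicit
 * "if (++cidx == q->size)" check.
 */
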
 2210 /**
 2211  *      is_new_response - check if a response is newly written
 2212  *      @r: the response descriptor
 2213  *      @q: the response queue
 2214  *
 2215  *      Returns true if a response descriptor contains a yet unprocessed
 2216  *      response.
 2217  */
 2218 static __inline int
 2219 is_new_response(const struct rsp_desc *r,
 2220     const struct sge_rspq *q)
 2221 {
 2222         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2223 }
 2224 
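/*
 * Sketch of the generation-bit convention checked by is_new_response()
 * above (simplified, hypothetical consumer loop): the response queue
 * starts with gen = 1 and flips it every time cidx wraps, so a descriptor
 * counts as new only if the hardware wrote it during the current pass over
 * the ring.
 */
#if 0
        while (is_new_response(r, q)) {
                /* ... handle the response ... */
                if (++q->cidx == q->size) {
                        q->cidx = 0;
                        q->gen ^= 1;    /* next pass expects the other value */
                }
                r = &q->desc[q->cidx];
        }
#endif
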
 2225 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2226 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2227                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2228                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2229                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2230 
 2231 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2232 #define NOMEM_INTR_DELAY 2500
 2233 
 2234 /**
 2235  *      write_ofld_wr - write an offload work request
 2236  *      @adap: the adapter
 2237  *      @m: the packet to send
 2238  *      @q: the Tx queue
 2239  *      @pidx: index of the first Tx descriptor to write
 2240  *      @gen: the generation value to use
 2241  *      @ndesc: number of descriptors the packet will occupy
 2242  *
 2243  *      Write an offload work request to send the supplied packet.  The packet
 2244  *      data already carry the work request with most fields populated.
 2245  */
 2246 static void
 2247 write_ofld_wr(adapter_t *adap, struct mbuf *m,
 2248     struct sge_txq *q, unsigned int pidx,
 2249     unsigned int gen, unsigned int ndesc,
 2250     bus_dma_segment_t *segs, unsigned int nsegs)
 2251 {
 2252         unsigned int sgl_flits, flits;
 2253         struct work_request_hdr *from;
 2254         struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 2255         struct tx_desc *d = &q->desc[pidx];
 2256         struct txq_state txqs;
 2257         
 2258         if (immediate(m) && nsegs == 0) {
 2259                 write_imm(d, m, m->m_len, gen);
 2260                 return;
 2261         }
 2262 
 2263         /* Only TX_DATA builds SGLs */
 2264         from = mtod(m, struct work_request_hdr *);
 2265         memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
 2266 
 2267         flits = m->m_len / 8;
 2268         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
 2269 
 2270         make_sgl(sgp, segs, nsegs);
 2271         sgl_flits = sgl_len(nsegs);
 2272 
 2273         txqs.gen = gen;
 2274         txqs.pidx = pidx;
 2275         txqs.compl = 0;
 2276 
 2277         write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
 2278             from->wrh_hi, from->wrh_lo);
 2279 }
 2280 
 2281 /**
 2282  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 2283  *      @m: the packet
 2284  *
 2285  *      Returns the number of Tx descriptors needed for the given offload
 2286  *      packet.  These packets are already fully constructed.
 2287  */
 2288 static __inline unsigned int
 2289 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 2290 {
 2291         unsigned int flits, cnt = 0;
 2292         int ndescs;
 2293 
 2294         if (m->m_len <= WR_LEN && nsegs == 0)
 2295                 return (1);                 /* packet fits as immediate data */
 2296 
 2297         /*
 2298          * This needs to be re-visited for TOE
 2299          */
 2300 
 2301         cnt = nsegs;
 2302                 
 2303         /* headers */
 2304         flits = m->m_len / 8;
 2305 
 2306         ndescs = flits_to_desc(flits + sgl_len(cnt));
 2307 
 2308         return (ndescs);
 2309 }
 2310 
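/*
 * Worked example for calc_tx_descs_ofld() above (hypothetical sizes,
 * assuming the sg_ent layout used by make_sgl() packs two DMA segments
 * into three 8-byte flits): a WR with a 40-byte header (5 flits) and 4
 * payload segments needs 5 + sgl_len(4) = 5 + 6 = 11 flits, which
 * flits_to_desc() then rounds up to whole Tx descriptors.
 */
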
 2311 /**
 2312  *      ofld_xmit - send a packet through an offload queue
 2313  *      @adap: the adapter
 2314  *      @q: the Tx offload queue
 2315  *      @m: the packet
 2316  *
 2317  *      Send an offload packet through an SGE offload queue.
 2318  */
 2319 static int
 2320 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2321 {
 2322         int ret, nsegs;
 2323         unsigned int ndesc;
 2324         unsigned int pidx, gen;
 2325         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2326         bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
 2327         struct tx_sw_desc *stx;
 2328 
 2329         nsegs = m_get_sgllen(m);
 2330         vsegs = m_get_sgl(m);
 2331         ndesc = calc_tx_descs_ofld(m, nsegs);
 2332         busdma_map_sgl(vsegs, segs, nsegs);
 2333 
 2334         stx = &q->sdesc[q->pidx];
 2335         
 2336         TXQ_LOCK(qs);
 2337 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2338         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2339         if (__predict_false(ret)) {
 2340                 if (ret == 1) {
 2341                         printf("no ofld desc avail\n");
 2342                         
 2343                         m_set_priority(m, ndesc);     /* save for restart */
 2344                         TXQ_UNLOCK(qs);
 2345                         return (EINTR);
 2346                 }
 2347                 goto again;
 2348         }
 2349 
 2350         gen = q->gen;
 2351         q->in_use += ndesc;
 2352         pidx = q->pidx;
 2353         q->pidx += ndesc;
 2354         if (q->pidx >= q->size) {
 2355                 q->pidx -= q->size;
 2356                 q->gen ^= 1;
 2357         }
 2358 #ifdef T3_TRACE
 2359         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 2360                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 2361                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 2362                   skb_shinfo(skb)->nr_frags);
 2363 #endif
 2364         TXQ_UNLOCK(qs);
 2365 
 2366         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2367         check_ring_tx_db(adap, q, 1);
 2368         return (0);
 2369 }
 2370 
 2371 /**
 2372  *      restart_offloadq - restart a suspended offload queue
 2373  *      @qs: the queue set containing the offload queue
 2374  *
 2375  *      Resumes transmission on a suspended Tx offload queue.
 2376  */
 2377 static void
 2378 restart_offloadq(void *data, int npending)
 2379 {
 2380         struct mbuf *m;
 2381         struct sge_qset *qs = data;
 2382         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2383         adapter_t *adap = qs->port->adapter;
 2384         bus_dma_segment_t segs[TX_MAX_SEGS];
 2385         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 2386         int nsegs, cleaned;
 2387                 
 2388         TXQ_LOCK(qs);
 2389 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2390 
 2391         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2392                 unsigned int gen, pidx;
 2393                 unsigned int ndesc = m_get_priority(m);
 2394 
 2395                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2396                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2397                         if (should_restart_tx(q) &&
 2398                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2399                                 goto again;
 2400                         q->stops++;
 2401                         break;
 2402                 }
 2403 
 2404                 gen = q->gen;
 2405                 q->in_use += ndesc;
 2406                 pidx = q->pidx;
 2407                 q->pidx += ndesc;
 2408                 if (q->pidx >= q->size) {
 2409                         q->pidx -= q->size;
 2410                         q->gen ^= 1;
 2411                 }
 2412                 
 2413                 (void)mbufq_dequeue(&q->sendq);
 2414                 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 2415                 TXQ_UNLOCK(qs);
 2416                 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2417                 TXQ_LOCK(qs);
 2418         }
 2419 #if USE_GTS
 2420         set_bit(TXQ_RUNNING, &q->flags);
 2421         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2422 #endif
 2423         TXQ_UNLOCK(qs);
 2424         wmb();
 2425         t3_write_reg(adap, A_SG_KDOORBELL,
 2426                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2427 }
 2428 
 2429 /**
 2430  *      queue_set - return the queue set a packet should use
 2431  *      @m: the packet
 2432  *
 2433  *      Maps a packet to the SGE queue set it should use.  The desired queue
 2434  *      set is carried in bits 1-3 in the packet's priority.
 2435  */
 2436 static __inline int
 2437 queue_set(const struct mbuf *m)
 2438 {
 2439         return m_get_priority(m) >> 1;
 2440 }
 2441 
 2442 /**
 2443  *      is_ctrl_pkt - return whether an offload packet is a control packet
 2444  *      @m: the packet
 2445  *
 2446  *      Determines whether an offload packet should use an OFLD or a CTRL
 2447  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 2448  */
 2449 static __inline int
 2450 is_ctrl_pkt(const struct mbuf *m)
 2451 {
 2452         return m_get_priority(m) & 1;
 2453 }
 2454 
 2455 /**
 2456  *      t3_offload_tx - send an offload packet
 2457  *      @tdev: the offload device to send to
 2458  *      @m: the packet
 2459  *
 2460  *      Sends an offload packet.  We use the packet priority to select the
 2461  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2462  *      should be sent as regular or control, bits 1-3 select the queue set.
 2463  */
 2464 int
 2465 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
 2466 {
 2467         adapter_t *adap = tdev2adap(tdev);
 2468         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2469 
 2470         if (__predict_false(is_ctrl_pkt(m))) 
 2471                 return ctrl_xmit(adap, qs, m);
 2472 
 2473         return ofld_xmit(adap, qs, m);
 2474 }
 2475 
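/*
 * Worked example for the priority encoding described above (hypothetical
 * value): a packet queued with priority 5 (binary 101) has bit 0 set, so
 * is_ctrl_pkt() routes it through ctrl_xmit(), and queue_set() picks
 * queue set 5 >> 1 = 2.
 */
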
 2476 /**
 2477  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2478  *      @tdev: the offload device that will be receiving the packets
 2479  *      @q: the SGE response queue that assembled the bundle
 2480  *      @m: the partial bundle
 2481  *      @n: the number of packets in the bundle
 2482  *
 2483  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2484  */
 2485 static __inline void
 2486 deliver_partial_bundle(struct t3cdev *tdev,
 2487                         struct sge_rspq *q,
 2488                         struct mbuf *mbufs[], int n)
 2489 {
 2490         if (n) {
 2491                 q->offload_bundles++;
 2492                 cxgb_ofld_recv(tdev, mbufs, n);
 2493         }
 2494 }
 2495 
 2496 static __inline int
 2497 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 2498     struct mbuf *m, struct mbuf *rx_gather[],
 2499     unsigned int gather_idx)
 2500 {
 2501         
 2502         rq->offload_pkts++;
 2503         m->m_pkthdr.header = mtod(m, void *);
 2504         rx_gather[gather_idx++] = m;
 2505         if (gather_idx == RX_BUNDLE_SIZE) {
 2506                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2507                 gather_idx = 0;
 2508                 rq->offload_bundles++;
 2509         }
 2510         return (gather_idx);
 2511 }
 2512 
 2513 static void
 2514 restart_tx(struct sge_qset *qs)
 2515 {
 2516         struct adapter *sc = qs->port->adapter;
 2517         
 2518         
 2519         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2520             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2521             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2522                 qs->txq[TXQ_OFLD].restarts++;
 2523                 DPRINTF("restarting TXQ_OFLD\n");
 2524                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2525         }
 2526         DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
 2527             qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
 2528             qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
 2529             qs->txq[TXQ_CTRL].in_use);
 2530         
 2531         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2532             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2533             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2534                 qs->txq[TXQ_CTRL].restarts++;
 2535                 DPRINTF("restarting TXQ_CTRL\n");
 2536                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2537         }
 2538 }
 2539 
 2540 /**
 2541  *      t3_sge_alloc_qset - initialize an SGE queue set
 2542  *      @sc: the controller softc
 2543  *      @id: the queue set id
 2544  *      @nports: how many Ethernet ports will be using this queue set
 2545  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2546  *      @p: configuration parameters for this queue set
 2547  *      @ntxq: number of Tx queues for the queue set
 2548  *      @pi: port info for queue set
 2549  *
 2550  *      Allocate resources and initialize an SGE queue set.  A queue set
 2551  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2552  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2553  *      queue, offload queue, and control queue.
 2554  */
 2555 int
 2556 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2557                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2558 {
 2559         struct sge_qset *q = &sc->sge.qs[id];
 2560         int i, ret = 0;
 2561 
 2562         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2563         q->port = pi;
 2564 
 2565         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2566             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2567                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2568                 goto err;
 2569         }
 2570         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2571             M_NOWAIT | M_ZERO)) == NULL) {
 2572                 device_printf(sc->dev, "failed to allocate ifq\n");
 2573                 goto err;
 2574         }
 2575         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2576         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2577         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2578         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2579         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2580 
 2581         init_qset_cntxt(q, id);
 2582         q->idx = id;
 2583         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2584                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2585                     &q->fl[0].desc, &q->fl[0].sdesc,
 2586                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2587                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2588                 printf("error %d from alloc ring fl0\n", ret);
 2589                 goto err;
 2590         }
 2591 
 2592         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2593                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2594                     &q->fl[1].desc, &q->fl[1].sdesc,
 2595                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2596                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2597                 printf("error %d from alloc ring fl1\n", ret);
 2598                 goto err;
 2599         }
 2600 
 2601         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2602                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2603                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2604                     NULL, NULL)) != 0) {
 2605                 printf("error %d from alloc ring rspq\n", ret);
 2606                 goto err;
 2607         }
 2608 
 2609         for (i = 0; i < ntxq; ++i) {
 2610                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2611 
 2612                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2613                             sizeof(struct tx_desc), sz,
 2614                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2615                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2616                             &q->txq[i].desc_map,
 2617                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2618                         printf("error %d from alloc ring tx %i\n", ret, i);
 2619                         goto err;
 2620                 }
 2621                 mbufq_init(&q->txq[i].sendq);
 2622                 q->txq[i].gen = 1;
 2623                 q->txq[i].size = p->txq_size[i];
 2624         }
 2625         
 2626         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2627         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2628         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2629         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2630 
 2631         q->fl[0].gen = q->fl[1].gen = 1;
 2632         q->fl[0].size = p->fl_size;
 2633         q->fl[1].size = p->jumbo_size;
 2634 
 2635         q->rspq.gen = 1;
 2636         q->rspq.cidx = 0;
 2637         q->rspq.size = p->rspq_size;
 2638 
 2639         q->txq[TXQ_ETH].stop_thres = nports *
 2640             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2641 
 2642         q->fl[0].buf_size = MCLBYTES;
 2643         q->fl[0].zone = zone_pack;
 2644         q->fl[0].type = EXT_PACKET;
 2645 
 2646         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2647                 q->fl[1].zone = zone_jumbo16;
 2648                 q->fl[1].type = EXT_JUMBO16;
 2649         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2650                 q->fl[1].zone = zone_jumbo9;
 2651                 q->fl[1].type = EXT_JUMBO9;             
 2652         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2653                 q->fl[1].zone = zone_jumbop;
 2654                 q->fl[1].type = EXT_JUMBOP;
 2655         } else {
 2656                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2657                 ret = EDOOFUS;
 2658                 goto err;
 2659         }
 2660         q->fl[1].buf_size = p->jumbo_buf_size;
 2661 
 2662         /* Allocate and setup the lro_ctrl structure */
 2663         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2664 #ifdef INET
 2665         ret = tcp_lro_init(&q->lro.ctrl);
 2666         if (ret) {
 2667                 printf("error %d from tcp_lro_init\n", ret);
 2668                 goto err;
 2669         }
 2670 #endif
 2671         q->lro.ctrl.ifp = pi->ifp;
 2672 
 2673         mtx_lock_spin(&sc->sge.reg_lock);
 2674         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2675                                    q->rspq.phys_addr, q->rspq.size,
 2676                                    q->fl[0].buf_size, 1, 0);
 2677         if (ret) {
 2678                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2679                 goto err_unlock;
 2680         }
 2681 
 2682         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2683                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2684                                           q->fl[i].phys_addr, q->fl[i].size,
 2685                                           q->fl[i].buf_size, p->cong_thres, 1,
 2686                                           0);
 2687                 if (ret) {
 2688                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2689                         goto err_unlock;
 2690                 }
 2691         }
 2692 
 2693         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2694                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2695                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2696                                  1, 0);
 2697         if (ret) {
 2698                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2699                 goto err_unlock;
 2700         }
 2701 
 2702         if (ntxq > 1) {
 2703                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2704                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2705                                          q->txq[TXQ_OFLD].phys_addr,
 2706                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2707                 if (ret) {
 2708                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2709                         goto err_unlock;
 2710                 }
 2711         }
 2712 
 2713         if (ntxq > 2) {
 2714                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2715                                          SGE_CNTXT_CTRL, id,
 2716                                          q->txq[TXQ_CTRL].phys_addr,
 2717                                          q->txq[TXQ_CTRL].size,
 2718                                          q->txq[TXQ_CTRL].token, 1, 0);
 2719                 if (ret) {
 2720                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2721                         goto err_unlock;
 2722                 }
 2723         }
 2724         
 2725         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2726             device_get_unit(sc->dev), irq_vec_idx);
 2727         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2728         
 2729         mtx_unlock_spin(&sc->sge.reg_lock);
 2730         t3_update_qset_coalesce(q, p);
 2731         q->port = pi;
 2732         
 2733         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2734         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2735         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2736 
 2737         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2738                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2739 
 2740         return (0);
 2741 
 2742 err_unlock:
 2743         mtx_unlock_spin(&sc->sge.reg_lock);
 2744 err:    
 2745         TXQ_LOCK(q);
 2746         t3_free_qset(sc, q);
 2747 
 2748         return (ret);
 2749 }
 2750 
 2751 /*
 2752  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2753  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2754  * will also be taken into account here.
 2755  */
 2756 void
 2757 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2758 {
 2759         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2760         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2761         struct ifnet *ifp = pi->ifp;
 2762         
 2763         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2764 
 2765         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2766             cpl->csum_valid && cpl->csum == 0xffff) {
 2767                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2768                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2769                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2770                 m->m_pkthdr.csum_data = 0xffff;
 2771         }
 2772 
 2773         if (cpl->vlan_valid) {
 2774                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2775                 m->m_flags |= M_VLANTAG;
 2776         } 
 2777 
 2778         m->m_pkthdr.rcvif = ifp;
 2779         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2780         /*
 2781          * adjust after conversion to mbuf chain
 2782          */
 2783         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2784         m->m_len -= (sizeof(*cpl) + ethpad);
 2785         m->m_data += (sizeof(*cpl) + ethpad);
 2786 }
 2787 
 2788 /**
 2789  *      get_packet - return the next ingress packet buffer from a free list
 2790  *      @adap: the adapter that received the packet
 2791  *      @drop_thres: # of remaining buffers before we start dropping packets
 2792  *      @qs: the qset that the SGE free list holding the packet belongs to
 2793  *      @mh: the mbuf header, containing pointers to the head and tail of the mbuf chain
 2794  *      @r: response descriptor 
 2795  *
 2796  *      Get the next packet from a free list and complete setup of the
 2797  *      sk_buff.  If the packet is small we make a copy and recycle the
 2798  *      mbuf.  If the packet is small we make a copy and recycle the
 2799  *      positive drop threshold is supplied packets are dropped and their
 2800  *      buffers recycled if (a) the number of remaining buffers is under the
 2801  *      threshold and the packet is too big to copy, or (b) the packet should
 2802  *      be copied but there is no memory for the copy.
 2803  */
 2804 static int
 2805 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2806     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2807 {
 2808 
 2809         unsigned int len_cq =  ntohl(r->len_cq);
 2810         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2811         int mask, cidx = fl->cidx;
 2812         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2813         uint32_t len = G_RSPD_LEN(len_cq);
 2814         uint32_t flags = M_EXT;
 2815         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2816         caddr_t cl;
 2817         struct mbuf *m;
 2818         int ret = 0;
 2819 
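                /*
                 * Prefetch the next two software descriptors and their
                 * clusters; they are likely to be consumed by subsequent
                 * responses.
                 */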
 2820         mask = fl->size - 1;
 2821         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2822         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2823         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2824         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2825 
 2826         fl->credits--;
 2827         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2828         
 2829         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2830             sopeop == RSPQ_SOP_EOP) {
 2831                 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 2832                         goto skip_recycle;
 2833                 cl = mtod(m, void *);
 2834                 memcpy(cl, sd->rxsd_cl, len);
 2835                 recycle_rx_buf(adap, fl, fl->cidx);
 2836                 m->m_pkthdr.len = m->m_len = len;
 2837                 m->m_flags = 0;
 2838                 mh->mh_head = mh->mh_tail = m;
 2839                 ret = 1;
 2840                 goto done;
 2841         } else {
 2842         skip_recycle:
 2843                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2844                 cl = sd->rxsd_cl;
 2845                 m = sd->m;
 2846 
 2847                 if ((sopeop == RSPQ_SOP_EOP) ||
 2848                     (sopeop == RSPQ_SOP))
 2849                         flags |= M_PKTHDR;
 2850                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2851                 if (fl->zone == zone_pack) {
 2852                         /*
 2853                          * restore clobbered data pointer
 2854                          */
 2855                         m->m_data = m->m_ext.ext_buf;
 2856                 } else {
 2857                         m_cljset(m, cl, fl->type);
 2858                 }
 2859                 m->m_len = len;
 2860         }               
 2861         switch(sopeop) {
 2862         case RSPQ_SOP_EOP:
 2863                 ret = 1;
 2864                 /* FALLTHROUGH */
 2865         case RSPQ_SOP:
 2866                 mh->mh_head = mh->mh_tail = m;
 2867                 m->m_pkthdr.len = len;
 2868                 break;
 2869         case RSPQ_EOP:
 2870                 ret = 1;
 2871                 /* FALLTHROUGH */
 2872         case RSPQ_NSOP_NEOP:
 2873                 if (mh->mh_tail == NULL) {
 2874                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2875                         m_freem(m);
 2876                         break;
 2877                 }
 2878                 mh->mh_tail->m_next = m;
 2879                 mh->mh_tail = m;
 2880                 mh->mh_head->m_pkthdr.len += len;
 2881                 break;
 2882         }
 2883         if (cxgb_debug)
 2884                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2885 done:
 2886         if (++fl->cidx == fl->size)
 2887                 fl->cidx = 0;
 2888 
 2889         return (ret);
 2890 }
 2891 
 2892 /**
 2893  *      handle_rsp_cntrl_info - handles control information in a response
 2894  *      @qs: the queue set corresponding to the response
 2895  *      @flags: the response control flags
 2896  *
 2897  *      Handles the control information of an SGE response, such as GTS
 2898  *      indications and completion credits for the queue set's Tx queues.
 2899  *      HW coalesces credits; we don't do any extra SW coalescing.
 2900  */
 2901 static __inline void
 2902 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2903 {
 2904         unsigned int credits;
 2905 
 2906 #if USE_GTS
 2907         if (flags & F_RSPD_TXQ0_GTS)
 2908                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2909 #endif
 2910         credits = G_RSPD_TXQ0_CR(flags);
 2911         if (credits) 
 2912                 qs->txq[TXQ_ETH].processed += credits;
 2913 
 2914         credits = G_RSPD_TXQ2_CR(flags);
 2915         if (credits)
 2916                 qs->txq[TXQ_CTRL].processed += credits;
 2917 
 2918 #if USE_GTS
 2919         if (flags & F_RSPD_TXQ1_GTS)
 2920                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2921 #endif
 2922         credits = G_RSPD_TXQ1_CR(flags);
 2923         if (credits)
 2924                 qs->txq[TXQ_OFLD].processed += credits;
 2925 
 2926 }
 2927 
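      /*
       * Note: check_ring_db() is intentionally a no-op in this driver; it is
       * still called from process_responses() whenever GTS "sleeping" flags
       * are reported by the hardware.
       */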
 2928 static void
 2929 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2930     unsigned int sleeping)
 2931 {
 2932         ;
 2933 }
 2934 
 2935 /**
 2936  *      process_responses - process responses from an SGE response queue
 2937  *      @adap: the adapter
 2938  *      @qs: the queue set to which the response queue belongs
 2939  *      @budget: how many responses can be processed in this round
 2940  *
 2941  *      Process responses from an SGE response queue up to the supplied budget.
 2942  *      Responses include received packets as well as credits and other events
 2943  *      for the queues that belong to the response queue's queue set.
 2944  *      A negative budget is effectively unlimited.
 2945  *
 2946  *      Additionally choose the interrupt holdoff time for the next interrupt
 2947  *      on this queue.  If the system is under memory shortage use a fairly
 2948  *      long delay to help recovery.
 2949  */
 2950 static int
 2951 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2952 {
 2953         struct sge_rspq *rspq = &qs->rspq;
 2954         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2955         int budget_left = budget;
 2956         unsigned int sleeping = 0;
 2957         int lro_enabled = qs->lro.enabled;
 2958         int skip_lro;
 2959         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2960         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2961         int ngathered = 0;
 2962         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2963 #ifdef DEBUG    
 2964         static int last_holdoff = 0;
 2965         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2966                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2967                 last_holdoff = rspq->holdoff_tmr;
 2968         }
 2969 #endif
 2970         rspq->next_holdoff = rspq->holdoff_tmr;
 2971 
 2972         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2973                 int eth, eop = 0, ethpad = 0;
 2974                 uint32_t flags = ntohl(r->flags);
 2975                 uint32_t rss_csum = *(const uint32_t *)r;
 2976                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2977                 
 2978                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2979                 
 2980                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2981                         struct mbuf *m;
 2982 
 2983                         if (cxgb_debug)
 2984                                 printf("async notification\n");
 2985 
 2986                         if (mh->mh_head == NULL) {
 2987                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 2988                                 m = mh->mh_head;
 2989                         } else {
 2990                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2991                         }
 2992                         if (m == NULL)
 2993                                 goto no_mem;
 2994 
 2995                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2996                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2997                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 2998                         rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
 2999                         eop = 1;
 3000                         rspq->async_notif++;
 3001                         goto skip;
 3002                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 3003                         struct mbuf *m = NULL;
 3004 
 3005                         DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
 3006                             r->rss_hdr.opcode, rspq->cidx);
 3007                         if (mh->mh_head == NULL)
 3008                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 3009                         else 
 3010                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 3011 
 3012                         if (mh->mh_head == NULL &&  m == NULL) {        
 3013                 no_mem:
 3014                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 3015                                 budget_left--;
 3016                                 break;
 3017                         }
 3018                         get_imm_packet(adap, r, mh->mh_head);
 3019                         eop = 1;
 3020                         rspq->imm_data++;
 3021                 } else if (r->len_cq) {
 3022                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 3023                         
 3024                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 3025                         if (eop) {
 3026                                 if (r->rss_hdr.hash_type && !adap->timestamp)
 3027                                         mh->mh_head->m_flags |= M_FLOWID;
 3028                                 mh->mh_head->m_pkthdr.flowid = rss_hash;
 3029                         }
 3030                         
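                              /*
                               * Packet buffers carry a 2-byte pad ahead of
                               * the CPL header; t3_rx_eth() strips both
                               * before the mbuf is passed up.
                               */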
 3031                         ethpad = 2;
 3032                 } else {
 3033                         rspq->pure_rsps++;
 3034                 }
 3035         skip:
 3036                 if (flags & RSPD_CTRL_MASK) {
 3037                         sleeping |= flags & RSPD_GTS_MASK;
 3038                         handle_rsp_cntrl_info(qs, flags);
 3039                 }
 3040 
 3041                 r++;
 3042                 if (__predict_false(++rspq->cidx == rspq->size)) {
 3043                         rspq->cidx = 0;
 3044                         rspq->gen ^= 1;
 3045                         r = rspq->desc;
 3046                 }
 3047 
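                      /*
                       * Return freed response-queue entries to the hardware
                       * in batches of 64.
                       */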
 3048                 if (++rspq->credits >= 64) {
 3049                         refill_rspq(adap, rspq, rspq->credits);
 3050                         rspq->credits = 0;
 3051                 }
 3052                 if (!eth && eop) {
 3053                         mh->mh_head->m_pkthdr.csum_data = rss_csum;
 3054                         /*
 3055                          * XXX size mismatch
 3056                          */
 3057                         m_set_priority(mh->mh_head, rss_hash);
 3058 
 3059                         
 3060                         ngathered = rx_offload(&adap->tdev, rspq,
 3061                             mh->mh_head, offload_mbufs, ngathered);
 3062                         mh->mh_head = NULL;
 3063                         DPRINTF("received offload packet\n");
 3064                         
 3065                 } else if (eth && eop) {
 3066                         struct mbuf *m = mh->mh_head;
 3067 
 3068                         t3_rx_eth(adap, rspq, m, ethpad);
 3069 
 3070                         /*
 3071                          * The T304 sends incoming packets on any qset.  If LRO
 3072                          * is also enabled, we could end up sending the packet up
 3073                          * lro_ctrl->ifp's input.  That is incorrect.
 3074                          *
 3075                          * The mbuf's rcvif was derived from the cpl header and
 3076                          * is accurate.  Skip LRO and just use that.
 3077                          */
 3078                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 3079 
 3080                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 3081 #ifdef INET
 3082                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 3083 #endif
 3084                             ) {
 3085                                 /* successfully queued for LRO */
 3086                         } else {
 3087                                 /*
 3088                                  * LRO not enabled, packet unsuitable for LRO,
 3089                                  * or unable to queue.  Pass it up right now in
 3090                                  * either case.
 3091                                  */
 3092                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 3093                                 (*ifp->if_input)(ifp, m);
 3094                         }
 3095                         mh->mh_head = NULL;
 3096 
 3097                 }
 3098                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3099                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3100                 --budget_left;
 3101         }
 3102 
 3103         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 3104 
 3105 #ifdef INET
 3106         /* Flush LRO */
 3107         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 3108                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 3109                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 3110                 tcp_lro_flush(lro_ctrl, queued);
 3111         }
 3112 #endif
 3113 
 3114         if (sleeping)
 3115                 check_ring_db(adap, qs, sleeping);
 3116 
 3117         mb();  /* commit Tx queue processed updates */
 3118         if (__predict_false(qs->txq_stopped > 1))
 3119                 restart_tx(qs);
 3120 
 3121         __refill_fl_lt(adap, &qs->fl[0], 512);
 3122         __refill_fl_lt(adap, &qs->fl[1], 512);
 3123         budget -= budget_left;
 3124         return (budget);
 3125 }
 3126 
 3127 /*
 3128  * A helper function that processes responses and issues GTS.
 3129  */
 3130 static __inline int
 3131 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3132 {
 3133         int work;
 3134         static int last_holdoff = 0;
 3135         
 3136         work = process_responses(adap, rspq_to_qset(rq), -1);
 3137 
 3138         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3139                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3140                 last_holdoff = rq->next_holdoff;
 3141         }
 3142         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3143             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3144         
 3145         return (work);
 3146 }
 3147 
 3148 
 3149 /*
 3150  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3151  * Handles data events from SGE response queues as well as error and other
 3152  * async events as they all use the same interrupt pin.  We use one SGE
 3153  * response queue per port in this mode and protect all response queues with
 3154  * queue 0's lock.
 3155  */
 3156 void
 3157 t3b_intr(void *data)
 3158 {
 3159         uint32_t i, map;
 3160         adapter_t *adap = data;
 3161         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3162         
 3163         t3_write_reg(adap, A_PL_CLI, 0);
 3164         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3165 
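              /*
               * Each data-interrupt bit identifies a response queue with
               * pending responses; F_ERRINTR flags an error/async event that
               * is handed off to the slow interrupt task below.
               */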
 3166         if (!map) 
 3167                 return;
 3168 
 3169         if (__predict_false(map & F_ERRINTR)) {
 3170                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3171                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3172                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3173         }
 3174 
 3175         mtx_lock(&q0->lock);
 3176         for_each_port(adap, i)
 3177             if (map & (1 << i))
 3178                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3179         mtx_unlock(&q0->lock);
 3180 }
 3181 
 3182 /*
 3183  * The MSI interrupt handler.  This needs to handle data events from SGE
 3184  * response queues as well as error and other async events as they all use
 3185  * the same MSI vector.  We use one SGE response queue per port in this mode
 3186  * and protect all response queues with queue 0's lock.
 3187  */
 3188 void
 3189 t3_intr_msi(void *data)
 3190 {
 3191         adapter_t *adap = data;
 3192         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3193         int i, new_packets = 0;
 3194 
 3195         mtx_lock(&q0->lock);
 3196 
 3197         for_each_port(adap, i)
 3198             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3199                     new_packets = 1;
 3200         mtx_unlock(&q0->lock);
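              /*
               * None of the data queues had work, so treat this interrupt as
               * an error/async event: mask PL interrupts and defer to the
               * slow interrupt task.
               */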
 3201         if (new_packets == 0) {
 3202                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3203                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3204                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3205         }
 3206 }
 3207 
 3208 void
 3209 t3_intr_msix(void *data)
 3210 {
 3211         struct sge_qset *qs = data;
 3212         adapter_t *adap = qs->port->adapter;
 3213         struct sge_rspq *rspq = &qs->rspq;
 3214 
 3215         if (process_responses_gts(adap, rspq) == 0)
 3216                 rspq->unhandled_irqs++;
 3217 }
 3218 
 3219 #define QDUMP_SBUF_SIZE         (32 * 400)
 3220 static int
 3221 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3222 {
 3223         struct sge_rspq *rspq;
 3224         struct sge_qset *qs;
 3225         int i, err, dump_end, idx;
 3226         struct sbuf *sb;
 3227         struct rsp_desc *rspd;
 3228         uint32_t data[4];
 3229         
 3230         rspq = arg1;
 3231         qs = rspq_to_qset(rspq);
 3232         if (rspq->rspq_dump_count == 0) 
 3233                 return (0);
 3234         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3235                 log(LOG_WARNING,
 3236                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3237                 rspq->rspq_dump_count = 0;
 3238                 return (EINVAL);
 3239         }
 3240         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3241                 log(LOG_WARNING,
 3242                     "dump start of %d is greater than queue size\n",
 3243                     rspq->rspq_dump_start);
 3244                 rspq->rspq_dump_start = 0;
 3245                 return (EINVAL);
 3246         }
 3247         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3248         if (err)
 3249                 return (err);
 3250         err = sysctl_wire_old_buffer(req, 0);
 3251         if (err)
 3252                 return (err);
 3253         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3254 
 3255         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3256             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3257             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3258         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3259             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3260         
 3261         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3262             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3263         
 3264         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3265         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3266                 idx = i & (RSPQ_Q_SIZE-1);
 3267                 
 3268                 rspd = &rspq->desc[idx];
 3269                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3270                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3271                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3272                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3273                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3274                     be32toh(rspd->len_cq), rspd->intr_gen);
 3275         }
 3276 
 3277         err = sbuf_finish(sb);
 3278         /* Output a trailing NUL. */
 3279         if (err == 0)
 3280                 err = SYSCTL_OUT(req, "", 1);
 3281         sbuf_delete(sb);
 3282         return (err);
 3283 }       
 3284 
 3285 static int
 3286 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3287 {
 3288         struct sge_txq *txq;
 3289         struct sge_qset *qs;
 3290         int i, j, err, dump_end;
 3291         struct sbuf *sb;
 3292         struct tx_desc *txd;
 3293         uint32_t *WR, wr_hi, wr_lo, gen;
 3294         uint32_t data[4];
 3295         
 3296         txq = arg1;
 3297         qs = txq_to_qset(txq, TXQ_ETH);
 3298         if (txq->txq_dump_count == 0) {
 3299                 return (0);
 3300         }
 3301         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3302                 log(LOG_WARNING,
 3303                     "dump count is too large %d\n", txq->txq_dump_count);
 3304                 txq->txq_dump_count = 1;
 3305                 return (EINVAL);
 3306         }
 3307         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3308                 log(LOG_WARNING,
 3309                     "dump start of %d is greater than queue size\n",
 3310                     txq->txq_dump_start);
 3311                 txq->txq_dump_start = 0;
 3312                 return (EINVAL);
 3313         }
 3314         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3315         if (err)
 3316                 return (err);
 3317         err = sysctl_wire_old_buffer(req, 0);
 3318         if (err)
 3319                 return (err);
 3320         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3321 
 3322         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3323             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3324             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3325         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3326             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3327             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3328         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3329             txq->txq_dump_start,
 3330             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3331 
 3332         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3333         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3334                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3335                 WR = (uint32_t *)txd->flit;
 3336                 wr_hi = ntohl(WR[0]);
 3337                 wr_lo = ntohl(WR[1]);           
 3338                 gen = G_WR_GEN(wr_lo);
 3339                 
 3340                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3341                     wr_hi, wr_lo, gen);
 3342                 for (j = 2; j < 30; j += 4) 
 3343                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3344                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3345 
 3346         }
 3347         err = sbuf_finish(sb);
 3348         /* Output a trailing NUL. */
 3349         if (err == 0)
 3350                 err = SYSCTL_OUT(req, "", 1);
 3351         sbuf_delete(sb);
 3352         return (err);
 3353 }
 3354 
 3355 static int
 3356 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3357 {
 3358         struct sge_txq *txq;
 3359         struct sge_qset *qs;
 3360         int i, j, err, dump_end;
 3361         struct sbuf *sb;
 3362         struct tx_desc *txd;
 3363         uint32_t *WR, wr_hi, wr_lo, gen;
 3364         
 3365         txq = arg1;
 3366         qs = txq_to_qset(txq, TXQ_CTRL);
 3367         if (txq->txq_dump_count == 0) {
 3368                 return (0);
 3369         }
 3370         if (txq->txq_dump_count > 256) {
 3371                 log(LOG_WARNING,
 3372                     "dump count is too large %d\n", txq->txq_dump_count);
 3373                 txq->txq_dump_count = 1;
 3374                 return (EINVAL);
 3375         }
 3376         if (txq->txq_dump_start > 255) {
 3377                 log(LOG_WARNING,
 3378                     "dump start of %d is greater than queue size\n",
 3379                     txq->txq_dump_start);
 3380                 txq->txq_dump_start = 0;
 3381                 return (EINVAL);
 3382         }
 3383 
 3384         err = sysctl_wire_old_buffer(req, 0);
 3385         if (err != 0)
 3386                 return (err);
 3387         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3388         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3389             txq->txq_dump_start,
 3390             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3391 
 3392         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3393         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3394                 txd = &txq->desc[i & (255)];
 3395                 WR = (uint32_t *)txd->flit;
 3396                 wr_hi = ntohl(WR[0]);
 3397                 wr_lo = ntohl(WR[1]);           
 3398                 gen = G_WR_GEN(wr_lo);
 3399                 
 3400                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3401                     wr_hi, wr_lo, gen);
 3402                 for (j = 2; j < 30; j += 4) 
 3403                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3404                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3405 
 3406         }
 3407         err = sbuf_finish(sb);
 3408         /* Output a trailing NUL. */
 3409         if (err == 0)
 3410                 err = SYSCTL_OUT(req, "", 1);
 3411         sbuf_delete(sb);
 3412         return (err);
 3413 }
 3414 
 3415 static int
 3416 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3417 {
 3418         adapter_t *sc = arg1;
 3419         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3420         int coalesce_usecs;     
 3421         struct sge_qset *qs;
 3422         int i, j, err, nqsets = 0;
 3423         struct mtx *lock;
 3424 
 3425         if ((sc->flags & FULL_INIT_DONE) == 0)
 3426                 return (ENXIO);
 3427                 
 3428         coalesce_usecs = qsp->coalesce_usecs;
 3429         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3430 
 3431         if (err != 0) {
 3432                 return (err);
 3433         }
 3434         if (coalesce_usecs == qsp->coalesce_usecs)
 3435                 return (0);
 3436 
 3437         for (i = 0; i < sc->params.nports; i++) 
 3438                 for (j = 0; j < sc->port[i].nqsets; j++)
 3439                         nqsets++;
 3440 
 3441         coalesce_usecs = max(1, coalesce_usecs);
 3442 
 3443         for (i = 0; i < nqsets; i++) {
 3444                 qs = &sc->sge.qs[i];
 3445                 qsp = &sc->params.sge.qset[i];
 3446                 qsp->coalesce_usecs = coalesce_usecs;
 3447                 
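                      /*
                       * With MSI-X each queue set has its own response-queue
                       * lock; otherwise all queue sets share qset 0's lock.
                       */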
 3448                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3449                             &sc->sge.qs[0].rspq.lock;
 3450 
 3451                 mtx_lock(lock);
 3452                 t3_update_qset_coalesce(qs, qsp);
 3453                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3454                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3455                 mtx_unlock(lock);
 3456         }
 3457 
 3458         return (0);
 3459 }
 3460 
 3461 static int
 3462 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3463 {
 3464         adapter_t *sc = arg1;
 3465         int rc, timestamp;
 3466 
 3467         if ((sc->flags & FULL_INIT_DONE) == 0)
 3468                 return (ENXIO);
 3469 
 3470         timestamp = sc->timestamp;
 3471         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3472 
 3473         if (rc != 0)
 3474                 return (rc);
 3475 
 3476         if (timestamp != sc->timestamp) {
 3477                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3478                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3479                 sc->timestamp = timestamp;
 3480         }
 3481 
 3482         return (0);
 3483 }
 3484 
 3485 void
 3486 t3_add_attach_sysctls(adapter_t *sc)
 3487 {
 3488         struct sysctl_ctx_list *ctx;
 3489         struct sysctl_oid_list *children;
 3490 
 3491         ctx = device_get_sysctl_ctx(sc->dev);
 3492         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3493 
 3494         /* random information */
 3495         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3496             "firmware_version",
 3497             CTLFLAG_RD, &sc->fw_version,
 3498             0, "firmware version");
 3499         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3500             "hw_revision",
 3501             CTLFLAG_RD, &sc->params.rev,
 3502             0, "chip model");
 3503         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3504             "port_types",
 3505             CTLFLAG_RD, &sc->port_types,
 3506             0, "type of ports");
 3507         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3508             "enable_debug",
 3509             CTLFLAG_RW, &cxgb_debug,
 3510             0, "enable verbose debugging output");
 3511         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3512             CTLFLAG_RD, &sc->tunq_coalesce,
 3513             "#tunneled packets freed");
 3514         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3515             "txq_overrun",
 3516             CTLFLAG_RD, &txq_fills,
 3517             0, "#times txq overrun");
 3518         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3519             "core_clock",
 3520             CTLFLAG_RD, &sc->params.vpd.cclk,
 3521             0, "core clock frequency (in kHz)");
 3522 }
 3523 
 3524 
 3525 static const char *rspq_name = "rspq";
 3526 static const char *txq_names[] =
 3527 {
 3528         "txq_eth",
 3529         "txq_ofld",
 3530         "txq_ctrl"      
 3531 };
 3532 
 3533 static int
 3534 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3535 {
 3536         struct port_info *p = arg1;
 3537         uint64_t *parg;
 3538 
 3539         if (!p)
 3540                 return (EINVAL);
 3541 
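              /* arg2 is the byte offset of the requested counter in struct mac_stats. */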
 3542         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3543         PORT_LOCK(p);
 3544         t3_mac_update_stats(&p->mac);
 3545         PORT_UNLOCK(p);
 3546 
 3547         return (sysctl_handle_64(oidp, parg, 0, req));
 3548 }
 3549 
 3550 void
 3551 t3_add_configured_sysctls(adapter_t *sc)
 3552 {
 3553         struct sysctl_ctx_list *ctx;
 3554         struct sysctl_oid_list *children;
 3555         int i, j;
 3556         
 3557         ctx = device_get_sysctl_ctx(sc->dev);
 3558         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3559 
 3560         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3561             "intr_coal",
 3562             CTLTYPE_INT|CTLFLAG_RW, sc,
 3563             0, t3_set_coalesce_usecs,
 3564             "I", "interrupt coalescing timer (us)");
 3565 
 3566         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3567             "pkt_timestamp",
 3568             CTLTYPE_INT | CTLFLAG_RW, sc,
 3569             0, t3_pkt_timestamp,
 3570             "I", "provide packet timestamp instead of connection hash");
 3571 
 3572         for (i = 0; i < sc->params.nports; i++) {
 3573                 struct port_info *pi = &sc->port[i];
 3574                 struct sysctl_oid *poid;
 3575                 struct sysctl_oid_list *poidlist;
 3576                 struct mac_stats *mstats = &pi->mac.stats;
 3577                 
 3578                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3579                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3580                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3581                 poidlist = SYSCTL_CHILDREN(poid);
 3582                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3583                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3584                     0, "#queue sets");
 3585 
 3586                 for (j = 0; j < pi->nqsets; j++) {
 3587                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3588                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3589                                           *ctrlqpoid, *lropoid;
 3590                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3591                                                *txqpoidlist, *ctrlqpoidlist,
 3592                                                *lropoidlist;
 3593                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3594                         
 3595                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3596                         
 3597                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3598                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3599                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3600 
 3601                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3602                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3603                                         "freelist #0 empty");
 3604                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3605                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3606                                         "freelist #1 empty");
 3607 
 3608                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3609                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3610                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3611 
 3612                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3613                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3614                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3615 
 3616                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3617                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3618                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3619 
 3620                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3621                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3622                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3623 
 3624                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3625                             CTLFLAG_RD, &qs->rspq.size,
 3626                             0, "#entries in response queue");
 3627                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3628                             CTLFLAG_RD, &qs->rspq.cidx,
 3629                             0, "consumer index");
 3630                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3631                             CTLFLAG_RD, &qs->rspq.credits,
 3632                             0, "#credits");
 3633                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3634                             CTLFLAG_RD, &qs->rspq.starved,
 3635                             0, "#times starved");
 3636                         SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3637                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3638                             "physical address of the queue");
 3639                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3640                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3641                             0, "start rspq dump entry");
 3642                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3643                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3644                             0, "#rspq entries to dump");
 3645                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3646                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3647                             0, t3_dump_rspq, "A", "dump of the response queue");
 3648 
 3649                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3650                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3651                             "#tunneled packets dropped");
 3652                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3653                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3654                             0, "#tunneled packets waiting to be sent");
 3655 #if 0                   
 3656                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3657                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3658                             0, "#tunneled packets queue producer index");
 3659                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3660                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3661                             0, "#tunneled packets queue consumer index");
 3662 #endif                  
 3663                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3664                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3665                             0, "#tunneled packets processed by the card");
 3666                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3667                             CTLFLAG_RD, &txq->cleaned,
 3668                             0, "#tunneled packets cleaned");
 3669                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3670                             CTLFLAG_RD, &txq->in_use,
 3671                             0, "#tunneled packet slots in use");
 3672                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3673                             CTLFLAG_RD, &txq->txq_frees,
 3674                             "#tunneled packets freed");
 3675                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3676                             CTLFLAG_RD, &txq->txq_skipped,
 3677                             0, "#tunneled packet descriptors skipped");
 3678                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3679                             CTLFLAG_RD, &txq->txq_coalesced,
 3680                             "#tunneled packets coalesced");
 3681                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3682                             CTLFLAG_RD, &txq->txq_enqueued,
 3683                             0, "#tunneled packets enqueued to hardware");
 3684                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3685                             CTLFLAG_RD, &qs->txq_stopped,
 3686                             0, "tx queues stopped");
 3687                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3688                             CTLFLAG_RD, &txq->phys_addr,
 3689                             "physical address of the queue");
 3690                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3691                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3692                             0, "txq generation");
 3693                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3694                             CTLFLAG_RD, &txq->cidx,
 3695                             0, "hardware queue cidx");                  
 3696                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3697                             CTLFLAG_RD, &txq->pidx,
 3698                             0, "hardware queue pidx");
 3699                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3700                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3701                             0, "txq start idx for dump");
 3702                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3703                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3704                             0, "txq #entries to dump");                 
 3705                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3706                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3707                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3708 
 3709                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3710                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3711                             0, "ctrlq start idx for dump");
 3712                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3713                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3714                             0, "ctrl #entries to dump");                        
 3715                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3716                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3717                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3718 
 3719                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3720                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3721                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3722                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3723                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3724                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3725                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3726                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3727                 }
 3728 
 3729                 /* Now add a node for mac stats. */
 3730                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3731                     CTLFLAG_RD, NULL, "MAC statistics");
 3732                 poidlist = SYSCTL_CHILDREN(poid);
 3733 
 3734                 /*
 3735                  * We (ab)use the length argument (arg2) to pass on the offset
 3736                  * of the data that we are interested in.  This is only required
 3737                  * for the quad counters that are updated from the hardware (we
 3738                  * make sure that we return the latest value).
 3739                  * sysctl_handle_macstat first updates *all* the counters from
 3740                  * the hardware, and then returns the latest value of the
 3741                  * requested counter.  Best would be to update only the
 3742                  * requested counter from hardware, but t3_mac_update_stats()
 3743                  * hides all the register details and we don't want to dive into
 3744                  * all that here.
 3745                  */
 3746 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3747     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3748     sysctl_handle_macstat, "QU", 0)
 3749                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3750                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3751                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3752                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3753                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3754                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3755                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3756                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3757                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3758                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3759                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3760                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3761                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3762                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3763                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3764                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3765                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3766                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3767                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3768                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3769                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3770                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3771                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3772                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3773                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3774                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3775                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3776                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3777                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3778                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3779                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3780                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3781                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3782                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3783                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3784                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3785                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3786                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3787                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3788                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3789                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3790                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3791                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3792                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3793                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3794                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3795 #undef CXGB_SYSCTL_ADD_QUAD
 3796 
 3797 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3798     CTLFLAG_RD, &mstats->a, 0)
 3799                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3800                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3801                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3802                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3803                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3804                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3805                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3806                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3807                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3808                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3809 #undef CXGB_SYSCTL_ADD_ULONG
 3810         }
 3811 }
 3812         
 3813 /**
 3814  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3815  *      @qs: the queue set
 3816  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3817  *      @idx: the descriptor index in the queue
 3818  *      @data: where to dump the descriptor contents
 3819  *
 3820  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3821  *      size of the descriptor.
 3822  */
 3823 int
 3824 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3825                 unsigned char *data)
 3826 {
 3827         if (qnum >= 6)
 3828                 return (EINVAL);
 3829 
 3830         if (qnum < 3) {
 3831                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3832                         return -EINVAL;
 3833                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3834                 return sizeof(struct tx_desc);
 3835         }
 3836 
 3837         if (qnum == 3) {
 3838                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3839                         return (EINVAL);
 3840                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3841                 return sizeof(struct rsp_desc);
 3842         }
 3843 
 3844         qnum -= 4;
 3845         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3846                 return (EINVAL);
 3847         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3848         return sizeof(struct rx_desc);
 3849 }
