FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/8.3/sys/dev/cxgb/cxgb_sge.c 231598 2012-02-13 19:18:08Z np $");
   32 
   33 #include "opt_inet.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/kernel.h>
   38 #include <sys/module.h>
   39 #include <sys/bus.h>
   40 #include <sys/conf.h>
   41 #include <machine/bus.h>
   42 #include <machine/resource.h>
   43 #include <sys/bus_dma.h>
   44 #include <sys/rman.h>
   45 #include <sys/queue.h>
   46 #include <sys/sysctl.h>
   47 #include <sys/taskqueue.h>
   48 
   49 #include <sys/proc.h>
   50 #include <sys/sbuf.h>
   51 #include <sys/sched.h>
   52 #include <sys/smp.h>
   53 #include <sys/systm.h>
   54 #include <sys/syslog.h>
   55 #include <sys/socket.h>
   56 
   57 #include <net/bpf.h>    
   58 #include <net/ethernet.h>
   59 #include <net/if.h>
   60 #include <net/if_vlan_var.h>
   61 
   62 #include <netinet/in_systm.h>
   63 #include <netinet/in.h>
   64 #include <netinet/ip.h>
   65 #include <netinet/tcp.h>
   66 
   67 #include <dev/pci/pcireg.h>
   68 #include <dev/pci/pcivar.h>
   69 
   70 #include <vm/vm.h>
   71 #include <vm/pmap.h>
   72 
   73 #include <cxgb_include.h>
   74 #include <sys/mvec.h>
   75 
   76 int     txq_fills = 0;
   77 int     multiq_tx_enable = 1;
   78 
   79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   83     "size of per-queue mbuf ring");
   84 
   85 static int cxgb_tx_coalesce_force = 0;
   86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   88     &cxgb_tx_coalesce_force, 0,
   89     "coalesce small packets into a single work request regardless of ring state");
   90 
   91 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   92 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   93 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
   94 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
   95 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
   96 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
   97 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
   98 
   99 
  100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  102     &cxgb_tx_coalesce_enable_start);
  103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  104     &cxgb_tx_coalesce_enable_start, 0,
  105     "coalesce enable threshold");
  106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  109     &cxgb_tx_coalesce_enable_stop, 0,
  110     "coalesce disable threshold");
  111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  114     &cxgb_tx_reclaim_threshold, 0,
  115     "tx cleaning minimum threshold");
  116 
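All of these knobs go through the standard FreeBSD tunable/sysctl plumbing: the TUNABLE_INT entries can be seeded from /boot/loader.conf at boot, and the CTLFLAG_RW ones can also be changed at runtime with sysctl(8).  A minimal sketch (the values here are illustrative only):

      # /boot/loader.conf -- txq_mr_size is CTLFLAG_RDTUN, so boot-time only
      hw.cxgb.txq_mr_size="4096"

      # at runtime, for the CTLFLAG_RW knobs
      sysctl hw.cxgb.tx_coalesce_force=1
      sysctl hw.cxgb.tx_reclaim_threshold=64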
  117 /*
  118  * XXX don't re-enable this until TOE stops assuming
  119  * we have an m_ext
  120  */
  121 static int recycle_enable = 0;
  122 
  123 extern int cxgb_use_16k_clusters;
  124 extern int nmbjumbop;
  125 extern int nmbjumbo9;
  126 extern int nmbjumbo16;
  127 
  128 #define USE_GTS 0
  129 
  130 #define SGE_RX_SM_BUF_SIZE      1536
  131 #define SGE_RX_DROP_THRES       16
  132 #define SGE_RX_COPY_THRES       128
  133 
  134 /*
  135  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  136  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  137  */
  138 #define TX_RECLAIM_PERIOD       (hz >> 1)
  139 
  140 /* 
  141  * Values for sge_txq.flags
  142  */
  143 enum {
  144         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  145         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  146 };
  147 
  148 struct tx_desc {
  149         uint64_t        flit[TX_DESC_FLITS];
  150 } __packed;
  151 
  152 struct rx_desc {
  153         uint32_t        addr_lo;
  154         uint32_t        len_gen;
  155         uint32_t        gen2;
  156         uint32_t        addr_hi;
  157 } __packed;
  158 
  159 struct rsp_desc {               /* response queue descriptor */
  160         struct rss_header       rss_hdr;
  161         uint32_t                flags;
  162         uint32_t                len_cq;
  163         uint8_t                 imm_data[47];
  164         uint8_t                 intr_gen;
  165 } __packed;
  166 
  167 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  168 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  169 #define RX_SW_DESC_INUSE        (1 << 3)
  170 #define TX_SW_DESC_MAPPED       (1 << 4)
  171 
  172 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  173 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  174 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  175 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  176 
  177 struct tx_sw_desc {                /* SW state per Tx descriptor */
  178         struct mbuf     *m;
  179         bus_dmamap_t    map;
  180         int             flags;
  181 };
  182 
  183 struct rx_sw_desc {                /* SW state per Rx descriptor */
  184         caddr_t         rxsd_cl;
  185         struct mbuf     *m;
  186         bus_dmamap_t    map;
  187         int             flags;
  188 };
  189 
  190 struct txq_state {
  191         unsigned int    compl;
  192         unsigned int    gen;
  193         unsigned int    pidx;
  194 };
  195 
  196 struct refill_fl_cb_arg {
  197         int               error;
  198         bus_dma_segment_t seg;
  199         int               nseg;
  200 };
  201 
  202 
  203 /*
  204  * Maps a number of flits to the number of Tx descriptors that can hold them.
  205  * The formula is
  206  *
  207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  208  *
  209  * HW allows up to 4 descriptors to be combined into a WR.
  210  */
  211 static uint8_t flit_desc_map[] = {
  212         0,
  213 #if SGE_NUM_GENBITS == 1
  214         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  215         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  216         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  217         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  218 #elif SGE_NUM_GENBITS == 2
  219         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  220         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  221         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  222         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  223 #else
  224 # error "SGE_NUM_GENBITS must be 1 or 2"
  225 #endif
  226 };
  227 
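As a quick sanity check of the table against the formula, assume the two-generation-bit build, where WR_FLITS comes out to 15 because the last flit of each descriptor carries the second generation bit:

      /* sketch: 20 flits, WR_FLITS assumed to be 15 */
      unsigned int flits = 20;
      unsigned int ndesc = 1 + (flits - 2) / (15 - 1);   /* 1 + 18/14 == 2 */
      /* flit_desc_map[20] is likewise 2 in the SGE_NUM_GENBITS == 2 table */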
  228 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  229 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  230 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  231 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  232 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  233 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  234         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  237         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  238 #define TXQ_RING_DEQUEUE(qs) \
  239         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 
  241 int cxgb_debug = 0;
  242 
  243 static void sge_timer_cb(void *arg);
  244 static void sge_timer_reclaim(void *arg, int ncount);
  245 static void sge_txq_reclaim_handler(void *arg, int ncount);
  246 static void cxgb_start_locked(struct sge_qset *qs);
  247 
  248 /*
  249  * XXX need to cope with bursty scheduling by looking at a wider
  250  * window than we are now for determining the need for coalescing
  251  *
  252  */
  253 static __inline uint64_t
  254 check_pkt_coalesce(struct sge_qset *qs) 
  255 { 
  256         struct adapter *sc; 
  257         struct sge_txq *txq; 
  258         uint8_t *fill;
  259 
  260         if (__predict_false(cxgb_tx_coalesce_force))
  261                 return (1);
  262         txq = &qs->txq[TXQ_ETH]; 
  263         sc = qs->port->adapter; 
  264         fill = &sc->tunq_fill[qs->idx];
  265 
  266         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  267                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  268         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  269                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
  270         /*
  271          * If the hardware transmit queue fills past the enable-start
  272          * threshold we mark the queue set as coalescing; we drop back out
  273          * once it drains below the enable-stop threshold, the ring is empty,
  274          * and the qset has stopped coalescing.  This gives us hysteresis.
  275          */
  276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  277             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  278                 *fill = 0; 
  279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  280                 *fill = 1; 
  281 
  282         return (sc->tunq_coalesce);
  283 } 
  284 
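Assuming TX_ETH_Q_SIZE is 1024, the default thresholds above work out to 512 (enable-start) and 256 (enable-stop), so the per-queue fill flag behaves roughly as follows:

      /*
       * txq->in_use rises to >= 512                   -> *fill = 1
       * txq->in_use drops to <= 256, the ring is
       * empty, and the qset is no longer coalescing   -> *fill = 0
       */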
  285 #ifdef __LP64__
  286 static void
  287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  288 {
  289         uint64_t wr_hilo;
  290 #if _BYTE_ORDER == _LITTLE_ENDIAN
  291         wr_hilo = wr_hi;
  292         wr_hilo |= (((uint64_t)wr_lo)<<32);
  293 #else
  294         wr_hilo = wr_lo;
  295         wr_hilo |= (((uint64_t)wr_hi)<<32);
  296 #endif  
  297         wrp->wrh_hilo = wr_hilo;
  298 }
  299 #else
  300 static void
  301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  302 {
  303 
  304         wrp->wrh_hi = wr_hi;
  305         wmb();
  306         wrp->wrh_lo = wr_lo;
  307 }
  308 #endif
  309 
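Both variants exist so the hardware never sees a half-updated header: on LP64 the two words are merged (byte-order aware) and published with a single 64-bit store, while the 32-bit fallback stores the high word first and issues wmb() so the low word, which carries the length and generation bits, cannot become visible ahead of it.  The call later in write_wr_hdr_sgl() looks like this:

      set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
              V_WR_SGLSFLT(flits)) | wr_hi,
          htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo);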
  310 struct coalesce_info {
  311         int count;
  312         int nbytes;
  313 };
  314 
  315 static int
  316 coalesce_check(struct mbuf *m, void *arg)
  317 {
  318         struct coalesce_info *ci = arg;
  319         int *count = &ci->count;
  320         int *nbytes = &ci->nbytes;
  321 
  322         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  323                 (*count < 7) && (m->m_next == NULL))) {
  324                 *count += 1;
  325                 *nbytes += m->m_len;
  326                 return (1);
  327         }
  328         return (0);
  329 }
  330 
  331 static struct mbuf *
  332 cxgb_dequeue(struct sge_qset *qs)
  333 {
  334         struct mbuf *m, *m_head, *m_tail;
  335         struct coalesce_info ci;
  336 
  337         
  338         if (check_pkt_coalesce(qs) == 0) 
  339                 return TXQ_RING_DEQUEUE(qs);
  340 
  341         m_head = m_tail = NULL;
  342         ci.count = ci.nbytes = 0;
  343         do {
  344                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  345                 if (m_head == NULL) {
  346                         m_tail = m_head = m;
  347                 } else if (m != NULL) {
  348                         m_tail->m_nextpkt = m;
  349                         m_tail = m;
  350                 }
  351         } while (m != NULL);
  352         if (ci.count > 7)
  353                 panic("trying to coalesce %d packets in to one WR", ci.count);
  354         return (m_head);
  355 }
  356         
  357 /**
  358  *      reclaim_completed_tx - reclaims completed Tx descriptors
  359  *      @qs: the queue set owning the Tx queue
  360  *      @reclaim_min: do nothing unless at least this many descriptors are reclaimable
  361  *      @queue: which Tx queue of the set to reclaim (TXQ_ETH, TXQ_OFLD or TXQ_CTRL)
  362  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  363  *      and frees the associated buffers if possible.  Called with the Tx
  364  *      queue's lock held.
  365  */
  366 static __inline int
  367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  368 {
  369         struct sge_txq *q = &qs->txq[queue];
  370         int reclaim = desc_reclaimable(q);
  371 
  372         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  373             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  374                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  375 
  376         if (reclaim < reclaim_min)
  377                 return (0);
  378 
  379         mtx_assert(&qs->lock, MA_OWNED);
  380         if (reclaim > 0) {
  381                 t3_free_tx_desc(qs, reclaim, queue);
  382                 q->cleaned += reclaim;
  383                 q->in_use -= reclaim;
  384         }
  385         if (isset(&qs->txq_stopped, TXQ_ETH))
  386                 clrbit(&qs->txq_stopped, TXQ_ETH);
  387 
  388         return (reclaim);
  389 }
  390 
  391 /**
  392  *      should_restart_tx - are there enough resources to restart a Tx queue?
  393  *      @q: the Tx queue
  394  *
  395  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  396  */
  397 static __inline int
  398 should_restart_tx(const struct sge_txq *q)
  399 {
  400         unsigned int r = q->processed - q->cleaned;
  401 
  402         return q->in_use - r < (q->size >> 1);
  403 }
  404 
  405 /**
  406  *      t3_sge_init - initialize SGE
  407  *      @adap: the adapter
  408  *      @p: the SGE parameters
  409  *
  410  *      Performs SGE initialization needed every time after a chip reset.
  411  *      We do not initialize any of the queue sets here, instead the driver
  412  *      top-level must request those individually.  We also do not enable DMA
  413  *      here, that should be done after the queues have been set up.
  414  */
  415 void
  416 t3_sge_init(adapter_t *adap, struct sge_params *p)
  417 {
  418         u_int ctrl, ups;
  419 
  420         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  421 
  422         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  423                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  424                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  425                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  426 #if SGE_NUM_GENBITS == 1
  427         ctrl |= F_EGRGENCTRL;
  428 #endif
  429         if (adap->params.rev > 0) {
  430                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  431                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  432         }
  433         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  434         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  435                      V_LORCQDRBTHRSH(512));
  436         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  437         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  438                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  439         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  440                      adap->params.rev < T3_REV_C ? 1000 : 500);
  441         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  442         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  443         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  444         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  445         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  446 }
  447 
  448 
  449 /**
  450  *      sgl_len - calculates the size of an SGL of the given capacity
  451  *      @n: the number of SGL entries
  452  *
  453  *      Calculates the number of flits needed for a scatter/gather list that
  454  *      can hold the given number of entries.
  455  */
  456 static __inline unsigned int
  457 sgl_len(unsigned int n)
  458 {
  459         return ((3 * n) / 2 + (n & 1));
  460 }
  461 
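Each SGL entry is a 4-byte length plus an 8-byte address, and the descriptor format packs two entries into three 8-byte flits (see the struct sg_ent handling in make_sgl() below), so @n entries need ceil(3n/2) flits.  The expression above is one way of writing that ceiling:

      /* (3 * n) / 2 + (n & 1)  ==  (3 * n + 1) / 2  for any n >= 0 */
      /* n = 1, 2, 3, 4, 5      ->  2, 3, 5, 6, 8 flits             */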
  462 /**
  463  *      get_imm_packet - return the next ingress packet buffer from a response
  464  *      @resp: the response descriptor containing the packet data
  465  *
  466  *      Return a packet containing the immediate data of the given response.
  467  */
  468 static int
  469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  470 {
  471 
  472         m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
  473         m->m_ext.ext_buf = NULL;
  474         m->m_ext.ext_type = 0;
  475         memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
  476         return (0);     
  477 }
  478 
  479 static __inline u_int
  480 flits_to_desc(u_int n)
  481 {
  482         return (flit_desc_map[n]);
  483 }
  484 
  485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  486                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  487                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  488                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  489                     F_HIRCQPARITYERROR)
  490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  492                       F_RSPQDISABLED)
  493 
  494 /**
  495  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  496  *      @adapter: the adapter
  497  *
  498  *      Interrupt handler for SGE asynchronous (non-data) events.
  499  */
  500 void
  501 t3_sge_err_intr_handler(adapter_t *adapter)
  502 {
  503         unsigned int v, status;
  504 
  505         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  506         if (status & SGE_PARERR)
  507                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  508                          status & SGE_PARERR);
  509         if (status & SGE_FRAMINGERR)
  510                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  511                          status & SGE_FRAMINGERR);
  512         if (status & F_RSPQCREDITOVERFOW)
  513                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  514 
  515         if (status & F_RSPQDISABLED) {
  516                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  517 
  518                 CH_ALERT(adapter,
  519                          "packet delivered to disabled response queue (0x%x)\n",
  520                          (v >> S_RSPQ0DISABLED) & 0xff);
  521         }
  522 
  523         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  524         if (status & SGE_FATALERR)
  525                 t3_fatal_err(adapter);
  526 }
  527 
  528 void
  529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  530 {
  531         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  532 
  533         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  534         nqsets *= adap->params.nports;
  535 
  536         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  537 
  538         while (!powerof2(fl_q_size))
  539                 fl_q_size--;
  540 
  541         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  542             is_offload(adap);
  543 
  544 #if __FreeBSD_version >= 700111
  545         if (use_16k) {
  546                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  547                 jumbo_buf_size = MJUM16BYTES;
  548         } else {
  549                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  550                 jumbo_buf_size = MJUM9BYTES;
  551         }
  552 #else
  553         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  554         jumbo_buf_size = MJUMPAGESIZE;
  555 #endif
  556         while (!powerof2(jumbo_q_size))
  557                 jumbo_q_size--;
  558 
  559         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  560                 device_printf(adap->dev,
  561                     "Insufficient clusters and/or jumbo buffers.\n");
  562 
  563         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  564 
  565         for (i = 0; i < SGE_QSETS; ++i) {
  566                 struct qset_params *q = p->qset + i;
  567 
  568                 if (adap->params.nports > 2) {
  569                         q->coalesce_usecs = 50;
  570                 } else {
  571 #ifdef INVARIANTS                       
  572                         q->coalesce_usecs = 10;
  573 #else
  574                         q->coalesce_usecs = 5;
  575 #endif                  
  576                 }
  577                 q->polling = 0;
  578                 q->rspq_size = RSPQ_Q_SIZE;
  579                 q->fl_size = fl_q_size;
  580                 q->jumbo_size = jumbo_q_size;
  581                 q->jumbo_buf_size = jumbo_buf_size;
  582                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  583                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  584                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  585                 q->cong_thres = 0;
  586         }
  587 }
  588 
  589 int
  590 t3_sge_alloc(adapter_t *sc)
  591 {
  592 
  593         /* The parent tag. */
  594         if (bus_dma_tag_create( NULL,                   /* parent */
  595                                 1, 0,                   /* algnmnt, boundary */
  596                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  597                                 BUS_SPACE_MAXADDR,      /* highaddr */
  598                                 NULL, NULL,             /* filter, filterarg */
  599                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  600                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  601                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  602                                 0,                      /* flags */
  603                                 NULL, NULL,             /* lock, lockarg */
  604                                 &sc->parent_dmat)) {
  605                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  606                 return (ENOMEM);
  607         }
  608 
  609         /*
  610          * DMA tag for normal sized RX frames
  611          */
  612         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  613                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  614                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  615                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  616                 return (ENOMEM);
  617         }
  618 
  619         /* 
  620          * DMA tag for jumbo sized RX frames.
  621          */
  622         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  623                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  624                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  625                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  626                 return (ENOMEM);
  627         }
  628 
  629         /* 
  630          * DMA tag for TX frames.
  631          */
  632         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  633                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  634                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  635                 NULL, NULL, &sc->tx_dmat)) {
  636                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  637                 return (ENOMEM);
  638         }
  639 
  640         return (0);
  641 }
  642 
  643 int
  644 t3_sge_free(struct adapter * sc)
  645 {
  646 
  647         if (sc->tx_dmat != NULL)
  648                 bus_dma_tag_destroy(sc->tx_dmat);
  649 
  650         if (sc->rx_jumbo_dmat != NULL)
  651                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  652 
  653         if (sc->rx_dmat != NULL)
  654                 bus_dma_tag_destroy(sc->rx_dmat);
  655 
  656         if (sc->parent_dmat != NULL)
  657                 bus_dma_tag_destroy(sc->parent_dmat);
  658 
  659         return (0);
  660 }
  661 
  662 void
  663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  664 {
  665 
  666         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  667         qs->rspq.polling = 0 /* p->polling */;
  668 }
  669 
  670 #if !defined(__i386__) && !defined(__amd64__)
  671 static void
  672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  673 {
  674         struct refill_fl_cb_arg *cb_arg = arg;
  675         
  676         cb_arg->error = error;
  677         cb_arg->seg = segs[0];
  678         cb_arg->nseg = nseg;
  679 
  680 }
  681 #endif
  682 /**
  683  *      refill_fl - refill an SGE free-buffer list
  684  *      @sc: the controller softc
  685  *      @q: the free-list to refill
  686  *      @n: the number of new buffers to allocate
  687  *
  688  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  689  *      The caller must assure that @n does not exceed the queue's capacity.
  690  */
  691 static void
  692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  693 {
  694         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  695         struct rx_desc *d = &q->desc[q->pidx];
  696         struct refill_fl_cb_arg cb_arg;
  697         struct mbuf *m;
  698         caddr_t cl;
  699         int err;
  700         
  701         cb_arg.error = 0;
  702         while (n--) {
  703                 /*
  704                  * We only allocate a cluster, mbuf allocation happens after rx
  705                  */
  706                 if (q->zone == zone_pack) {
  707                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  708                                 break;
  709                         cl = m->m_ext.ext_buf;                  
  710                 } else {
  711                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  712                                 break;
  713                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  714                                 uma_zfree(q->zone, cl);
  715                                 break;
  716                         }
  717                 }
  718                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  719                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  720                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  721                                 uma_zfree(q->zone, cl);
  722                                 goto done;
  723                         }
  724                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  725                 }
  726 #if !defined(__i386__) && !defined(__amd64__)
  727                 err = bus_dmamap_load(q->entry_tag, sd->map,
  728                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  729                 
  730                 if (err != 0 || cb_arg.error) {
  731                         if (q->zone == zone_pack)
  732                                 uma_zfree(q->zone, cl);
  733                         m_free(m);
  734                         goto done;
  735                 }
  736 #else
  737                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  738 #endif          
  739                 sd->flags |= RX_SW_DESC_INUSE;
  740                 sd->rxsd_cl = cl;
  741                 sd->m = m;
  742                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  743                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  744                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  745                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  746 
  747                 d++;
  748                 sd++;
  749 
  750                 if (++q->pidx == q->size) {
  751                         q->pidx = 0;
  752                         q->gen ^= 1;
  753                         sd = q->sdesc;
  754                         d = q->desc;
  755                 }
  756                 q->credits++;
  757                 q->db_pending++;
  758         }
  759 
  760 done:
  761         if (q->db_pending >= 32) {
  762                 q->db_pending = 0;
  763                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  764         }
  765 }
  766 
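Each descriptor publishes the cluster's bus address split into two big-endian 32-bit halves along with the current generation bits, and the doorbell write is batched: db_pending only gets flushed to A_SG_KDOORBELL once 32 or more new buffers have been posted.  For a hypothetical bus address of 0x1_2345f000 the descriptor ends up as (sketch):

      d->addr_lo = htobe32(0x2345f000);        /* low 32 bits of the address  */
      d->addr_hi = htobe32(0x00000001);        /* high 32 bits of the address */
      d->len_gen = htobe32(V_FLD_GEN1(q->gen));
      d->gen2    = htobe32(V_FLD_GEN2(q->gen));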
  767 
  768 /**
  769  *      free_rx_bufs - free the Rx buffers on an SGE free list
  770  *      @sc: the controller softc
  771  *      @q: the SGE free list to clean up
  772  *
  773  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  774  *      this queue should be stopped before calling this function.
  775  */
  776 static void
  777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  778 {
  779         u_int cidx = q->cidx;
  780 
  781         while (q->credits--) {
  782                 struct rx_sw_desc *d = &q->sdesc[cidx];
  783 
  784                 if (d->flags & RX_SW_DESC_INUSE) {
  785                         bus_dmamap_unload(q->entry_tag, d->map);
  786                         bus_dmamap_destroy(q->entry_tag, d->map);
  787                         if (q->zone == zone_pack) {
  788                                 m_init(d->m, zone_pack, MCLBYTES,
  789                                     M_NOWAIT, MT_DATA, M_EXT);
  790                                 uma_zfree(zone_pack, d->m);
  791                         } else {
  792                                 m_init(d->m, zone_mbuf, MLEN,
  793                                     M_NOWAIT, MT_DATA, 0);
  794                                 uma_zfree(zone_mbuf, d->m);
  795                                 uma_zfree(q->zone, d->rxsd_cl);
  796                         }                       
  797                 }
  798                 
  799                 d->rxsd_cl = NULL;
  800                 d->m = NULL;
  801                 if (++cidx == q->size)
  802                         cidx = 0;
  803         }
  804 }
  805 
  806 static __inline void
  807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  808 {
  809         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  810 }
  811 
  812 static __inline void
  813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  814 {
  815         uint32_t reclaimable = fl->size - fl->credits;
  816 
  817         if (reclaimable > 0)
  818                 refill_fl(adap, fl, min(max, reclaimable));
  819 }
  820 
  821 /**
  822  *      recycle_rx_buf - recycle a receive buffer
  823  *      @adapter: the adapter
  824  *      @q: the SGE free list
  825  *      @idx: index of buffer to recycle
  826  *
  827  *      Recycles the specified buffer on the given free list by adding it at
  828  *      the next available slot on the list.
  829  */
  830 static void
  831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  832 {
  833         struct rx_desc *from = &q->desc[idx];
  834         struct rx_desc *to   = &q->desc[q->pidx];
  835 
  836         q->sdesc[q->pidx] = q->sdesc[idx];
  837         to->addr_lo = from->addr_lo;        // already big endian
  838         to->addr_hi = from->addr_hi;        // likewise
  839         wmb();  /* necessary ? */
  840         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  841         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  842         q->credits++;
  843 
  844         if (++q->pidx == q->size) {
  845                 q->pidx = 0;
  846                 q->gen ^= 1;
  847         }
  848         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  849 }
  850 
  851 static void
  852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  853 {
  854         uint32_t *addr;
  855 
  856         addr = arg;
  857         *addr = segs[0].ds_addr;
  858 }
  859 
  860 static int
  861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  862     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  863     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  864 {
  865         size_t len = nelem * elem_size;
  866         void *s = NULL;
  867         void *p = NULL;
  868         int err;
  869 
  870         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  871                                       BUS_SPACE_MAXADDR_32BIT,
  872                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  873                                       len, 0, NULL, NULL, tag)) != 0) {
  874                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  875                 return (ENOMEM);
  876         }
  877 
  878         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  879                                     map)) != 0) {
  880                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  881                 return (ENOMEM);
  882         }
  883 
  884         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  885         bzero(p, len);
  886         *(void **)desc = p;
  887 
  888         if (sw_size) {
  889                 len = nelem * sw_size;
  890                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  891                 *(void **)sdesc = s;
  892         }
  893         if (parent_entry_tag == NULL)
  894                 return (0);
  895             
  896         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  897                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  898                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  899                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  900                                       NULL, NULL, entry_tag)) != 0) {
  901                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  902                 return (ENOMEM);
  903         }
  904         return (0);
  905 }
  906 
  907 static void
  908 sge_slow_intr_handler(void *arg, int ncount)
  909 {
  910         adapter_t *sc = arg;
  911 
  912         t3_slow_intr_handler(sc);
  913         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  914         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  915 }
  916 
  917 /**
  918  *      sge_timer_cb - perform periodic maintenance of the SGE queues
  919  *      @arg: the adapter whose SGE queue sets need maintenance
  920  *
  921  *      Runs periodically from a timer to perform maintenance of an SGE queue
  922  *      set.  It performs the following tasks:
  923  *
  924  *      a) Cleans up any completed Tx descriptors that may still be pending.
  925  *      Normal descriptor cleanup happens when new packets are added to a Tx
  926  *      queue so this timer is relatively infrequent and does any cleanup only
  927  *      if the Tx queue has not seen any new packets in a while.  We make a
  928  *      best effort attempt to reclaim descriptors, in that we don't wait
  929  *      around if we cannot get a queue's lock (which most likely is because
  930  *      someone else is queueing new packets and so will also handle the clean
  931  *      up).  Since control queues use immediate data exclusively we don't
  932  *      bother cleaning them up here.
  933  *
  934  *      b) Replenishes Rx queues that have run out due to memory shortage.
  935  *      Normally new Rx buffers are added when existing ones are consumed but
  936  *      when out of memory a queue can become empty.  We try to add only a few
  937  *      buffers here, the queue will be replenished fully as these new buffers
  938  *      are used up if memory shortage has subsided.
  939  *      
  940  *      c) Return coalesced response queue credits in case a response queue is
  941  *      starved.
  942  *
  943  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  944  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  945  */
  946 static void
  947 sge_timer_cb(void *arg)
  948 {
  949         adapter_t *sc = arg;
  950         if ((sc->flags & USING_MSIX) == 0) {
  951                 
  952                 struct port_info *pi;
  953                 struct sge_qset *qs;
  954                 struct sge_txq  *txq;
  955                 int i, j;
  956                 int reclaim_ofl, refill_rx;
  957 
  958                 if (sc->open_device_map == 0) 
  959                         return;
  960 
  961                 for (i = 0; i < sc->params.nports; i++) {
  962                         pi = &sc->port[i];
  963                         for (j = 0; j < pi->nqsets; j++) {
  964                                 qs = &sc->sge.qs[pi->first_qset + j];
  965                                 txq = &qs->txq[0];
  966                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  967                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  968                                     (qs->fl[1].credits < qs->fl[1].size));
  969                                 if (reclaim_ofl || refill_rx) {
  970                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  971                                         break;
  972                                 }
  973                         }
  974                 }
  975         }
  976         
  977         if (sc->params.nports > 2) {
  978                 int i;
  979 
  980                 for_each_port(sc, i) {
  981                         struct port_info *pi = &sc->port[i];
  982 
  983                         t3_write_reg(sc, A_SG_KDOORBELL, 
  984                                      F_SELEGRCNTX | 
  985                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  986                 }
  987         }       
  988         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
  989             sc->open_device_map != 0)
  990                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  991 }
  992 
  993 /*
  994  * This is meant to be a catch-all function to keep sge state private
  995  * to sge.c
  996  *
  997  */
  998 int
  999 t3_sge_init_adapter(adapter_t *sc)
 1000 {
 1001         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1002         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1003         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1004         return (0);
 1005 }
 1006 
 1007 int
 1008 t3_sge_reset_adapter(adapter_t *sc)
 1009 {
 1010         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1011         return (0);
 1012 }
 1013 
 1014 int
 1015 t3_sge_init_port(struct port_info *pi)
 1016 {
 1017         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1018         return (0);
 1019 }
 1020 
 1021 /**
 1022  *      refill_rspq - replenish an SGE response queue
 1023  *      @adapter: the adapter
 1024  *      @q: the response queue to replenish
 1025  *      @credits: how many new responses to make available
 1026  *
 1027  *      Replenishes a response queue by making the supplied number of responses
 1028  *      available to HW.
 1029  */
 1030 static __inline void
 1031 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1032 {
 1033 
 1034         /* mbufs are allocated on demand when a rspq entry is processed. */
 1035         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1036                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1037 }
 1038 
 1039 static void
 1040 sge_txq_reclaim_handler(void *arg, int ncount)
 1041 {
 1042         struct sge_qset *qs = arg;
 1043         int i;
 1044 
 1045         for (i = 0; i < 3; i++)
 1046                 reclaim_completed_tx(qs, 16, i);
 1047 }
 1048 
 1049 static void
 1050 sge_timer_reclaim(void *arg, int ncount)
 1051 {
 1052         struct port_info *pi = arg;
 1053         int i, nqsets = pi->nqsets;
 1054         adapter_t *sc = pi->adapter;
 1055         struct sge_qset *qs;
 1056         struct mtx *lock;
 1057         
 1058         KASSERT((sc->flags & USING_MSIX) == 0,
 1059             ("can't call timer reclaim for msi-x"));
 1060 
 1061         for (i = 0; i < nqsets; i++) {
 1062                 qs = &sc->sge.qs[pi->first_qset + i];
 1063 
 1064                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1065                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1066                             &sc->sge.qs[0].rspq.lock;
 1067 
 1068                 if (mtx_trylock(lock)) {
 1069                         /* XXX currently assume that we are *NOT* polling */
 1070                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1071 
 1072                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1073                                 __refill_fl(sc, &qs->fl[0]);
 1074                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1075                                 __refill_fl(sc, &qs->fl[1]);
 1076                         
 1077                         if (status & (1 << qs->rspq.cntxt_id)) {
 1078                                 if (qs->rspq.credits) {
 1079                                         refill_rspq(sc, &qs->rspq, 1);
 1080                                         qs->rspq.credits--;
 1081                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1082                                             1 << qs->rspq.cntxt_id);
 1083                                 }
 1084                         }
 1085                         mtx_unlock(lock);
 1086                 }
 1087         }
 1088 }
 1089 
 1090 /**
 1091  *      init_qset_cntxt - initialize an SGE queue set context info
 1092  *      @qs: the queue set
 1093  *      @id: the queue set id
 1094  *
 1095  *      Initializes the TIDs and context ids for the queues of a queue set.
 1096  */
 1097 static void
 1098 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1099 {
 1100 
 1101         qs->rspq.cntxt_id = id;
 1102         qs->fl[0].cntxt_id = 2 * id;
 1103         qs->fl[1].cntxt_id = 2 * id + 1;
 1104         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1105         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1106         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1107         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1108         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1109 
 1110         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1111         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1112         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1113 }
 1114 
 1115 
 1116 static void
 1117 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1118 {
 1119         txq->in_use += ndesc;
 1120         /*
 1121          * XXX we don't handle stopping of queue
 1122          * presumably start handles this when we bump against the end
 1123          */
 1124         txqs->gen = txq->gen;
 1125         txq->unacked += ndesc;
 1126         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1127         txq->unacked &= 31;
 1128         txqs->pidx = txq->pidx;
 1129         txq->pidx += ndesc;
 1130 #ifdef INVARIANTS
 1131         if (((txqs->pidx > txq->cidx) &&
 1132                 (txq->pidx < txqs->pidx) &&
 1133                 (txq->pidx >= txq->cidx)) ||
 1134             ((txqs->pidx < txq->cidx) &&
 1135                 (txq->pidx >= txq-> cidx)) ||
 1136             ((txqs->pidx < txq->cidx) &&
 1137                 (txq->cidx < txqs->pidx)))
 1138                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1139                     txqs->pidx, txq->pidx, txq->cidx);
 1140 #endif
 1141         if (txq->pidx >= txq->size) {
 1142                 txq->pidx -= txq->size;
 1143                 txq->gen ^= 1;
 1144         }
 1145 
 1146 }
 1147 
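The unacked arithmetic asks the SGE for a work-request completion roughly once every 32 descriptors: bit 5 of the running count is shifted into the S_WR_COMPL position of the header and the count is folded back to its low five bits.  For instance:

      /* unacked was 30 and ndesc is 4: 30 + 4 = 34, bit 5 (value 32) is set,  */
      /* so txqs->compl requests a completion and unacked wraps to 34 & 31 = 2 */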
 1148 /**
 1149  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1150  *      @m: the packet mbufs
 1151  *      @nsegs: the number of segments 
 1152  *
 1153  *      Returns the number of Tx descriptors needed for the given Ethernet
 1154  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1155  */
 1156 static __inline unsigned int
 1157 calc_tx_descs(const struct mbuf *m, int nsegs)
 1158 {
 1159         unsigned int flits;
 1160 
 1161         if (m->m_pkthdr.len <= PIO_LEN)
 1162                 return 1;
 1163 
 1164         flits = sgl_len(nsegs) + 2;
 1165         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1166                 flits++;
 1167 
 1168         return flits_to_desc(flits);
 1169 }
 1170 
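A worked example of the descriptor math: a packet too large for PIO_LEN that maps to 4 DMA segments needs sgl_len(4) + 2 = 8 flits (two flits for the WR and CPL headers), and flits_to_desc(8) is 1, so it still fits in a single descriptor; the extra TSO flit pushes that to 9, which is also one descriptor:

      /* nsegs = 4, no TSO:   sgl_len(4) + 2 = 6 + 2 = 8 flits  -> 1 descriptor */
      /* nsegs = 4, with TSO: 8 + 1 = 9 flits                   -> 1 descriptor */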
 1171 static unsigned int
 1172 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
 1173     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
 1174 {
 1175         struct mbuf *m0;
 1176         int err, pktlen, pass = 0;
 1177         bus_dma_tag_t tag = txq->entry_tag;
 1178 
 1179 retry:
 1180         err = 0;
 1181         m0 = *m;
 1182         pktlen = m0->m_pkthdr.len;
 1183 #if defined(__i386__) || defined(__amd64__)
 1184         if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
 1185                 goto done;
 1186         } else
 1187 #endif
 1188                 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
 1189 
 1190         if (err == 0) {
 1191                 goto done;
 1192         }
 1193         if (err == EFBIG && pass == 0) {
 1194                 pass = 1;
 1195                 /* Too many segments, try to defrag */
 1196                 m0 = m_defrag(m0, M_DONTWAIT);
 1197                 if (m0 == NULL) {
 1198                         m_freem(*m);
 1199                         *m = NULL;
 1200                         return (ENOBUFS);
 1201                 }
 1202                 *m = m0;
 1203                 goto retry;
 1204         } else if (err == ENOMEM) {
 1205                 return (err);
 1206         } if (err) {
 1207                 if (cxgb_debug)
 1208                         printf("map failure err=%d pktlen=%d\n", err, pktlen);
 1209                 m_freem(m0);
 1210                 *m = NULL;
 1211                 return (err);
 1212         }
 1213 done:
 1214 #if !defined(__i386__) && !defined(__amd64__)
 1215         bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
 1216 #endif  
 1217         txsd->flags |= TX_SW_DESC_MAPPED;
 1218 
 1219         return (0);
 1220 }
 1221 
 1222 /**
 1223  *      make_sgl - populate a scatter/gather list for a packet
 1224  *      @sgp: the SGL to populate
 1225  *      @segs: the packet dma segments
 1226  *      @nsegs: the number of segments
 1227  *
 1228  *      Generates a scatter/gather list for the buffers that make up a packet
 1229  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1230  *      appropriately.
 1231  */
 1232 static __inline void
 1233 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1234 {
 1235         int i, idx;
 1236         
 1237         for (idx = 0, i = 0; i < nsegs; i++) {
 1238                 /*
 1239                  * firmware doesn't like empty segments
 1240                  */
 1241                 if (segs[i].ds_len == 0)
 1242                         continue;
 1243                 if (i && idx == 0) 
 1244                         ++sgp;
 1245                 
 1246                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1247                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1248                 idx ^= 1;
 1249         }
 1250         
 1251         if (idx) {
 1252                 sgp->len[idx] = 0;
 1253                 sgp->addr[idx] = 0;
 1254         }
 1255 }
 1256         
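Because each struct sg_ent holds two length/address pairs, idx ping-pongs between the two slots and sgp only advances once both slots of the current entry are filled; an odd segment count gets a zeroed pair as a terminator.  For three segments the flits laid out look like this (a sketch, assuming the usual len[2]-then-addr[2] layout of sg_ent):

      /*
       * flit 0: len[0] | len[1]     <- lengths of segs 0 and 1
       * flit 1: addr[0]             <- address of seg 0
       * flit 2: addr[1]             <- address of seg 1
       * flit 3: len[0] | 0          <- length of seg 2 + zero terminator
       * flit 4: addr[0]             <- address of seg 2   (sgl_len(3) == 5)
       */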
 1257 /**
 1258  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1259  *      @adap: the adapter
 1260  *      @q: the Tx queue
 1261  *
 1262  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
 1263  *      where the HW may go to sleep just after we check; in that case the
 1264  *      interrupt handler will detect the outstanding TX packet and ring the
 1265  *      doorbell for us.
 1266  *
 1267  *      When GTS is disabled we unconditionally ring the doorbell.
 1268  */
 1269 static __inline void
 1270 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1271 {
 1272 #if USE_GTS
 1273         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1274         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1275                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1276 #ifdef T3_TRACE
 1277                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1278                           q->cntxt_id);
 1279 #endif
 1280                 t3_write_reg(adap, A_SG_KDOORBELL,
 1281                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1282         }
 1283 #else
 1284         if (mustring || ++q->db_pending >= 32) {
 1285                 wmb();            /* write descriptors before telling HW */
 1286                 t3_write_reg(adap, A_SG_KDOORBELL,
 1287                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1288                 q->db_pending = 0;
 1289         }
 1290 #endif
 1291 }
 1292 
 1293 static __inline void
 1294 wr_gen2(struct tx_desc *d, unsigned int gen)
 1295 {
 1296 #if SGE_NUM_GENBITS == 2
 1297         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1298 #endif
 1299 }
 1300 
 1301 /**
 1302  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1303  *      @ndesc: number of Tx descriptors spanned by the SGL
 1304  *      @txd: first Tx descriptor to be written
 1305  *      @txqs: txq state (generation and producer index)
 1306  *      @txq: the SGE Tx queue
 1307  *      @sgl: the SGL
 1308  *      @flits: number of flits to the start of the SGL in the first descriptor
 1309  *      @sgl_flits: the SGL size in flits
 1310  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1311  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1312  *
 1313  *      Write a work request header and an associated SGL.  If the SGL is
 1314  *      small enough to fit into one Tx descriptor it has already been written
 1315  *      and we just need to write the WR header.  Otherwise we distribute the
 1316  *      SGL across the number of descriptors it spans.
 1317  */
 1318 static void
 1319 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1320     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1321     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1322 {
 1323 
 1324         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1325         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1326         
 1327         if (__predict_true(ndesc == 1)) {
 1328                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1329                         V_WR_SGLSFLT(flits)) | wr_hi,
 1330                     htonl(V_WR_LEN(flits + sgl_flits) |
 1331                         V_WR_GEN(txqs->gen)) | wr_lo);
 1332                 /* XXX gen? */
 1333                 wr_gen2(txd, txqs->gen);
 1334                 
 1335         } else {
 1336                 unsigned int ogen = txqs->gen;
 1337                 const uint64_t *fp = (const uint64_t *)sgl;
 1338                 struct work_request_hdr *wp = wrp;
 1339                 
 1340                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1341                     V_WR_SGLSFLT(flits)) | wr_hi;
 1342                 
 1343                 while (sgl_flits) {
 1344                         unsigned int avail = WR_FLITS - flits;
 1345 
 1346                         if (avail > sgl_flits)
 1347                                 avail = sgl_flits;
 1348                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1349                         sgl_flits -= avail;
 1350                         ndesc--;
 1351                         if (!sgl_flits)
 1352                                 break;
 1353                         
 1354                         fp += avail;
 1355                         txd++;
 1356                         txsd++;
 1357                         if (++txqs->pidx == txq->size) {
 1358                                 txqs->pidx = 0;
 1359                                 txqs->gen ^= 1;
 1360                                 txd = txq->desc;
 1361                                 txsd = txq->sdesc;
 1362                         }
 1363 
 1364                         /*
 1365                          * when the head of the mbuf chain
 1366                          * is freed all clusters will be freed
 1367                          * with it
 1368                          */
 1369                         wrp = (struct work_request_hdr *)txd;
 1370                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1371                             V_WR_SGLSFLT(1)) | wr_hi;
 1372                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1373                                     sgl_flits + 1)) |
 1374                             V_WR_GEN(txqs->gen)) | wr_lo;
 1375                         wr_gen2(txd, txqs->gen);
 1376                         flits = 1;
 1377                 }
 1378                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1379                 wmb();
 1380                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1381                 wr_gen2((struct tx_desc *)wp, ogen);
 1382         }
 1383 }
 1384 
 1385 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1386 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1387 
 1388 #define GET_VTAG(cntrl, m) \
 1389 do { \
 1390         if ((m)->m_flags & M_VLANTAG)                                               \
 1391                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1392 } while (0)
 1393 
 1394 static int
 1395 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1396 {
 1397         adapter_t *sc;
 1398         struct mbuf *m0;
 1399         struct sge_txq *txq;
 1400         struct txq_state txqs;
 1401         struct port_info *pi;
 1402         unsigned int ndesc, flits, cntrl, mlen;
 1403         int err, nsegs, tso_info = 0;
 1404 
 1405         struct work_request_hdr *wrp;
 1406         struct tx_sw_desc *txsd;
 1407         struct sg_ent *sgp, *sgl;
 1408         uint32_t wr_hi, wr_lo, sgl_flits; 
 1409         bus_dma_segment_t segs[TX_MAX_SEGS];
 1410 
 1411         struct tx_desc *txd;
 1412                 
 1413         pi = qs->port;
 1414         sc = pi->adapter;
 1415         txq = &qs->txq[TXQ_ETH];
 1416         txd = &txq->desc[txq->pidx];
 1417         txsd = &txq->sdesc[txq->pidx];
 1418         sgl = txq->txq_sgl;
 1419 
 1420         prefetch(txd);
 1421         m0 = *m;
 1422 
 1423         mtx_assert(&qs->lock, MA_OWNED);
 1424         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1425         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1426         
 1427         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1428             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1429                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1430 
 1431         if (m0->m_nextpkt != NULL) {
 1432                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1433                 ndesc = 1;
 1434                 mlen = 0;
 1435         } else {
 1436                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1437                     &m0, segs, &nsegs))) {
 1438                         if (cxgb_debug)
 1439                                 printf("failed ... err=%d\n", err);
 1440                         return (err);
 1441                 }
 1442                 mlen = m0->m_pkthdr.len;
 1443                 ndesc = calc_tx_descs(m0, nsegs);
 1444         }
 1445         txq_prod(txq, ndesc, &txqs);
 1446 
 1447         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1448         txsd->m = m0;
 1449 
 1450         if (m0->m_nextpkt != NULL) {
 1451                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1452                 int i, fidx;
 1453 
 1454                 if (nsegs > 7)
 1455                         panic("trying to coalesce %d packets into one WR", nsegs);
 1456                 txq->txq_coalesced += nsegs;
 1457                 wrp = (struct work_request_hdr *)txd;
 1458                 flits = nsegs*2 + 1;
 1459 
 1460                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1461                         struct cpl_tx_pkt_batch_entry *cbe;
 1462                         uint64_t flit;
 1463                         uint32_t *hflit = (uint32_t *)&flit;
 1464                         int cflags = m0->m_pkthdr.csum_flags;
 1465 
 1466                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1467                         GET_VTAG(cntrl, m0);
 1468                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1469                         if (__predict_false(!(cflags & CSUM_IP)))
 1470                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1471                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
 1472                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1473 
 1474                         hflit[0] = htonl(cntrl);
 1475                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1476                         flit |= htobe64(1 << 24);
 1477                         cbe = &cpl_batch->pkt_entry[i];
 1478                         cbe->cntrl = hflit[0];
 1479                         cbe->len = hflit[1];
 1480                         cbe->addr = htobe64(segs[i].ds_addr);
 1481                 }
 1482 
 1483                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1484                     V_WR_SGLSFLT(flits)) |
 1485                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1486                 wr_lo = htonl(V_WR_LEN(flits) |
 1487                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1488                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1489                 wmb();
 1490                 ETHER_BPF_MTAP(pi->ifp, m0);
 1491                 wr_gen2(txd, txqs.gen);
 1492                 check_ring_tx_db(sc, txq, 0);
 1493                 return (0);             
 1494         } else if (tso_info) {
 1495                 int eth_type;
 1496                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1497                 struct ether_header *eh;
 1498                 struct ip *ip;
 1499                 struct tcphdr *tcp;
 1500 
 1501                 txd->flit[2] = 0;
 1502                 GET_VTAG(cntrl, m0);
 1503                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1504                 hdr->cntrl = htonl(cntrl);
 1505                 hdr->len = htonl(mlen | 0x80000000);
 1506 
 1507                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1508                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1509                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1510                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1511                         panic("tx tso packet too small");
 1512                 }
 1513 
 1514                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1515                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1516                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1517                         if (__predict_false(m0 == NULL)) {
 1518                                 /* XXX panic probably an overreaction */
 1519                                 panic("couldn't fit header into mbuf");
 1520                         }
 1521                 }
 1522 
 1523                 eh = mtod(m0, struct ether_header *);
 1524                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 1525                         eth_type = CPL_ETH_II_VLAN;
 1526                         ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
 1527                 } else {
 1528                         eth_type = CPL_ETH_II;
 1529                         ip = (struct ip *)(eh + 1);
 1530                 }
 1531                 tcp = (struct tcphdr *)(ip + 1);
 1532 
 1533                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
 1534                             V_LSO_IPHDR_WORDS(ip->ip_hl) |
 1535                             V_LSO_TCPHDR_WORDS(tcp->th_off);
 1536                 hdr->lso_info = htonl(tso_info);
 1537 
 1538                 if (__predict_false(mlen <= PIO_LEN)) {
 1539                         /*
 1540                          * Packet is not undersized but still fits in
 1541                          * PIO_LEN; this indicates a TSO bug at the higher levels.
 1542                          */
 1543                         txsd->m = NULL;
 1544                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1545                         flits = (mlen + 7) / 8 + 3;
 1546                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1547                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1548                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1549                         wr_lo = htonl(V_WR_LEN(flits) |
 1550                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1551                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1552                         wmb();
 1553                         ETHER_BPF_MTAP(pi->ifp, m0);
 1554                         wr_gen2(txd, txqs.gen);
 1555                         check_ring_tx_db(sc, txq, 0);
 1556                         m_freem(m0);
 1557                         return (0);
 1558                 }
 1559                 flits = 3;      
 1560         } else {
 1561                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1562                 
 1563                 GET_VTAG(cntrl, m0);
 1564                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1565                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1566                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1567                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
 1568                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1569                 cpl->cntrl = htonl(cntrl);
 1570                 cpl->len = htonl(mlen | 0x80000000);
 1571 
 1572                 if (mlen <= PIO_LEN) {
 1573                         txsd->m = NULL;
 1574                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1575                         flits = (mlen + 7) / 8 + 2;
 1576                         
 1577                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1578                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1579                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1580                         wr_lo = htonl(V_WR_LEN(flits) |
 1581                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1582                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1583                         wmb();
 1584                         ETHER_BPF_MTAP(pi->ifp, m0);
 1585                         wr_gen2(txd, txqs.gen);
 1586                         check_ring_tx_db(sc, txq, 0);
 1587                         m_freem(m0);
 1588                         return (0);
 1589                 }
 1590                 flits = 2;
 1591         }
 1592         wrp = (struct work_request_hdr *)txd;
 1593         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1594         make_sgl(sgp, segs, nsegs);
 1595 
 1596         sgl_flits = sgl_len(nsegs);
 1597 
 1598         ETHER_BPF_MTAP(pi->ifp, m0);
 1599 
 1600         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1601         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1602         wr_lo = htonl(V_WR_TID(txq->token));
 1603         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1604             sgl_flits, wr_hi, wr_lo);
 1605         check_ring_tx_db(sc, txq, 0);
 1606 
 1607         return (0);
 1608 }
 1609 
 1610 void
 1611 cxgb_tx_watchdog(void *arg)
 1612 {
 1613         struct sge_qset *qs = arg;
 1614         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1615 
 1616         if (qs->coalescing != 0 &&
 1617             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1618             TXQ_RING_EMPTY(qs))
 1619                 qs->coalescing = 0; 
 1620         else if (qs->coalescing == 0 &&
 1621             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1622                 qs->coalescing = 1;
 1623         if (TXQ_TRYLOCK(qs)) {
 1624                 qs->qs_flags |= QS_FLUSHING;
 1625                 cxgb_start_locked(qs);
 1626                 qs->qs_flags &= ~QS_FLUSHING;
 1627                 TXQ_UNLOCK(qs);
 1628         }
 1629         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1630                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1631                     qs, txq->txq_watchdog.c_cpu);
 1632 }
 1633 
 1634 static void
 1635 cxgb_tx_timeout(void *arg)
 1636 {
 1637         struct sge_qset *qs = arg;
 1638         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1639 
 1640         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1641                 qs->coalescing = 1;     
 1642         if (TXQ_TRYLOCK(qs)) {
 1643                 qs->qs_flags |= QS_TIMEOUT;
 1644                 cxgb_start_locked(qs);
 1645                 qs->qs_flags &= ~QS_TIMEOUT;
 1646                 TXQ_UNLOCK(qs);
 1647         }
 1648 }
 1649 
 1650 static void
 1651 cxgb_start_locked(struct sge_qset *qs)
 1652 {
 1653         struct mbuf *m_head = NULL;
 1654         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1655         struct port_info *pi = qs->port;
 1656         struct ifnet *ifp = pi->ifp;
 1657 
 1658         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1659                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1660 
 1661         if (!pi->link_config.link_ok) {
 1662                 TXQ_RING_FLUSH(qs);
 1663                 return;
 1664         }
 1665         TXQ_LOCK_ASSERT(qs);
 1666         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1667             pi->link_config.link_ok) {
 1668                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1669 
 1670                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1671                         break;
 1672 
 1673                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1674                         break;
 1675                 /*
 1676                  *  Encapsulation can modify our pointer, and/or make it
 1677                  *  NULL on failure.  In that event, we can't requeue.
 1678                  */
 1679                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1680                         break;
 1681 
 1682                 m_head = NULL;
 1683         }
 1684 
 1685         if (txq->db_pending)
 1686                 check_ring_tx_db(pi->adapter, txq, 1);
 1687 
 1688         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1689             pi->link_config.link_ok)
 1690                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1691                     qs, txq->txq_timer.c_cpu);
 1692         if (m_head != NULL)
 1693                 m_freem(m_head);
 1694 }
 1695 
 1696 static int
 1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1698 {
 1699         struct port_info *pi = qs->port;
 1700         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1701         struct buf_ring *br = txq->txq_mr;
 1702         int error, avail;
 1703 
 1704         avail = txq->size - txq->in_use;
 1705         TXQ_LOCK_ASSERT(qs);
 1706 
 1707         /*
 1708          * We can only do a direct transmit if the following are true:
 1709          * - we aren't coalescing (ring < 3/4 full)
 1710          * - the link is up -- checked in caller
 1711          * - there are no packets enqueued already
 1712          * - there is space in hardware transmit queue 
 1713          */
 1714         if (check_pkt_coalesce(qs) == 0 &&
 1715             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1716                 if (t3_encap(qs, &m)) {
 1717                         if (m != NULL &&
 1718                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1719                                 return (error);
 1720                 } else {
 1721                         if (txq->db_pending)
 1722                                 check_ring_tx_db(pi->adapter, txq, 1);
 1723 
 1724                         /*
 1725                          * We've bypassed the buf ring so we need to update
 1726                          * the stats directly
 1727                          */
 1728                         txq->txq_direct_packets++;
 1729                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1730                 }
 1731         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1732                 return (error);
 1733 
 1734         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1735         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1736             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1737                 cxgb_start_locked(qs);
 1738         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1739                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1740                     qs, txq->txq_timer.c_cpu);
 1741         return (0);
 1742 }
 1743 
 1744 int
 1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1746 {
 1747         struct sge_qset *qs;
 1748         struct port_info *pi = ifp->if_softc;
 1749         int error, qidx = pi->first_qset;
 1750 
 1751         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1752             ||(!pi->link_config.link_ok)) {
 1753                 m_freem(m);
 1754                 return (0);
 1755         }
 1756         
 1757         if (m->m_flags & M_FLOWID)
 1758                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1759 
 1760         qs = &pi->adapter->sge.qs[qidx];
 1761         
 1762         if (TXQ_TRYLOCK(qs)) {
 1763                 /* XXX running */
 1764                 error = cxgb_transmit_locked(ifp, qs, m);
 1765                 TXQ_UNLOCK(qs);
 1766         } else
 1767                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1768         return (error);
 1769 }
 1770 
 1771 void
 1772 cxgb_qflush(struct ifnet *ifp)
 1773 {
 1774         /*
 1775          * Flush any enqueued mbufs in the buf_rings
 1776          * and in the transmit queues.
 1777          * This is a no-op for now.
 1778          */
 1779         return;
 1780 }
 1781 
 1782 /**
 1783  *      write_imm - write a packet into a Tx descriptor as immediate data
 1784  *      @d: the Tx descriptor to write
 1785  *      @m: the packet
 1786  *      @len: the length of packet data to write as immediate data
 1787  *      @gen: the generation bit value to write
 1788  *
 1789  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1790  *      contains a work request at its beginning.  We must write the packet
 1791  *      carefully so the SGE doesn't accidentally read it before it has been
 1792  *      written in its entirety.
 1793  */
 1794 static __inline void
 1795 write_imm(struct tx_desc *d, struct mbuf *m,
 1796           unsigned int len, unsigned int gen)
 1797 {
 1798         struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
 1799         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1800         uint32_t wr_hi, wr_lo;
 1801 
 1802         if (len > WR_LEN)
 1803                 panic("len too big %d\n", len);
 1804         if (len < sizeof(*from))
 1805                 panic("len too small %d", len);
 1806         
 1807         memcpy(&to[1], &from[1], len - sizeof(*from));
 1808         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1809                                         V_WR_BCNTLFLT(len & 7));
 1810         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
 1811                                         V_WR_LEN((len + 7) / 8));
 1812         set_wr_hdr(to, wr_hi, wr_lo);
 1813         wmb();
 1814         wr_gen2(d, gen);
 1815 
 1816         /*
 1817          * This check is a hack; we should really fix the logic so
 1818          * that this can't happen.
 1819          */
 1820         if (m->m_type != MT_DONTFREE)
 1821                 m_freem(m);
 1822         
 1823 }
 1824 
 1825 /**
 1826  *      check_desc_avail - check descriptor availability on a send queue
 1827  *      @adap: the adapter
 1828  *      @q: the TX queue
 1829  *      @m: the packet needing the descriptors
 1830  *      @ndesc: the number of Tx descriptors needed
 1831  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1832  *
 1833  *      Checks if the requested number of Tx descriptors is available on an
 1834  *      SGE send queue.  If the queue is already suspended or not enough
 1835  *      descriptors are available the packet is queued for later transmission.
 1836  *      Must be called with the Tx queue locked.
 1837  *
 1838  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1839  *      enough descriptors and the packet has been queued, and 2 if the caller
 1840  *      needs to retry because there weren't enough descriptors at the
 1841  *      beginning of the call but some freed up in the meantime.
 1842  */
 1843 static __inline int
 1844 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1845                  struct mbuf *m, unsigned int ndesc,
 1846                  unsigned int qid)
 1847 {
 1848         /*
 1849          * XXX We currently only use this for checking the control queue;
 1850          * the control queue is only used for binding qsets, which happens
 1851          * at init time, so we are guaranteed enough descriptors.
 1852          */
 1853         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1854 addq_exit:      mbufq_tail(&q->sendq, m);
 1855                 return 1;
 1856         }
 1857         if (__predict_false(q->size - q->in_use < ndesc)) {
 1858 
 1859                 struct sge_qset *qs = txq_to_qset(q, qid);
 1860 
 1861                 setbit(&qs->txq_stopped, qid);
 1862                 if (should_restart_tx(q) &&
 1863                     test_and_clear_bit(qid, &qs->txq_stopped))
 1864                         return 2;
 1865 
 1866                 q->stops++;
 1867                 goto addq_exit;
 1868         }
 1869         return 0;
 1870 }
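/*
 * Editor's sketch (not compiled): the caller pattern this return convention
 * expects, modelled on ctrl_xmit() and ofld_xmit() below.  "adap", "q", "qs",
 * "m", "ndesc" and "qid" stand for the caller's own variables.
 */
#if 0
        TXQ_LOCK(qs);
again:  reclaim_completed_tx_imm(q);
        ret = check_desc_avail(adap, q, m, ndesc, qid);
        if (__predict_false(ret)) {
                if (ret == 1) {
                        /* Packet was queued on q->sendq; report back-pressure. */
                        TXQ_UNLOCK(qs);
                        return (ENOSPC);
                }
                /* ret == 2: descriptors freed up in the meantime, retry. */
                goto again;
        }
        /* ret == 0: enough descriptors; build the work request and ring the doorbell. */
#endif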
 1871 
 1872 
 1873 /**
 1874  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1875  *      @q: the SGE control Tx queue
 1876  *
 1877  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1878  *      that send only immediate data (presently just the control queues) and
 1879  *      thus do not have any mbufs.
 1880  */
 1881 static __inline void
 1882 reclaim_completed_tx_imm(struct sge_txq *q)
 1883 {
 1884         unsigned int reclaim = q->processed - q->cleaned;
 1885 
 1886         q->in_use -= reclaim;
 1887         q->cleaned += reclaim;
 1888 }
 1889 
 1890 static __inline int
 1891 immediate(const struct mbuf *m)
 1892 {
 1893         return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
 1894 }
 1895 
 1896 /**
 1897  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1898  *      @adap: the adapter
 1899  *      @q: the control queue
 1900  *      @m: the packet
 1901  *
 1902  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1903  *      a control queue must fit entirely as immediate data in a single Tx
 1904  *      descriptor and have no page fragments.
 1905  */
 1906 static int
 1907 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1908 {
 1909         int ret;
 1910         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1911         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1912         
 1913         if (__predict_false(!immediate(m))) {
 1914                 m_freem(m);
 1915                 return 0;
 1916         }
 1917         
 1918         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1919         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1920 
 1921         TXQ_LOCK(qs);
 1922 again:  reclaim_completed_tx_imm(q);
 1923 
 1924         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1925         if (__predict_false(ret)) {
 1926                 if (ret == 1) {
 1927                         TXQ_UNLOCK(qs);
 1928                         return (ENOSPC);
 1929                 }
 1930                 goto again;
 1931         }
 1932         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1933         
 1934         q->in_use++;
 1935         if (++q->pidx >= q->size) {
 1936                 q->pidx = 0;
 1937                 q->gen ^= 1;
 1938         }
 1939         TXQ_UNLOCK(qs);
 1940         wmb();
 1941         t3_write_reg(adap, A_SG_KDOORBELL,
 1942                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1943         return (0);
 1944 }
 1945 
 1946 
 1947 /**
 1948  *      restart_ctrlq - restart a suspended control queue
 1949  *      @qs: the queue set containing the control queue
 1950  *
 1951  *      Resumes transmission on a suspended Tx control queue.
 1952  */
 1953 static void
 1954 restart_ctrlq(void *data, int npending)
 1955 {
 1956         struct mbuf *m;
 1957         struct sge_qset *qs = (struct sge_qset *)data;
 1958         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1959         adapter_t *adap = qs->port->adapter;
 1960 
 1961         TXQ_LOCK(qs);
 1962 again:  reclaim_completed_tx_imm(q);
 1963 
 1964         while (q->in_use < q->size &&
 1965                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1966 
 1967                 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1968 
 1969                 if (++q->pidx >= q->size) {
 1970                         q->pidx = 0;
 1971                         q->gen ^= 1;
 1972                 }
 1973                 q->in_use++;
 1974         }
 1975         if (!mbufq_empty(&q->sendq)) {
 1976                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1977 
 1978                 if (should_restart_tx(q) &&
 1979                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1980                         goto again;
 1981                 q->stops++;
 1982         }
 1983         TXQ_UNLOCK(qs);
 1984         t3_write_reg(adap, A_SG_KDOORBELL,
 1985                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1986 }
 1987 
 1988 
 1989 /*
 1990  * Send a management message through control queue 0
 1991  */
 1992 int
 1993 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1994 {
 1995         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1996 }
 1997 
 1998 /**
 1999  *      free_qset - free the resources of an SGE queue set
 2000  *      @sc: the controller owning the queue set
 2001  *      @q: the queue set
 2002  *
 2003  *      Release the HW and SW resources associated with an SGE queue set, such
 2004  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 2005  *      queue set must be quiesced prior to calling this.
 2006  */
 2007 static void
 2008 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 2009 {
 2010         int i;
 2011         
 2012         reclaim_completed_tx(q, 0, TXQ_ETH);
 2013         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2014                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2015         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2016                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2017                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2018         }
 2019 
 2020         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2021                 if (q->fl[i].desc) {
 2022                         mtx_lock_spin(&sc->sge.reg_lock);
 2023                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2024                         mtx_unlock_spin(&sc->sge.reg_lock);
 2025                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2026                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2027                                         q->fl[i].desc_map);
 2028                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2029                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2030                 }
 2031                 if (q->fl[i].sdesc) {
 2032                         free_rx_bufs(sc, &q->fl[i]);
 2033                         free(q->fl[i].sdesc, M_DEVBUF);
 2034                 }
 2035         }
 2036 
 2037         mtx_unlock(&q->lock);
 2038         MTX_DESTROY(&q->lock);
 2039         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2040                 if (q->txq[i].desc) {
 2041                         mtx_lock_spin(&sc->sge.reg_lock);
 2042                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2043                         mtx_unlock_spin(&sc->sge.reg_lock);
 2044                         bus_dmamap_unload(q->txq[i].desc_tag,
 2045                                         q->txq[i].desc_map);
 2046                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2047                                         q->txq[i].desc_map);
 2048                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2049                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2050                 }
 2051                 if (q->txq[i].sdesc) {
 2052                         free(q->txq[i].sdesc, M_DEVBUF);
 2053                 }
 2054         }
 2055 
 2056         if (q->rspq.desc) {
 2057                 mtx_lock_spin(&sc->sge.reg_lock);
 2058                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2059                 mtx_unlock_spin(&sc->sge.reg_lock);
 2060                 
 2061                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2062                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2063                                 q->rspq.desc_map);
 2064                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2065                 MTX_DESTROY(&q->rspq.lock);
 2066         }
 2067 
 2068 #ifdef INET
 2069         tcp_lro_free(&q->lro.ctrl);
 2070 #endif
 2071 
 2072         bzero(q, sizeof(*q));
 2073 }
 2074 
 2075 /**
 2076  *      t3_free_sge_resources - free SGE resources
 2077  *      @sc: the adapter softc
 2078  *
 2079  *      Frees resources used by the SGE queue sets.
 2080  */
 2081 void
 2082 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2083 {
 2084         int i;
 2085 
 2086         for (i = 0; i < nqsets; ++i) {
 2087                 TXQ_LOCK(&sc->sge.qs[i]);
 2088                 t3_free_qset(sc, &sc->sge.qs[i]);
 2089         }
 2090 }
 2091 
 2092 /**
 2093  *      t3_sge_start - enable SGE
 2094  *      @sc: the controller softc
 2095  *
 2096  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2097  *      transfers.
 2098  */
 2099 void
 2100 t3_sge_start(adapter_t *sc)
 2101 {
 2102         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2103 }
 2104 
 2105 /**
 2106  *      t3_sge_stop - disable SGE operation
 2107  *      @sc: the adapter
 2108  *
 2109  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2110  *      from error interrupts) or from normal process context.  In the latter
 2111  *      case it also disables any pending queue restart tasklets.  Note that
 2112  *      if it is called in interrupt context it cannot disable the restart
 2113  *      tasklets as it cannot wait; however, the tasklets will have no effect
 2114  *      since the doorbells are disabled, and the driver will call this again
 2115  *      later from process context, at which time the tasklets will be stopped
 2116  *      if they are still running.
 2117  */
 2118 void
 2119 t3_sge_stop(adapter_t *sc)
 2120 {
 2121         int i, nqsets;
 2122         
 2123         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2124 
 2125         if (sc->tq == NULL)
 2126                 return;
 2127         
 2128         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2129                 nqsets += sc->port[i].nqsets;
 2130 #ifdef notyet
 2131         /*
 2132          * 
 2133          * XXX
 2134          */
 2135         for (i = 0; i < nqsets; ++i) {
 2136                 struct sge_qset *qs = &sc->sge.qs[i];
 2137                 
 2138                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2139                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2140         }
 2141 #endif
 2142 }
 2143 
 2144 /**
 2145  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2146  *      @qs: the queue set that owns the Tx queue
 2147  *      @reclaimable: the number of descriptors to reclaim
 2148  *      @queue: the Tx queue index within the queue set (e.g. TXQ_ETH)
 2149  *
 2150  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2151  *      Tx buffers.  Called with the Tx queue lock held.
 2152  *
 2153  *      Software descriptors with no mbuf attached are counted in
 2154  *      txq_skipped; the queue's consumer index is advanced past every
 2155  *      descriptor examined.
 2156  */
 2157 void
 2158 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2159 {
 2160         struct tx_sw_desc *txsd;
 2161         unsigned int cidx, mask;
 2162         struct sge_txq *q = &qs->txq[queue];
 2163 
 2164 #ifdef T3_TRACE
 2165         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2166                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2167 #endif
 2168         cidx = q->cidx;
 2169         mask = q->size - 1;
 2170         txsd = &q->sdesc[cidx];
 2171 
 2172         mtx_assert(&qs->lock, MA_OWNED);
 2173         while (reclaimable--) {
 2174                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2175                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2176 
 2177                 if (txsd->m != NULL) {
 2178                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2179                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2180                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2181                         }
 2182                         m_freem_list(txsd->m);
 2183                         txsd->m = NULL;
 2184                 } else
 2185                         q->txq_skipped++;
 2186                 
 2187                 ++txsd;
 2188                 if (++cidx == q->size) {
 2189                         cidx = 0;
 2190                         txsd = q->sdesc;
 2191                 }
 2192         }
 2193         q->cidx = cidx;
 2194 
 2195 }
 2196 
 2197 /**
 2198  *      is_new_response - check if a response is newly written
 2199  *      @r: the response descriptor
 2200  *      @q: the response queue
 2201  *
 2202  *      Returns true if a response descriptor contains a yet unprocessed
 2203  *      response.
 2204  */
 2205 static __inline int
 2206 is_new_response(const struct rsp_desc *r,
 2207     const struct sge_rspq *q)
 2208 {
 2209         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2210 }
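/*
 * Editor's sketch (not compiled): the shape of a response-queue consumer
 * loop built on is_new_response(); the real processing loop appears later
 * in this file.  "q" stands for the caller's struct sge_rspq pointer.
 */
#if 0
        struct rsp_desc *r = &q->desc[q->cidx];

        while (is_new_response(r, q)) {
                rmb();          /* read the rest of the descriptor after the gen bit */
                /* ... handle the response ... */
                r++;
                if (__predict_false(++q->cidx == q->size)) {
                        q->cidx = 0;
                        q->gen ^= 1;    /* generation flips on each wrap */
                        r = q->desc;
                }
        }
#endif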
 2211 
 2212 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2213 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2214                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2215                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2216                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2217 
 2218 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2219 #define NOMEM_INTR_DELAY 2500
 2220 
 2221 /**
 2222  *      write_ofld_wr - write an offload work request
 2223  *      @adap: the adapter
 2224  *      @m: the packet to send
 2225  *      @q: the Tx queue
 2226  *      @pidx: index of the first Tx descriptor to write
 2227  *      @gen: the generation value to use
 2228  *      @ndesc: number of descriptors the packet will occupy
 2229  *
 2230  *      Write an offload work request to send the supplied packet.  The packet
 2231  *      data already carry the work request with most fields populated.
 2232  */
 2233 static void
 2234 write_ofld_wr(adapter_t *adap, struct mbuf *m,
 2235     struct sge_txq *q, unsigned int pidx,
 2236     unsigned int gen, unsigned int ndesc,
 2237     bus_dma_segment_t *segs, unsigned int nsegs)
 2238 {
 2239         unsigned int sgl_flits, flits;
 2240         struct work_request_hdr *from;
 2241         struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 2242         struct tx_desc *d = &q->desc[pidx];
 2243         struct txq_state txqs;
 2244         
 2245         if (immediate(m) && nsegs == 0) {
 2246                 write_imm(d, m, m->m_len, gen);
 2247                 return;
 2248         }
 2249 
 2250         /* Only TX_DATA builds SGLs */
 2251         from = mtod(m, struct work_request_hdr *);
 2252         memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
 2253 
 2254         flits = m->m_len / 8;
 2255         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
 2256 
 2257         make_sgl(sgp, segs, nsegs);
 2258         sgl_flits = sgl_len(nsegs);
 2259 
 2260         txqs.gen = gen;
 2261         txqs.pidx = pidx;
 2262         txqs.compl = 0;
 2263 
 2264         write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
 2265             from->wrh_hi, from->wrh_lo);
 2266 }
 2267 
 2268 /**
 2269  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 2270  *      @m: the packet
 2271  *
 2272  *      Returns the number of Tx descriptors needed for the given offload
 2273  *      packet.  These packets are already fully constructed.
 2274  */
 2275 static __inline unsigned int
 2276 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 2277 {
 2278         unsigned int flits, cnt = 0;
 2279         int ndescs;
 2280 
 2281         if (m->m_len <= WR_LEN && nsegs == 0)
 2282                 return (1);                 /* packet fits as immediate data */
 2283 
 2284         /*
 2285          * This needs to be revisited for TOE.
 2286          */
 2287 
 2288         cnt = nsegs;
 2289                 
 2290         /* headers */
 2291         flits = m->m_len / 8;
 2292 
 2293         ndescs = flits_to_desc(flits + sgl_len(cnt));
 2294 
 2295         return (ndescs);
 2296 }
 2297 
 2298 /**
 2299  *      ofld_xmit - send a packet through an offload queue
 2300  *      @adap: the adapter
 2301  *      @q: the Tx offload queue
 2302  *      @m: the packet
 2303  *
 2304  *      Send an offload packet through an SGE offload queue.
 2305  */
 2306 static int
 2307 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2308 {
 2309         int ret, nsegs;
 2310         unsigned int ndesc;
 2311         unsigned int pidx, gen;
 2312         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2313         bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
 2314         struct tx_sw_desc *stx;
 2315 
 2316         nsegs = m_get_sgllen(m);
 2317         vsegs = m_get_sgl(m);
 2318         ndesc = calc_tx_descs_ofld(m, nsegs);
 2319         busdma_map_sgl(vsegs, segs, nsegs);
 2320 
 2321         stx = &q->sdesc[q->pidx];
 2322         
 2323         TXQ_LOCK(qs);
 2324 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2325         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2326         if (__predict_false(ret)) {
 2327                 if (ret == 1) {
 2328                         printf("no ofld desc avail\n");
 2329                         
 2330                         m_set_priority(m, ndesc);     /* save for restart */
 2331                         TXQ_UNLOCK(qs);
 2332                         return (EINTR);
 2333                 }
 2334                 goto again;
 2335         }
 2336 
 2337         gen = q->gen;
 2338         q->in_use += ndesc;
 2339         pidx = q->pidx;
 2340         q->pidx += ndesc;
 2341         if (q->pidx >= q->size) {
 2342                 q->pidx -= q->size;
 2343                 q->gen ^= 1;
 2344         }
 2345 #ifdef T3_TRACE
 2346         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 2347                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 2348                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 2349                   skb_shinfo(skb)->nr_frags);
 2350 #endif
 2351         TXQ_UNLOCK(qs);
 2352 
 2353         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2354         check_ring_tx_db(adap, q, 1);
 2355         return (0);
 2356 }
 2357 
 2358 /**
 2359  *      restart_offloadq - restart a suspended offload queue
 2360  *      @qs: the queue set containing the offload queue
 2361  *
 2362  *      Resumes transmission on a suspended Tx offload queue.
 2363  */
 2364 static void
 2365 restart_offloadq(void *data, int npending)
 2366 {
 2367         struct mbuf *m;
 2368         struct sge_qset *qs = data;
 2369         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2370         adapter_t *adap = qs->port->adapter;
 2371         bus_dma_segment_t segs[TX_MAX_SEGS];
 2372         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 2373         int nsegs, cleaned;
 2374                 
 2375         TXQ_LOCK(qs);
 2376 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2377 
 2378         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2379                 unsigned int gen, pidx;
 2380                 unsigned int ndesc = m_get_priority(m);
 2381 
 2382                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2383                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2384                         if (should_restart_tx(q) &&
 2385                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2386                                 goto again;
 2387                         q->stops++;
 2388                         break;
 2389                 }
 2390 
 2391                 gen = q->gen;
 2392                 q->in_use += ndesc;
 2393                 pidx = q->pidx;
 2394                 q->pidx += ndesc;
 2395                 if (q->pidx >= q->size) {
 2396                         q->pidx -= q->size;
 2397                         q->gen ^= 1;
 2398                 }
 2399                 
 2400                 (void)mbufq_dequeue(&q->sendq);
 2401                 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 2402                 TXQ_UNLOCK(qs);
 2403                 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2404                 TXQ_LOCK(qs);
 2405         }
 2406 #if USE_GTS
 2407         set_bit(TXQ_RUNNING, &q->flags);
 2408         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2409 #endif
 2410         TXQ_UNLOCK(qs);
 2411         wmb();
 2412         t3_write_reg(adap, A_SG_KDOORBELL,
 2413                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2414 }
 2415 
 2416 /**
 2417  *      queue_set - return the queue set a packet should use
 2418  *      @m: the packet
 2419  *
 2420  *      Maps a packet to the SGE queue set it should use.  The desired queue
 2421  *      set is carried in bits 1-3 of the packet's priority.
 2422  */
 2423 static __inline int
 2424 queue_set(const struct mbuf *m)
 2425 {
 2426         return m_get_priority(m) >> 1;
 2427 }
 2428 
 2429 /**
 2430  *      is_ctrl_pkt - return whether an offload packet is a control packet
 2431  *      @m: the packet
 2432  *
 2433  *      Determines whether an offload packet should use an OFLD or a CTRL
 2434  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 2435  */
 2436 static __inline int
 2437 is_ctrl_pkt(const struct mbuf *m)
 2438 {
 2439         return m_get_priority(m) & 1;
 2440 }
 2441 
 2442 /**
 2443  *      t3_offload_tx - send an offload packet
 2444  *      @tdev: the offload device to send to
 2445  *      @m: the packet
 2446  *
 2447  *      Sends an offload packet.  We use the packet priority to select the
 2448  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2449  *      should be sent as regular or control, bits 1-3 select the queue set.
 2450  */
 2451 int
 2452 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
 2453 {
 2454         adapter_t *adap = tdev2adap(tdev);
 2455         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2456 
 2457         if (__predict_false(is_ctrl_pkt(m))) 
 2458                 return ctrl_xmit(adap, qs, m);
 2459 
 2460         return ofld_xmit(adap, qs, m);
 2461 }
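/*
 * Editor's sketch (not compiled): how a producer would encode the priority
 * consumed by queue_set()/is_ctrl_pkt() above before handing the mbuf to
 * t3_offload_tx().  "qsidx" and "is_ctrl" are hypothetical names.
 */
#if 0
        m_set_priority(m, (qsidx << 1) | (is_ctrl ? 1 : 0));
        error = t3_offload_tx(tdev, m);
#endif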
 2462 
 2463 /**
 2464  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2465  *      @tdev: the offload device that will be receiving the packets
 2466  *      @q: the SGE response queue that assembled the bundle
 2467  *      @m: the partial bundle
 2468  *      @n: the number of packets in the bundle
 2469  *
 2470  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2471  */
 2472 static __inline void
 2473 deliver_partial_bundle(struct t3cdev *tdev,
 2474                         struct sge_rspq *q,
 2475                         struct mbuf *mbufs[], int n)
 2476 {
 2477         if (n) {
 2478                 q->offload_bundles++;
 2479                 cxgb_ofld_recv(tdev, mbufs, n);
 2480         }
 2481 }
 2482 
 2483 static __inline int
 2484 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 2485     struct mbuf *m, struct mbuf *rx_gather[],
 2486     unsigned int gather_idx)
 2487 {
 2488         
 2489         rq->offload_pkts++;
 2490         m->m_pkthdr.header = mtod(m, void *);
 2491         rx_gather[gather_idx++] = m;
 2492         if (gather_idx == RX_BUNDLE_SIZE) {
 2493                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2494                 gather_idx = 0;
 2495                 rq->offload_bundles++;
 2496         }
 2497         return (gather_idx);
 2498 }
 2499 
 2500 static void
 2501 restart_tx(struct sge_qset *qs)
 2502 {
 2503         struct adapter *sc = qs->port->adapter;
 2504         
 2505         
 2506         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2507             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2508             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2509                 qs->txq[TXQ_OFLD].restarts++;
 2510                 DPRINTF("restarting TXQ_OFLD\n");
 2511                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2512         }
 2513         DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
 2514             qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
 2515             qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
 2516             qs->txq[TXQ_CTRL].in_use);
 2517         
 2518         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2519             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2520             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2521                 qs->txq[TXQ_CTRL].restarts++;
 2522                 DPRINTF("restarting TXQ_CTRL\n");
 2523                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2524         }
 2525 }
 2526 
 2527 /**
 2528  *      t3_sge_alloc_qset - initialize an SGE queue set
 2529  *      @sc: the controller softc
 2530  *      @id: the queue set id
 2531  *      @nports: how many Ethernet ports will be using this queue set
 2532  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2533  *      @p: configuration parameters for this queue set
 2534  *      @ntxq: number of Tx queues for the queue set
 2535  *      @pi: port info for queue set
 2536  *
 2537  *      Allocate resources and initialize an SGE queue set.  A queue set
 2538  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2539  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2540  *      queue, offload queue, and control queue.
 2541  */
 2542 int
 2543 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2544                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2545 {
 2546         struct sge_qset *q = &sc->sge.qs[id];
 2547         int i, ret = 0;
 2548 
 2549         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2550         q->port = pi;
 2551 
 2552         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2553             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2554                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2555                 goto err;
 2556         }
 2557         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2558             M_NOWAIT | M_ZERO)) == NULL) {
 2559                 device_printf(sc->dev, "failed to allocate ifq\n");
 2560                 goto err;
 2561         }
 2562         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2563         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2564         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2565         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2566         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2567 
 2568         init_qset_cntxt(q, id);
 2569         q->idx = id;
 2570         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2571                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2572                     &q->fl[0].desc, &q->fl[0].sdesc,
 2573                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2574                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2575                 printf("error %d from alloc ring fl0\n", ret);
 2576                 goto err;
 2577         }
 2578 
 2579         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2580                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2581                     &q->fl[1].desc, &q->fl[1].sdesc,
 2582                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2583                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2584                 printf("error %d from alloc ring fl1\n", ret);
 2585                 goto err;
 2586         }
 2587 
 2588         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2589                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2590                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2591                     NULL, NULL)) != 0) {
 2592                 printf("error %d from alloc ring rspq\n", ret);
 2593                 goto err;
 2594         }
 2595 
 2596         for (i = 0; i < ntxq; ++i) {
 2597                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2598 
 2599                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2600                             sizeof(struct tx_desc), sz,
 2601                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2602                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2603                             &q->txq[i].desc_map,
 2604                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2605                         printf("error %d from alloc ring tx %i\n", ret, i);
 2606                         goto err;
 2607                 }
 2608                 mbufq_init(&q->txq[i].sendq);
 2609                 q->txq[i].gen = 1;
 2610                 q->txq[i].size = p->txq_size[i];
 2611         }
 2612         
 2613         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2614         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2615         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2616         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2617 
 2618         q->fl[0].gen = q->fl[1].gen = 1;
 2619         q->fl[0].size = p->fl_size;
 2620         q->fl[1].size = p->jumbo_size;
 2621 
 2622         q->rspq.gen = 1;
 2623         q->rspq.cidx = 0;
 2624         q->rspq.size = p->rspq_size;
 2625 
 2626         q->txq[TXQ_ETH].stop_thres = nports *
 2627             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2628 
 2629         q->fl[0].buf_size = MCLBYTES;
 2630         q->fl[0].zone = zone_pack;
 2631         q->fl[0].type = EXT_PACKET;
 2632 
 2633         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2634                 q->fl[1].zone = zone_jumbo16;
 2635                 q->fl[1].type = EXT_JUMBO16;
 2636         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2637                 q->fl[1].zone = zone_jumbo9;
 2638                 q->fl[1].type = EXT_JUMBO9;             
 2639         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2640                 q->fl[1].zone = zone_jumbop;
 2641                 q->fl[1].type = EXT_JUMBOP;
 2642         } else {
 2643                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2644                 ret = EDOOFUS;
 2645                 goto err;
 2646         }
 2647         q->fl[1].buf_size = p->jumbo_buf_size;
 2648 
 2649         /* Allocate and setup the lro_ctrl structure */
 2650         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2651 #ifdef INET
 2652         ret = tcp_lro_init(&q->lro.ctrl);
 2653         if (ret) {
 2654                 printf("error %d from tcp_lro_init\n", ret);
 2655                 goto err;
 2656         }
 2657 #endif
 2658         q->lro.ctrl.ifp = pi->ifp;
 2659 
 2660         mtx_lock_spin(&sc->sge.reg_lock);
 2661         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2662                                    q->rspq.phys_addr, q->rspq.size,
 2663                                    q->fl[0].buf_size, 1, 0);
 2664         if (ret) {
 2665                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2666                 goto err_unlock;
 2667         }
 2668 
 2669         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2670                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2671                                           q->fl[i].phys_addr, q->fl[i].size,
 2672                                           q->fl[i].buf_size, p->cong_thres, 1,
 2673                                           0);
 2674                 if (ret) {
 2675                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2676                         goto err_unlock;
 2677                 }
 2678         }
 2679 
 2680         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2681                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2682                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2683                                  1, 0);
 2684         if (ret) {
 2685                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2686                 goto err_unlock;
 2687         }
 2688 
 2689         if (ntxq > 1) {
 2690                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2691                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2692                                          q->txq[TXQ_OFLD].phys_addr,
 2693                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2694                 if (ret) {
 2695                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2696                         goto err_unlock;
 2697                 }
 2698         }
 2699 
 2700         if (ntxq > 2) {
 2701                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2702                                          SGE_CNTXT_CTRL, id,
 2703                                          q->txq[TXQ_CTRL].phys_addr,
 2704                                          q->txq[TXQ_CTRL].size,
 2705                                          q->txq[TXQ_CTRL].token, 1, 0);
 2706                 if (ret) {
 2707                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2708                         goto err_unlock;
 2709                 }
 2710         }
 2711         
 2712         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2713             device_get_unit(sc->dev), irq_vec_idx);
 2714         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2715         
 2716         mtx_unlock_spin(&sc->sge.reg_lock);
 2717         t3_update_qset_coalesce(q, p);
 2718         q->port = pi;
 2719         
 2720         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2721         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2722         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2723 
 2724         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2725                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2726 
 2727         return (0);
 2728 
 2729 err_unlock:
 2730         mtx_unlock_spin(&sc->sge.reg_lock);
 2731 err:    
 2732         TXQ_LOCK(q);
 2733         t3_free_qset(sc, q);
 2734 
 2735         return (ret);
 2736 }
 2737 
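/*
 * Editor's sketch (not compiled): a rough illustration of how an attach path
 * might allocate one queue set per configured qset for a port, using the
 * signature above.  "qidx", "err" and "irq_idx" are placeholders; the real
 * wiring lives in the driver's attach code, not in this file.
 */
#if 0
        for (qidx = 0; qidx < pi->nqsets; qidx++) {
                err = t3_sge_alloc_qset(sc, pi->first_qset + qidx,
                    sc->params.nports, irq_idx,
                    &sc->params.sge.qset[pi->first_qset + qidx],
                    SGE_TXQ_PER_SET, pi);
                if (err)
                        break;
        }
#endif
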
 2738 /*
 2739  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2740  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2741  * will also be taken into account here.
 2742  */
 2743 void
 2744 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2745 {
 2746         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2747         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2748         struct ifnet *ifp = pi->ifp;
 2749         
 2750         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2751 
 2752         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2753             cpl->csum_valid && cpl->csum == 0xffff) {
 2754                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2755                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2756                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2757                 m->m_pkthdr.csum_data = 0xffff;
 2758         }
 2759 
 2760         if (cpl->vlan_valid) {
 2761                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2762                 m->m_flags |= M_VLANTAG;
 2763         } 
 2764 
 2765         m->m_pkthdr.rcvif = ifp;
 2766         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2767         /*
 2768          * adjust after conversion to mbuf chain
 2769          */
 2770         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2771         m->m_len -= (sizeof(*cpl) + ethpad);
 2772         m->m_data += (sizeof(*cpl) + ethpad);
 2773 }
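
/*
 * A minimal sketch, kept out of the build, of the same front-trim expressed
 * with m_adj(9).  It assumes the CPL header and pad are fully contained in
 * the leading mbuf, which is what the hand-rolled arithmetic above relies on
 * as well; m_adj() shrinks m_data/m_len and, for a packet-header mbuf,
 * m_pkthdr.len.
 */
#if 0
static __inline void
strip_rx_cpl(struct mbuf *m, int ethpad)
{

        m_adj(m, sizeof(struct cpl_rx_pkt) + ethpad);
}
#endif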
 2774 
 2775 /**
 2776  *      get_packet - return the next ingress packet buffer from a free list
 2777  *      @adap: the adapter that received the packet
 2778  *      @drop_thres: # of remaining buffers before we start dropping packets
 2779  *      @qs: the qset that the SGE free list holding the packet belongs to
 2780  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2781  *      @r: response descriptor 
 2782  *
 2783  *      Get the next packet from a free list and complete setup of the
 2784  *      mbuf.  If the packet is small we make a copy and recycle the
 2785  *      original buffer, otherwise we use the original buffer itself.  If a
 2786  *      positive drop threshold is supplied packets are dropped and their
 2787  *      buffers recycled if (a) the number of remaining buffers is under the
 2788  *      threshold and the packet is too big to copy, or (b) the packet should
 2789  *      be copied but there is no memory for the copy.
 2790  */
 2791 static int
 2792 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2793     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2794 {
 2795 
 2796         unsigned int len_cq =  ntohl(r->len_cq);
 2797         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2798         int mask, cidx = fl->cidx;
 2799         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2800         uint32_t len = G_RSPD_LEN(len_cq);
 2801         uint32_t flags = M_EXT;
 2802         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2803         caddr_t cl;
 2804         struct mbuf *m;
 2805         int ret = 0;
 2806 
 2807         mask = fl->size - 1;
 2808         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2809         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2810         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2811         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2812 
 2813         fl->credits--;
 2814         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2815         
 2816         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2817             sopeop == RSPQ_SOP_EOP) {
 2818                 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 2819                         goto skip_recycle;
 2820                 cl = mtod(m, void *);
 2821                 memcpy(cl, sd->rxsd_cl, len);
 2822                 recycle_rx_buf(adap, fl, fl->cidx);
 2823                 m->m_pkthdr.len = m->m_len = len;
 2824                 m->m_flags = 0;
 2825                 mh->mh_head = mh->mh_tail = m;
 2826                 ret = 1;
 2827                 goto done;
 2828         } else {
 2829         skip_recycle:
 2830                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2831                 cl = sd->rxsd_cl;
 2832                 m = sd->m;
 2833 
 2834                 if ((sopeop == RSPQ_SOP_EOP) ||
 2835                     (sopeop == RSPQ_SOP))
 2836                         flags |= M_PKTHDR;
 2837                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2838                 if (fl->zone == zone_pack) {
 2839                         /*
 2840                          * restore clobbered data pointer
 2841                          */
 2842                         m->m_data = m->m_ext.ext_buf;
 2843                 } else {
 2844                         m_cljset(m, cl, fl->type);
 2845                 }
 2846                 m->m_len = len;
 2847         }               
 2848         switch(sopeop) {
 2849         case RSPQ_SOP_EOP:
 2850                 ret = 1;
 2851                 /* FALLTHROUGH */
 2852         case RSPQ_SOP:
 2853                 mh->mh_head = mh->mh_tail = m;
 2854                 m->m_pkthdr.len = len;
 2855                 break;
 2856         case RSPQ_EOP:
 2857                 ret = 1;
 2858                 /* FALLTHROUGH */
 2859         case RSPQ_NSOP_NEOP:
 2860                 if (mh->mh_tail == NULL) {
 2861                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2862                         m_freem(m);
 2863                         break;
 2864                 }
 2865                 mh->mh_tail->m_next = m;
 2866                 mh->mh_tail = m;
 2867                 mh->mh_head->m_pkthdr.len += len;
 2868                 break;
 2869         }
 2870         if (cxgb_debug)
 2871                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2872 done:
 2873         if (++fl->cidx == fl->size)
 2874                 fl->cidx = 0;
 2875 
 2876         return (ret);
 2877 }
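
/*
 * A standalone sketch, kept out of the build, of the head/tail bookkeeping
 * that get_packet() applies when a packet spans several free-list buffers:
 * a SOP fragment starts the chain, every later fragment is linked in at the
 * tail, and the accumulated packet length lives only in the head.  The
 * types and names below are illustrative stand-ins, not driver structures.
 */
#if 0
struct frag {                           /* stand-in for one rx mbuf */
        struct frag *next;
        int len;                        /* bytes in this fragment */
        int pktlen;                     /* total bytes, head fragment only */
};

struct frag_chain {                     /* stand-in for struct t3_mbuf_hdr */
        struct frag *head, *tail;
};

static __inline void
chain_append(struct frag_chain *c, struct frag *f, int sop)
{

        if (sop) {                      /* RSPQ_SOP or RSPQ_SOP_EOP */
                c->head = c->tail = f;
                f->pktlen = f->len;
        } else if (c->tail != NULL) {   /* RSPQ_NSOP_NEOP or RSPQ_EOP */
                c->tail->next = f;
                c->tail = f;
                c->head->pktlen += f->len;
        }
}
#endif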
 2878 
 2879 /**
 2880  *      handle_rsp_cntrl_info - handles control information in a response
 2881  *      @qs: the queue set corresponding to the response
 2882  *      @flags: the response control flags
 2883  *
 2884  *      Handles the control information of an SGE response, such as GTS
 2885  *      indications and completion credits for the queue set's Tx queues.
 2886  *      HW coalesces credits; we don't do any extra SW coalescing.
 2887  */
 2888 static __inline void
 2889 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2890 {
 2891         unsigned int credits;
 2892 
 2893 #if USE_GTS
 2894         if (flags & F_RSPD_TXQ0_GTS)
 2895                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2896 #endif
 2897         credits = G_RSPD_TXQ0_CR(flags);
 2898         if (credits) 
 2899                 qs->txq[TXQ_ETH].processed += credits;
 2900 
 2901         credits = G_RSPD_TXQ2_CR(flags);
 2902         if (credits)
 2903                 qs->txq[TXQ_CTRL].processed += credits;
 2904 
 2905 # if USE_GTS
 2906         if (flags & F_RSPD_TXQ1_GTS)
 2907                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2908 # endif
 2909         credits = G_RSPD_TXQ1_CR(flags);
 2910         if (credits)
 2911                 qs->txq[TXQ_OFLD].processed += credits;
 2912 
 2913 }
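
/*
 * The G_RSPD_*_CR() accessors used above follow the usual shift-and-mask
 * pattern of the Chelsio hardware headers.  The field position and width
 * below are made up purely to show the shape of those macros; the real
 * values live in the generated register/CPL headers.
 */
#if 0
#define S_EXAMPLE_CR    0                       /* assumed bit position */
#define M_EXAMPLE_CR    0x7f                    /* assumed field mask */
#define G_EXAMPLE_CR(x) (((x) >> S_EXAMPLE_CR) & M_EXAMPLE_CR)
#endif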
 2914 
 2915 static void
 2916 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2917     unsigned int sleeping)
 2918 {
 2919         ;
 2920 }
 2921 
 2922 /**
 2923  *      process_responses - process responses from an SGE response queue
 2924  *      @adap: the adapter
 2925  *      @qs: the queue set to which the response queue belongs
 2926  *      @budget: how many responses can be processed in this round
 2927  *
 2928  *      Process responses from an SGE response queue up to the supplied budget.
 2929  *      Responses include received packets as well as credits and other events
 2930  *      for the queues that belong to the response queue's queue set.
 2931  *      A negative budget is effectively unlimited.
 2932  *
 2933  *      Additionally choose the interrupt holdoff time for the next interrupt
 2934  *      on this queue.  If the system is under memory shortage use a fairly
 2935  *      long delay to help recovery.
 2936  */
 2937 static int
 2938 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2939 {
 2940         struct sge_rspq *rspq = &qs->rspq;
 2941         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2942         int budget_left = budget;
 2943         unsigned int sleeping = 0;
 2944         int lro_enabled = qs->lro.enabled;
 2945         int skip_lro;
 2946         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2947         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2948         int ngathered = 0;
 2949         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2950 #ifdef DEBUG    
 2951         static int last_holdoff = 0;
 2952         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2953                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2954                 last_holdoff = rspq->holdoff_tmr;
 2955         }
 2956 #endif
 2957         rspq->next_holdoff = rspq->holdoff_tmr;
 2958 
 2959         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2960                 int eth, eop = 0, ethpad = 0;
 2961                 uint32_t flags = ntohl(r->flags);
 2962                 uint32_t rss_csum = *(const uint32_t *)r;
 2963                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2964                 
 2965                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2966                 
 2967                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2968                         struct mbuf *m;
 2969 
 2970                         if (cxgb_debug)
 2971                                 printf("async notification\n");
 2972 
 2973                         if (mh->mh_head == NULL) {
 2974                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 2975                                 m = mh->mh_head;
 2976                         } else {
 2977                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2978                         }
 2979                         if (m == NULL)
 2980                                 goto no_mem;
 2981 
 2982                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2983                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2984                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 2985                         rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
 2986                         eop = 1;
 2987                         rspq->async_notif++;
 2988                         goto skip;
 2989                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2990                         struct mbuf *m = NULL;
 2991 
 2992                         DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
 2993                             r->rss_hdr.opcode, rspq->cidx);
 2994                         if (mh->mh_head == NULL)
 2995                                 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 2996                         else 
 2997                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2998 
 2999                         if (mh->mh_head == NULL &&  m == NULL) {        
 3000                 no_mem:
 3001                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 3002                                 budget_left--;
 3003                                 break;
 3004                         }
 3005                         get_imm_packet(adap, r, mh->mh_head);
 3006                         eop = 1;
 3007                         rspq->imm_data++;
 3008                 } else if (r->len_cq) {
 3009                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 3010                         
 3011                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 3012                         if (eop) {
 3013                                 if (r->rss_hdr.hash_type && !adap->timestamp)
 3014                                         mh->mh_head->m_flags |= M_FLOWID;
 3015                                 mh->mh_head->m_pkthdr.flowid = rss_hash;
 3016                         }
 3017                         
 3018                         ethpad = 2;
 3019                 } else {
 3020                         rspq->pure_rsps++;
 3021                 }
 3022         skip:
 3023                 if (flags & RSPD_CTRL_MASK) {
 3024                         sleeping |= flags & RSPD_GTS_MASK;
 3025                         handle_rsp_cntrl_info(qs, flags);
 3026                 }
 3027 
 3028                 r++;
 3029                 if (__predict_false(++rspq->cidx == rspq->size)) {
 3030                         rspq->cidx = 0;
 3031                         rspq->gen ^= 1;
 3032                         r = rspq->desc;
 3033                 }
 3034 
 3035                 if (++rspq->credits >= 64) {
 3036                         refill_rspq(adap, rspq, rspq->credits);
 3037                         rspq->credits = 0;
 3038                 }
 3039                 if (!eth && eop) {
 3040                         mh->mh_head->m_pkthdr.csum_data = rss_csum;
 3041                         /*
 3042                          * XXX size mismatch
 3043                          */
 3044                         m_set_priority(mh->mh_head, rss_hash);
 3045 
 3046                         
 3047                         ngathered = rx_offload(&adap->tdev, rspq,
 3048                             mh->mh_head, offload_mbufs, ngathered);
 3049                         mh->mh_head = NULL;
 3050                         DPRINTF("received offload packet\n");
 3051                         
 3052                 } else if (eth && eop) {
 3053                         struct mbuf *m = mh->mh_head;
 3054 
 3055                         t3_rx_eth(adap, rspq, m, ethpad);
 3056 
 3057                         /*
 3058                          * The T304 sends incoming packets on any qset.  If LRO
 3059                          * is also enabled, we could end up sending packet up
 3060                          * is also enabled, we could end up sending the packet up
 3061                          *
 3062                          * The mbuf's rcvif was derived from the cpl header and
 3063                          * is accurate.  Skip LRO and just use that.
 3064                          */
 3065                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 3066 
 3067                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 3068 #ifdef INET
 3069                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 3070 #endif
 3071                             ) {
 3072                                 /* successfully queued for LRO */
 3073                         } else {
 3074                                 /*
 3075                                  * LRO not enabled, packet unsuitable for LRO,
 3076                                  * or unable to queue.  Pass it up right now in
 3077                                  * either case.
 3078                                  */
 3079                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 3080                                 (*ifp->if_input)(ifp, m);
 3081                         }
 3082                         mh->mh_head = NULL;
 3083 
 3084                 }
 3085                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3086                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3087                 --budget_left;
 3088         }
 3089 
 3090         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 3091 
 3092 #ifdef INET
 3093         /* Flush LRO */
 3094         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 3095                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 3096                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 3097                 tcp_lro_flush(lro_ctrl, queued);
 3098         }
 3099 #endif
 3100 
 3101         if (sleeping)
 3102                 check_ring_db(adap, qs, sleeping);
 3103 
 3104         mb();  /* commit Tx queue processed updates */
 3105         if (__predict_false(qs->txq_stopped > 1))
 3106                 restart_tx(qs);
 3107 
 3108         __refill_fl_lt(adap, &qs->fl[0], 512);
 3109         __refill_fl_lt(adap, &qs->fl[1], 512);
 3110         budget -= budget_left;
 3111         return (budget);
 3112 }
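
/*
 * A sketch, kept out of the build, of how a bounded poller could use the
 * value returned above (the number of responses consumed).  The driver
 * itself always passes a budget of -1, i.e. effectively unlimited, from
 * process_responses_gts() below.
 */
#if 0
static __inline int
poll_qset(adapter_t *adap, struct sge_qset *qs, int budget)
{
        int done;

        done = process_responses(adap, qs, budget);

        /* Consuming the whole budget means more responses may be pending. */
        return (done == budget);
}
#endif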
 3113 
 3114 /*
 3115  * A helper function that processes responses and issues GTS.
 3116  */
 3117 static __inline int
 3118 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3119 {
 3120         int work;
 3121         static int last_holdoff = 0;
 3122         
 3123         work = process_responses(adap, rspq_to_qset(rq), -1);
 3124 
 3125         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3126                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3127                 last_holdoff = rq->next_holdoff;
 3128         }
 3129         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3130             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3131         
 3132         return (work);
 3133 }
 3134 
 3135 
 3136 /*
 3137  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3138  * Handles data events from SGE response queues as well as error and other
 3139  * async events as they all use the same interrupt pin.  We use one SGE
 3140  * response queue per port in this mode and protect all response queues with
 3141  * queue 0's lock.
 3142  */
 3143 void
 3144 t3b_intr(void *data)
 3145 {
 3146         uint32_t i, map;
 3147         adapter_t *adap = data;
 3148         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3149         
 3150         t3_write_reg(adap, A_PL_CLI, 0);
 3151         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3152 
 3153         if (!map) 
 3154                 return;
 3155 
 3156         if (__predict_false(map & F_ERRINTR)) {
 3157                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3158                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3159                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3160         }
 3161 
 3162         mtx_lock(&q0->lock);
 3163         for_each_port(adap, i)
 3164             if (map & (1 << i))
 3165                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3166         mtx_unlock(&q0->lock);
 3167 }
 3168 
 3169 /*
 3170  * The MSI interrupt handler.  This needs to handle data events from SGE
 3171  * response queues as well as error and other async events as they all use
 3172  * the same MSI vector.  We use one SGE response queue per port in this mode
 3173  * and protect all response queues with queue 0's lock.
 3174  */
 3175 void
 3176 t3_intr_msi(void *data)
 3177 {
 3178         adapter_t *adap = data;
 3179         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3180         int i, new_packets = 0;
 3181 
 3182         mtx_lock(&q0->lock);
 3183 
 3184         for_each_port(adap, i)
 3185             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3186                     new_packets = 1;
 3187         mtx_unlock(&q0->lock);
 3188         if (new_packets == 0) {
 3189                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3190                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3191                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3192         }
 3193 }
 3194 
 3195 void
 3196 t3_intr_msix(void *data)
 3197 {
 3198         struct sge_qset *qs = data;
 3199         adapter_t *adap = qs->port->adapter;
 3200         struct sge_rspq *rspq = &qs->rspq;
 3201 
 3202         if (process_responses_gts(adap, rspq) == 0)
 3203                 rspq->unhandled_irqs++;
 3204 }
 3205 
 3206 #define QDUMP_SBUF_SIZE         (32 * 400)
 3207 static int
 3208 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3209 {
 3210         struct sge_rspq *rspq;
 3211         struct sge_qset *qs;
 3212         int i, err, dump_end, idx;
 3213         struct sbuf *sb;
 3214         struct rsp_desc *rspd;
 3215         uint32_t data[4];
 3216         
 3217         rspq = arg1;
 3218         qs = rspq_to_qset(rspq);
 3219         if (rspq->rspq_dump_count == 0) 
 3220                 return (0);
 3221         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3222                 log(LOG_WARNING,
 3223                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3224                 rspq->rspq_dump_count = 0;
 3225                 return (EINVAL);
 3226         }
 3227         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3228                 log(LOG_WARNING,
 3229                     "dump start of %d is greater than queue size\n",
 3230                     rspq->rspq_dump_start);
 3231                 rspq->rspq_dump_start = 0;
 3232                 return (EINVAL);
 3233         }
 3234         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3235         if (err)
 3236                 return (err);
 3237         err = sysctl_wire_old_buffer(req, 0);
 3238         if (err)
 3239                 return (err);
 3240         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3241 
 3242         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3243             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3244             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3245         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3246             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3247         
 3248         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3249             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3250         
 3251         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3252         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3253                 idx = i & (RSPQ_Q_SIZE-1);
 3254                 
 3255                 rspd = &rspq->desc[idx];
 3256                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3257                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3258                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3259                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3260                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3261                     be32toh(rspd->len_cq), rspd->intr_gen);
 3262         }
 3263 
 3264         err = sbuf_finish(sb);
 3265         /* Output a trailing NUL. */
 3266         if (err == 0)
 3267                 err = SYSCTL_OUT(req, "", 1);
 3268         sbuf_delete(sb);
 3269         return (err);
 3270 }       
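
/*
 * Userland usage sketch, kept out of the build: the "qdump" nodes registered
 * in t3_add_configured_sysctls() below return this handler's sbuf as a
 * string.  The matching "dump_count" node must be set to a nonzero value
 * first, and the OID prefix shown here is only an assumption about how the
 * adapter attaches -- check the actual tree with sysctl(8).
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        const char *oid = "dev.cxgbc.0.port0.qs0.rspq.qdump";  /* assumed */
        char *buf;
        size_t len = 0;

        if (sysctlbyname(oid, NULL, &len, NULL, 0) == -1)
                return (1);
        if ((buf = malloc(len + 1)) == NULL)
                return (1);
        if (sysctlbyname(oid, buf, &len, NULL, 0) == -1)
                return (1);
        buf[len] = '\0';
        printf("%s", buf);
        free(buf);
        return (0);
}
#endif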
 3271 
 3272 static int
 3273 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3274 {
 3275         struct sge_txq *txq;
 3276         struct sge_qset *qs;
 3277         int i, j, err, dump_end;
 3278         struct sbuf *sb;
 3279         struct tx_desc *txd;
 3280         uint32_t *WR, wr_hi, wr_lo, gen;
 3281         uint32_t data[4];
 3282         
 3283         txq = arg1;
 3284         qs = txq_to_qset(txq, TXQ_ETH);
 3285         if (txq->txq_dump_count == 0) {
 3286                 return (0);
 3287         }
 3288         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3289                 log(LOG_WARNING,
 3290                     "dump count is too large %d\n", txq->txq_dump_count);
 3291                 txq->txq_dump_count = 1;
 3292                 return (EINVAL);
 3293         }
 3294         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3295                 log(LOG_WARNING,
 3296                     "dump start of %d is greater than queue size\n",
 3297                     txq->txq_dump_start);
 3298                 txq->txq_dump_start = 0;
 3299                 return (EINVAL);
 3300         }
 3301         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3302         if (err)
 3303                 return (err);
 3304         err = sysctl_wire_old_buffer(req, 0);
 3305         if (err)
 3306                 return (err);
 3307         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3308 
 3309         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3310             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3311             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3312             sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3313             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3314             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3315         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3316             txq->txq_dump_start,
 3317             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3318 
 3319         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3320         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3321                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3322                 WR = (uint32_t *)txd->flit;
 3323                 wr_hi = ntohl(WR[0]);
 3324                 wr_lo = ntohl(WR[1]);           
 3325                 gen = G_WR_GEN(wr_lo);
 3326                 
 3327                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3328                     wr_hi, wr_lo, gen);
 3329                 for (j = 2; j < 30; j += 4) 
 3330                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3331                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3332 
 3333         }
 3334         err = sbuf_finish(sb);
 3335         /* Output a trailing NUL. */
 3336         if (err == 0)
 3337                 err = SYSCTL_OUT(req, "", 1);
 3338         sbuf_delete(sb);
 3339         return (err);
 3340 }
 3341 
 3342 static int
 3343 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3344 {
 3345         struct sge_txq *txq;
 3346         struct sge_qset *qs;
 3347         int i, j, err, dump_end;
 3348         struct sbuf *sb;
 3349         struct tx_desc *txd;
 3350         uint32_t *WR, wr_hi, wr_lo, gen;
 3351         
 3352         txq = arg1;
 3353         qs = txq_to_qset(txq, TXQ_CTRL);
 3354         if (txq->txq_dump_count == 0) {
 3355                 return (0);
 3356         }
 3357         if (txq->txq_dump_count > 256) {
 3358                 log(LOG_WARNING,
 3359                     "dump count is too large %d\n", txq->txq_dump_count);
 3360                 txq->txq_dump_count = 1;
 3361                 return (EINVAL);
 3362         }
 3363         if (txq->txq_dump_start > 255) {
 3364                 log(LOG_WARNING,
 3365                     "dump start of %d is greater than queue size\n",
 3366                     txq->txq_dump_start);
 3367                 txq->txq_dump_start = 0;
 3368                 return (EINVAL);
 3369         }
 3370 
 3371         err = sysctl_wire_old_buffer(req, 0);
 3372         if (err != 0)
 3373                 return (err);
 3374         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3375         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3376             txq->txq_dump_start,
 3377             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3378 
 3379         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3380         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3381                 txd = &txq->desc[i & (255)];
 3382                 WR = (uint32_t *)txd->flit;
 3383                 wr_hi = ntohl(WR[0]);
 3384                 wr_lo = ntohl(WR[1]);           
 3385                 gen = G_WR_GEN(wr_lo);
 3386                 
 3387                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3388                     wr_hi, wr_lo, gen);
 3389                 for (j = 2; j < 30; j += 4) 
 3390                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3391                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3392 
 3393         }
 3394         err = sbuf_finish(sb);
 3395         /* Output a trailing NUL. */
 3396         if (err == 0)
 3397                 err = SYSCTL_OUT(req, "", 1);
 3398         sbuf_delete(sb);
 3399         return (err);
 3400 }
 3401 
 3402 static int
 3403 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3404 {
 3405         adapter_t *sc = arg1;
 3406         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3407         int coalesce_usecs;     
 3408         struct sge_qset *qs;
 3409         int i, j, err, nqsets = 0;
 3410         struct mtx *lock;
 3411 
 3412         if ((sc->flags & FULL_INIT_DONE) == 0)
 3413                 return (ENXIO);
 3414                 
 3415         coalesce_usecs = qsp->coalesce_usecs;
 3416         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3417 
 3418         if (err != 0) {
 3419                 return (err);
 3420         }
 3421         if (coalesce_usecs == qsp->coalesce_usecs)
 3422                 return (0);
 3423 
 3424         for (i = 0; i < sc->params.nports; i++) 
 3425                 for (j = 0; j < sc->port[i].nqsets; j++)
 3426                         nqsets++;
 3427 
 3428         coalesce_usecs = max(1, coalesce_usecs);
 3429 
 3430         for (i = 0; i < nqsets; i++) {
 3431                 qs = &sc->sge.qs[i];
 3432                 qsp = &sc->params.sge.qset[i];
 3433                 qsp->coalesce_usecs = coalesce_usecs;
 3434                 
 3435                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3436                             &sc->sge.qs[0].rspq.lock;
 3437 
 3438                 mtx_lock(lock);
 3439                 t3_update_qset_coalesce(qs, qsp);
 3440                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3441                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3442                 mtx_unlock(lock);
 3443         }
 3444 
 3445         return (0);
 3446 }
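
/*
 * Userland usage sketch, kept out of the build, for the "intr_coal" node
 * registered in t3_add_configured_sysctls() below; the handler above then
 * applies the new holdoff value to every queue set.  The OID prefix is an
 * assumption -- check the actual tree with sysctl(8).
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_intr_coal(int usecs)
{

        return (sysctlbyname("dev.cxgbc.0.intr_coal", NULL, NULL,
            &usecs, sizeof(usecs)));
}
#endif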
 3447 
 3448 static int
 3449 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3450 {
 3451         adapter_t *sc = arg1;
 3452         int rc, timestamp;
 3453 
 3454         if ((sc->flags & FULL_INIT_DONE) == 0)
 3455                 return (ENXIO);
 3456 
 3457         timestamp = sc->timestamp;
 3458         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3459 
 3460         if (rc != 0)
 3461                 return (rc);
 3462 
 3463         if (timestamp != sc->timestamp) {
 3464                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3465                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3466                 sc->timestamp = timestamp;
 3467         }
 3468 
 3469         return (0);
 3470 }
 3471 
 3472 void
 3473 t3_add_attach_sysctls(adapter_t *sc)
 3474 {
 3475         struct sysctl_ctx_list *ctx;
 3476         struct sysctl_oid_list *children;
 3477 
 3478         ctx = device_get_sysctl_ctx(sc->dev);
 3479         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3480 
 3481         /* random information */
 3482         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3483             "firmware_version",
 3484             CTLFLAG_RD, &sc->fw_version,
 3485             0, "firmware version");
 3486         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3487             "hw_revision",
 3488             CTLFLAG_RD, &sc->params.rev,
 3489             0, "chip model");
 3490         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3491             "port_types",
 3492             CTLFLAG_RD, &sc->port_types,
 3493             0, "type of ports");
 3494         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3495             "enable_debug",
 3496             CTLFLAG_RW, &cxgb_debug,
 3497             0, "enable verbose debugging output");
 3498         SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3499             CTLFLAG_RD, &sc->tunq_coalesce,
 3500             "#tunneled packets freed");
 3501         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3502             "txq_overrun",
 3503             CTLFLAG_RD, &txq_fills,
 3504             0, "#times txq overrun");
 3505         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3506             "core_clock",
 3507             CTLFLAG_RD, &sc->params.vpd.cclk,
 3508             0, "core clock frequency (in kHz)");
 3509 }
 3510 
 3511 
 3512 static const char *rspq_name = "rspq";
 3513 static const char *txq_names[] =
 3514 {
 3515         "txq_eth",
 3516         "txq_ofld",
 3517         "txq_ctrl"      
 3518 };
 3519 
 3520 static int
 3521 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3522 {
 3523         struct port_info *p = arg1;
 3524         uint64_t *parg;
 3525 
 3526         if (!p)
 3527                 return (EINVAL);
 3528 
 3529         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3530         PORT_LOCK(p);
 3531         t3_mac_update_stats(&p->mac);
 3532         PORT_UNLOCK(p);
 3533 
 3534         return (sysctl_handle_quad(oidp, parg, 0, req));
 3535 }
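
/*
 * A minimal standalone sketch, kept out of the build, of the arg2-as-offset
 * pattern used by sysctl_handle_macstat(): the byte offset of a counter is
 * stored with the OID at registration time and added back to the base of
 * the stats block when the OID is read.  The structure and names below are
 * hypothetical.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

struct demo_stats {                     /* hypothetical counter block */
        uint64_t tx_frames;
        uint64_t rx_frames;
};

static uint64_t *
stat_at_offset(struct demo_stats *base, size_t off)
{

        return ((uint64_t *)((uint8_t *)base + off));
}

/* e.g. *stat_at_offset(&stats, offsetof(struct demo_stats, rx_frames)) */
#endif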
 3536 
 3537 void
 3538 t3_add_configured_sysctls(adapter_t *sc)
 3539 {
 3540         struct sysctl_ctx_list *ctx;
 3541         struct sysctl_oid_list *children;
 3542         int i, j;
 3543         
 3544         ctx = device_get_sysctl_ctx(sc->dev);
 3545         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3546 
 3547         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3548             "intr_coal",
 3549             CTLTYPE_INT|CTLFLAG_RW, sc,
 3550             0, t3_set_coalesce_usecs,
 3551             "I", "interrupt coalescing timer (us)");
 3552 
 3553         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3554             "pkt_timestamp",
 3555             CTLTYPE_INT | CTLFLAG_RW, sc,
 3556             0, t3_pkt_timestamp,
 3557             "I", "provide packet timestamp instead of connection hash");
 3558 
 3559         for (i = 0; i < sc->params.nports; i++) {
 3560                 struct port_info *pi = &sc->port[i];
 3561                 struct sysctl_oid *poid;
 3562                 struct sysctl_oid_list *poidlist;
 3563                 struct mac_stats *mstats = &pi->mac.stats;
 3564                 
 3565                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3566                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3567                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3568                 poidlist = SYSCTL_CHILDREN(poid);
 3569                 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 
 3570                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3571                     0, "#queue sets");
 3572 
 3573                 for (j = 0; j < pi->nqsets; j++) {
 3574                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3575                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3576                                           *ctrlqpoid, *lropoid;
 3577                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3578                                                *txqpoidlist, *ctrlqpoidlist,
 3579                                                *lropoidlist;
 3580                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3581                         
 3582                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3583                         
 3584                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3585                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3586                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3587 
 3588                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3589                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3590                                         "freelist #0 empty");
 3591                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3592                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3593                                         "freelist #1 empty");
 3594 
 3595                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3596                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3597                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3598 
 3599                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3600                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3601                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3602 
 3603                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3604                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3605                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3606 
 3607                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3608                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3609                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3610 
 3611                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3612                             CTLFLAG_RD, &qs->rspq.size,
 3613                             0, "#entries in response queue");
 3614                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3615                             CTLFLAG_RD, &qs->rspq.cidx,
 3616                             0, "consumer index");
 3617                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3618                             CTLFLAG_RD, &qs->rspq.credits,
 3619                             0, "#credits");
 3620                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3621                             CTLFLAG_RD, &qs->rspq.starved,
 3622                             0, "#times starved");
 3623                         SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3624                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3625                             "physical address of the queue");
 3626                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3627                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3628                             0, "start rspq dump entry");
 3629                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3630                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3631                             0, "#rspq entries to dump");
 3632                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3633                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3634                             0, t3_dump_rspq, "A", "dump of the response queue");
 3635 
 3636                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3637                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3638                             "#tunneled packets dropped");
 3639                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3640                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3641                             0, "#tunneled packets waiting to be sent");
 3642 #if 0                   
 3643                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3644                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3645                             0, "#tunneled packets queue producer index");
 3646                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3647                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3648                             0, "#tunneled packets queue consumer index");
 3649 #endif                  
 3650                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
 3651                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3652                             0, "#tunneled packets processed by the card");
 3653                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3654                             CTLFLAG_RD, &txq->cleaned,
 3655                             0, "#tunneled packets cleaned");
 3656                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3657                             CTLFLAG_RD, &txq->in_use,
 3658                             0, "#tunneled packet slots in use");
 3659                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3660                             CTLFLAG_RD, &txq->txq_frees,
 3661                             "#tunneled packets freed");
 3662                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3663                             CTLFLAG_RD, &txq->txq_skipped,
 3664                             0, "#tunneled packet descriptors skipped");
 3665                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3666                             CTLFLAG_RD, &txq->txq_coalesced,
 3667                             "#tunneled packets coalesced");
 3668                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3669                             CTLFLAG_RD, &txq->txq_enqueued,
 3670                             0, "#tunneled packets enqueued to hardware");
 3671                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3672                             CTLFLAG_RD, &qs->txq_stopped,
 3673                             0, "tx queues stopped");
 3674                         SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3675                             CTLFLAG_RD, &txq->phys_addr,
 3676                             "physical address of the queue");
 3677                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3678                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3679                             0, "txq generation");
 3680                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3681                             CTLFLAG_RD, &txq->cidx,
 3682                             0, "hardware queue cidx");                  
 3683                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3684                             CTLFLAG_RD, &txq->pidx,
 3685                             0, "hardware queue pidx");
 3686                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3687                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3688                             0, "txq start idx for dump");
 3689                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3690                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3691                             0, "txq #entries to dump");                 
 3692                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3693                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3694                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3695 
 3696                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3697                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3698                             0, "ctrlq start idx for dump");
 3699                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3700                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3701                             0, "ctrl #entries to dump");                        
 3702                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3703                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3704                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3705 
 3706                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3707                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3708                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3709                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3710                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3711                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3712                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3713                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3714                 }
 3715 
 3716                 /* Now add a node for mac stats. */
 3717                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3718                     CTLFLAG_RD, NULL, "MAC statistics");
 3719                 poidlist = SYSCTL_CHILDREN(poid);
 3720 
 3721                 /*
 3722                  * We (ab)use the length argument (arg2) to pass on the offset
 3723                  * of the data that we are interested in.  This is only required
 3724                  * for the quad counters that are updated from the hardware (we
 3725                  * make sure that we return the latest value).
 3726                  * sysctl_handle_macstat first updates *all* the counters from
 3727                  * the hardware, and then returns the latest value of the
 3728                  * requested counter.  Best would be to update only the
 3729                  * requested counter from hardware, but t3_mac_update_stats()
 3730                  * hides all the register details and we don't want to dive into
 3731                  * all that here.
 3732                  */
 3733 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3734     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3735     sysctl_handle_macstat, "QU", 0)
 3736                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3737                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3738                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3739                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3740                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3741                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3742                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3743                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3744                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3745                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3746                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3747                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3748                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3749                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3750                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3751                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3752                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3753                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3754                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3755                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3756                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3757                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3758                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3759                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3760                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3761                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3762                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3763                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3764                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3765                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3766                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3767                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3768                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3769                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3770                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3771                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3772                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3773                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3774                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3775                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3776                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3777                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3778                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3779                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3780                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3781                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3782 #undef CXGB_SYSCTL_ADD_QUAD
 3783 
 3784 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3785     CTLFLAG_RD, &mstats->a, 0)
 3786                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3787                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3788                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3789                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3790                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3791                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3792                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3793                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3794                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3795                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3796 #undef CXGB_SYSCTL_ADD_ULONG
 3797         }
 3798 }
 3799         
 3800 /**
 3801  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3802  *      @qs: the queue set
 3803  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3804  *      @idx: the descriptor index in the queue
 3805  *      @data: where to dump the descriptor contents
 3806  *
 3807  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3808  *      size of the descriptor.
 3809  */
 3810 int
 3811 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3812                 unsigned char *data)
 3813 {
 3814         if (qnum >= 6)
 3815                 return (EINVAL);
 3816 
 3817         if (qnum < 3) {
 3818                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3819                         return (EINVAL);
 3820                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3821                 return sizeof(struct tx_desc);
 3822         }
 3823 
 3824         if (qnum == 3) {
 3825                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3826                         return (EINVAL);
 3827                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3828                 return sizeof(struct rsp_desc);
 3829         }
 3830 
 3831         qnum -= 4;
 3832         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3833                 return (EINVAL);
 3834         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3835         return sizeof(struct rx_desc);
 3836 }
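
/*
 * A caller sketch, kept out of the build: the destination buffer handed to
 * t3_get_desc() must be able to hold whichever descriptor type the queue
 * number selects, and sizing it as a union of all three is one safe way to
 * do that.
 */
#if 0
static __inline int
dump_one_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx)
{
        union {                         /* large enough for any descriptor */
                struct tx_desc  txd;
                struct rsp_desc rspd;
                struct rx_desc  rxd;
        } buf;

        /* Returns the size of the copied descriptor, or an error code. */
        return (t3_get_desc(qs, qnum, idx, (unsigned char *)&buf));
}
#endif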
