The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: stable/9/sys/dev/cxgb/cxgb_sge.c 305556 2016-09-07 19:17:03Z dim $");
   32 
   33 #include "opt_inet6.h"
   34 #include "opt_inet.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/kernel.h>
   39 #include <sys/module.h>
   40 #include <sys/bus.h>
   41 #include <sys/conf.h>
   42 #include <machine/bus.h>
   43 #include <machine/resource.h>
   44 #include <sys/bus_dma.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/bpf.h>    
   60 #include <net/ethernet.h>
   61 #include <net/if.h>
   62 #include <net/if_vlan_var.h>
   63 
   64 #include <netinet/in_systm.h>
   65 #include <netinet/in.h>
   66 #include <netinet/ip.h>
   67 #include <netinet/ip6.h>
   68 #include <netinet/tcp.h>
   69 
   70 #include <dev/pci/pcireg.h>
   71 #include <dev/pci/pcivar.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/pmap.h>
   75 
   76 #include <cxgb_include.h>
   77 #include <sys/mvec.h>
   78 
/*
 * Driver-wide globals with external linkage.  Their consumers are not in
 * this part of the file; judging by the names, txq_fills is a counter and
 * multiq_tx_enable switches multi-queue transmit on -- TODO confirm against
 * the users.
 */
int     txq_fills = 0;
int     multiq_tx_enable = 1;

#ifdef TCP_OFFLOAD
/* Compile-time check: at least as many CPL handler slots as CPL commands. */
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
/*
 * hw.cxgb.txq_mr_size: size of the per-queue mbuf ring.  Defaults to
 * TX_ETH_Q_SIZE, can be set as a loader tunable and is exported through a
 * CTLFLAG_RDTUN sysctl.
 */
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

/*
 * hw.cxgb.tx_coalesce_force: when non-zero, check_pkt_coalesce() returns 1
 * immediately, without looking at the state of the Tx ring.
 */
static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");
   97 
   98 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   99 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
  100 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
  101 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
  102 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
  103 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
  104 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
  105 
  106 
/*
 * hw.cxgb.tx_coalesce_enable_start: check_pkt_coalesce() marks a queue as
 * filling once the Ethernet Tx queue's in_use count reaches this value.
 */
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
    &cxgb_tx_coalesce_enable_start);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
/*
 * hw.cxgb.tx_coalesce_enable_stop: check_pkt_coalesce() clears the filling
 * mark only when in_use is at or below this value (and the software ring is
 * empty and no coalescing is in progress).
 */
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
/*
 * hw.cxgb.tx_reclaim_threshold: reset to TX_RECLAIM_DEFAULT by
 * reclaim_completed_tx() whenever it is found outside
 * [TX_RECLAIM_MIN, TX_RECLAIM_MAX].
 */
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;

/*
 * Defined outside this file.  cxgb_use_16k_clusters is compared against -1
 * in t3_sge_prep(), where -1 selects the is_offload() based default; the
 * nmbjumbo* values are used there to size the jumbo free lists.
 */
extern int cxgb_use_16k_clusters;
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;
  134 
/* NOTE(review): not referenced in this part of the file; purpose unconfirmed. */
#define USE_GTS 0

/*
 * Rx tuning constants.  Their users are outside this part of the file; the
 * names suggest a small-buffer size in bytes, a drop threshold and a
 * copy threshold -- TODO confirm units at the points of use.
 */
#define SGE_RX_SM_BUF_SIZE      1536
#define SGE_RX_DROP_THRES       16
#define SGE_RX_COPY_THRES       128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 * Expressed in ticks: hz >> 1 is half of the kernel tick rate.
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/* 
 * Values for sge_txq.flags
 */
enum {
        TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
        TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};
  154 
/* Tx descriptor: an array of TX_DESC_FLITS 64-bit words ("flits"). */
struct tx_desc {
        uint64_t        flit[TX_DESC_FLITS];
} __packed;

/*
 * Free-list (Rx buffer) descriptor.  refill_fl() stores every field in
 * big-endian order: the low and high 32 bits of the buffer's bus address,
 * and the queue generation encoded with V_FLD_GEN1()/V_FLD_GEN2().
 */
struct rx_desc {
        uint32_t        addr_lo;
        uint32_t        len_gen;
        uint32_t        gen2;
        uint32_t        addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
        struct rss_header       rss_hdr;        /* opcode is read from here */
        uint32_t                flags;
        uint32_t                len_cq;
        uint8_t                 imm_data[47];   /* immediate data; source for get_imm_packet() */
        uint8_t                 intr_gen;
} __packed;
  173 
/*
 * Bits for the flags member of struct rx_sw_desc / struct tx_sw_desc.
 * Bit 2 is not assigned here.
 */
#define RX_SW_DESC_MAP_CREATED  (1 << 0)        /* Rx DMA map has been created */
#define TX_SW_DESC_MAP_CREATED  (1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)        /* Rx slot currently holds a buffer */
#define TX_SW_DESC_MAPPED       (1 << 4)

/*
 * The four start-of-packet / end-of-packet combinations of a response, as
 * extracted by G_RSPD_SOP_EOP().
 */
#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  183 
struct tx_sw_desc {                /* SW state per Tx descriptor */
        struct mbuf     *m;        /* mbuf associated with the descriptor */
        bus_dmamap_t    map;       /* DMA map for the descriptor */
        int             flags;     /* TX_SW_DESC_* bits */
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
        caddr_t         rxsd_cl;   /* cluster (data buffer) posted to the HW */
        struct mbuf     *m;        /* mbuf header paired with the cluster */
        bus_dmamap_t    map;       /* DMA map, created on first use of the slot */
        int             flags;     /* RX_SW_DESC_* bits */
};

/*
 * NOTE(review): not used in this part of the file; the field names suggest
 * a snapshot of a Tx queue's completion flag, generation and producer index.
 */
struct txq_state {
        unsigned int    compl;
        unsigned int    gen;
        unsigned int    pidx;
};

/* Result of a bus_dmamap_load() callback, filled in by refill_fl_cb(). */
struct refill_fl_cb_arg {
        int               error;   /* error code passed to the callback */
        bus_dma_segment_t seg;     /* first DMA segment */
        int               nseg;    /* number of segments reported */
};
  208 
  209 
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 *
 * The table is indexed directly by the flit count (see flits_to_desc());
 * index 0 maps to 0 descriptors.  With one generation bit the first
 * descriptor covers 16 flits and each further one 15; with two generation
 * bits the figures are 15 and 14.
 */
static uint8_t flit_desc_map[] = {
        0,
#if SGE_NUM_GENBITS == 1
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
  234 
/*
 * Queue-set locking helpers: thin wrappers around the mutex embedded in the
 * queue set (qs->lock).
 */
#define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
#define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
#define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
#define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
/*
 * Wrappers around the drbr_*() routines, applied to the buf_ring of the
 * queue set's Ethernet Tx queue (txq[TXQ_ETH].txq_mr) and the ifnet of the
 * port that owns the queue set.
 */
#define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
        drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
/* Dequeue the next mbuf only if func(mbuf, arg) accepts it. */
#define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
        drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define TXQ_RING_DEQUEUE(qs) \
        drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  247 
/* Global debug switch, off by default; its readers are outside this part of the file. */
int cxgb_debug = 0;

/* Forward declarations for routines defined later in this file. */
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);
  254 
  255 /*
  256  * XXX need to cope with bursty scheduling by looking at a wider
  257  * window than we are now for determining the need for coalescing
  258  *
  259  */
  260 static __inline uint64_t
  261 check_pkt_coalesce(struct sge_qset *qs) 
  262 { 
  263         struct adapter *sc; 
  264         struct sge_txq *txq; 
  265         uint8_t *fill;
  266 
  267         if (__predict_false(cxgb_tx_coalesce_force))
  268                 return (1);
  269         txq = &qs->txq[TXQ_ETH]; 
  270         sc = qs->port->adapter; 
  271         fill = &sc->tunq_fill[qs->idx];
  272 
  273         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  274                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  275         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  276                 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
  277         /*
  278          * if the hardware transmit queue is more than 1/8 full
  279          * we mark it as coalescing - we drop back from coalescing
  280          * when we go below 1/32 full and there are no packets enqueued, 
  281          * this provides us with some degree of hysteresis
  282          */
  283         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  284             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  285                 *fill = 0; 
  286         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  287                 *fill = 1; 
  288 
  289         return (sc->tunq_coalesce);
  290 } 
  291 
  292 #ifdef __LP64__
  293 static void
  294 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  295 {
  296         uint64_t wr_hilo;
  297 #if _BYTE_ORDER == _LITTLE_ENDIAN
  298         wr_hilo = wr_hi;
  299         wr_hilo |= (((uint64_t)wr_lo)<<32);
  300 #else
  301         wr_hilo = wr_lo;
  302         wr_hilo |= (((uint64_t)wr_hi)<<32);
  303 #endif  
  304         wrp->wrh_hilo = wr_hilo;
  305 }
  306 #else
  307 static void
  308 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  309 {
  310 
  311         wrp->wrh_hi = wr_hi;
  312         wmb();
  313         wrp->wrh_lo = wr_lo;
  314 }
  315 #endif
  316 
/* Running totals kept by coalesce_check() while cxgb_dequeue() builds a batch. */
struct coalesce_info {
        int count;      /* packets accepted so far */
        int nbytes;     /* sum of m_len over the accepted packets */
};
  321 
  322 static int
  323 coalesce_check(struct mbuf *m, void *arg)
  324 {
  325         struct coalesce_info *ci = arg;
  326         int *count = &ci->count;
  327         int *nbytes = &ci->nbytes;
  328 
  329         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  330                 (*count < 7) && (m->m_next == NULL))) {
  331                 *count += 1;
  332                 *nbytes += m->m_len;
  333                 return (1);
  334         }
  335         return (0);
  336 }
  337 
  338 static struct mbuf *
  339 cxgb_dequeue(struct sge_qset *qs)
  340 {
  341         struct mbuf *m, *m_head, *m_tail;
  342         struct coalesce_info ci;
  343 
  344         
  345         if (check_pkt_coalesce(qs) == 0) 
  346                 return TXQ_RING_DEQUEUE(qs);
  347 
  348         m_head = m_tail = NULL;
  349         ci.count = ci.nbytes = 0;
  350         do {
  351                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  352                 if (m_head == NULL) {
  353                         m_tail = m_head = m;
  354                 } else if (m != NULL) {
  355                         m_tail->m_nextpkt = m;
  356                         m_tail = m;
  357                 }
  358         } while (m != NULL);
  359         if (ci.count > 7)
  360                 panic("trying to coalesce %d packets in to one WR", ci.count);
  361         return (m_head);
  362 }
  363         
  364 /**
  365  *      reclaim_completed_tx - reclaims completed Tx descriptors
  366  *      @adapter: the adapter
  367  *      @q: the Tx queue to reclaim completed descriptors from
  368  *
  369  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  370  *      and frees the associated buffers if possible.  Called with the Tx
  371  *      queue's lock held.
  372  */
  373 static __inline int
  374 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  375 {
  376         struct sge_txq *q = &qs->txq[queue];
  377         int reclaim = desc_reclaimable(q);
  378 
  379         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  380             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  381                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  382 
  383         if (reclaim < reclaim_min)
  384                 return (0);
  385 
  386         mtx_assert(&qs->lock, MA_OWNED);
  387         if (reclaim > 0) {
  388                 t3_free_tx_desc(qs, reclaim, queue);
  389                 q->cleaned += reclaim;
  390                 q->in_use -= reclaim;
  391         }
  392         if (isset(&qs->txq_stopped, TXQ_ETH))
  393                 clrbit(&qs->txq_stopped, TXQ_ETH);
  394 
  395         return (reclaim);
  396 }
  397 
  398 /**
  399  *      should_restart_tx - are there enough resources to restart a Tx queue?
  400  *      @q: the Tx queue
  401  *
  402  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  403  */
  404 static __inline int
  405 should_restart_tx(const struct sge_txq *q)
  406 {
  407         unsigned int r = q->processed - q->cleaned;
  408 
  409         return q->in_use - r < (q->size >> 1);
  410 }
  411 
/**
 *      t3_sge_init - initialize SGE
 *      @adap: the adapter
 *      @p: the SGE parameters (not referenced by this function)
 *
 *      Performs SGE initialization needed every time after a chip reset.
 *      We do not initialize any of the queue sets here, instead the driver
 *      top-level must request those individually.  We also do not enable DMA
 *      here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
        u_int ctrl, ups;

        /* User-space doorbell sizing is disabled: ups stays 0. */
        ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

        /* Assemble the value for the SGE control register. */
        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
               F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
               V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
               V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
        ctrl |= F_EGRGENCTRL;
#endif
        /*
         * On chip revisions above 0, when neither MSI-X nor MSI is in
         * use, also set the two ONEINTMULTQ bits.
         */
        if (adap->params.rev > 0) {
                if (!(adap->flags & (USING_MSIX | USING_MSI)))
                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
        }
        t3_write_reg(adap, A_SG_CONTROL, ctrl);
        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
                     V_LORCQDRBTHRSH(512));
        /* Timer tick and credit timeout are derived from the core clock. */
        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
        /* The high-priority doorbell high threshold depends on the chip revision. */
        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
                     adap->params.rev < T3_REV_C ? 1000 : 500);
        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}
  454 
  455 
  456 /**
  457  *      sgl_len - calculates the size of an SGL of the given capacity
  458  *      @n: the number of SGL entries
  459  *
  460  *      Calculates the number of flits needed for a scatter/gather list that
  461  *      can hold the given number of entries.
  462  */
  463 static __inline unsigned int
  464 sgl_len(unsigned int n)
  465 {
  466         return ((3 * n) / 2 + (n & 1));
  467 }
  468 
  469 /**
  470  *      get_imm_packet - return the next ingress packet buffer from a response
  471  *      @resp: the response descriptor containing the packet data
  472  *
  473  *      Return a packet containing the immediate data of the given response.
  474  */
  475 static int
  476 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  477 {
  478 
  479         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  480                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  481                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  482         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  483                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  484                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  485         } else
  486                 m->m_len = IMMED_PKT_SIZE;
  487         m->m_ext.ext_buf = NULL;
  488         m->m_ext.ext_type = 0;
  489         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  490         return (0);     
  491 }
  492 
  493 static __inline u_int
  494 flits_to_desc(u_int n)
  495 {
  496         return (flit_desc_map[n]);
  497 }
  498 
/* All parity-error bits of the SGE interrupt cause register. */
#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
                    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
                    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
                    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
                    F_HIRCQPARITYERROR)
/* Both request framing-error bits. */
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
/* Causes for which t3_sge_err_intr_handler() calls t3_fatal_err(). */
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
                      F_RSPQDISABLED)
  507 
  508 /**
  509  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  510  *      @adapter: the adapter
  511  *
  512  *      Interrupt handler for SGE asynchronous (non-data) events.
  513  */
  514 void
  515 t3_sge_err_intr_handler(adapter_t *adapter)
  516 {
  517         unsigned int v, status;
  518 
  519         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  520         if (status & SGE_PARERR)
  521                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  522                          status & SGE_PARERR);
  523         if (status & SGE_FRAMINGERR)
  524                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  525                          status & SGE_FRAMINGERR);
  526         if (status & F_RSPQCREDITOVERFOW)
  527                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  528 
  529         if (status & F_RSPQDISABLED) {
  530                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  531 
  532                 CH_ALERT(adapter,
  533                          "packet delivered to disabled response queue (0x%x)\n",
  534                          (v >> S_RSPQ0DISABLED) & 0xff);
  535         }
  536 
  537         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  538         if (status & SGE_FATALERR)
  539                 t3_fatal_err(adapter);
  540 }
  541 
  542 void
  543 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  544 {
  545         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  546 
  547         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  548         nqsets *= adap->params.nports;
  549 
  550         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  551 
  552         while (!powerof2(fl_q_size))
  553                 fl_q_size--;
  554 
  555         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  556             is_offload(adap);
  557 
  558 #if __FreeBSD_version >= 700111
  559         if (use_16k) {
  560                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  561                 jumbo_buf_size = MJUM16BYTES;
  562         } else {
  563                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  564                 jumbo_buf_size = MJUM9BYTES;
  565         }
  566 #else
  567         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  568         jumbo_buf_size = MJUMPAGESIZE;
  569 #endif
  570         while (!powerof2(jumbo_q_size))
  571                 jumbo_q_size--;
  572 
  573         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  574                 device_printf(adap->dev,
  575                     "Insufficient clusters and/or jumbo buffers.\n");
  576 
  577         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  578 
  579         for (i = 0; i < SGE_QSETS; ++i) {
  580                 struct qset_params *q = p->qset + i;
  581 
  582                 if (adap->params.nports > 2) {
  583                         q->coalesce_usecs = 50;
  584                 } else {
  585 #ifdef INVARIANTS                       
  586                         q->coalesce_usecs = 10;
  587 #else
  588                         q->coalesce_usecs = 5;
  589 #endif                  
  590                 }
  591                 q->polling = 0;
  592                 q->rspq_size = RSPQ_Q_SIZE;
  593                 q->fl_size = fl_q_size;
  594                 q->jumbo_size = jumbo_q_size;
  595                 q->jumbo_buf_size = jumbo_buf_size;
  596                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  597                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  598                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  599                 q->cong_thres = 0;
  600         }
  601 }
  602 
  603 int
  604 t3_sge_alloc(adapter_t *sc)
  605 {
  606 
  607         /* The parent tag. */
  608         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  609                                 1, 0,                   /* algnmnt, boundary */
  610                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  611                                 BUS_SPACE_MAXADDR,      /* highaddr */
  612                                 NULL, NULL,             /* filter, filterarg */
  613                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  614                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  615                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  616                                 0,                      /* flags */
  617                                 NULL, NULL,             /* lock, lockarg */
  618                                 &sc->parent_dmat)) {
  619                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  620                 return (ENOMEM);
  621         }
  622 
  623         /*
  624          * DMA tag for normal sized RX frames
  625          */
  626         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  627                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  628                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  629                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  630                 return (ENOMEM);
  631         }
  632 
  633         /* 
  634          * DMA tag for jumbo sized RX frames.
  635          */
  636         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  637                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  638                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  639                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  640                 return (ENOMEM);
  641         }
  642 
  643         /* 
  644          * DMA tag for TX frames.
  645          */
  646         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  647                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  648                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  649                 NULL, NULL, &sc->tx_dmat)) {
  650                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  651                 return (ENOMEM);
  652         }
  653 
  654         return (0);
  655 }
  656 
  657 int
  658 t3_sge_free(struct adapter * sc)
  659 {
  660 
  661         if (sc->tx_dmat != NULL)
  662                 bus_dma_tag_destroy(sc->tx_dmat);
  663 
  664         if (sc->rx_jumbo_dmat != NULL)
  665                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  666 
  667         if (sc->rx_dmat != NULL)
  668                 bus_dma_tag_destroy(sc->rx_dmat);
  669 
  670         if (sc->parent_dmat != NULL)
  671                 bus_dma_tag_destroy(sc->parent_dmat);
  672 
  673         return (0);
  674 }
  675 
  676 void
  677 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  678 {
  679 
  680         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  681         qs->rspq.polling = 0 /* p->polling */;
  682 }
  683 
  684 #if !defined(__i386__) && !defined(__amd64__)
  685 static void
  686 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  687 {
  688         struct refill_fl_cb_arg *cb_arg = arg;
  689         
  690         cb_arg->error = error;
  691         cb_arg->seg = segs[0];
  692         cb_arg->nseg = nseg;
  693 
  694 }
  695 #endif
  696 /**
  697  *      refill_fl - refill an SGE free-buffer list
  698  *      @sc: the controller softc
  699  *      @q: the free-list to refill
  700  *      @n: the number of new buffers to allocate
  701  *
  702  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  703  *      The caller must assure that @n does not exceed the queue's capacity.
  704  */
  705 static void
  706 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  707 {
  708         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  709         struct rx_desc *d = &q->desc[q->pidx];
  710         struct refill_fl_cb_arg cb_arg;
  711         struct mbuf *m;
  712         caddr_t cl;
  713         int err;
  714         
  715         cb_arg.error = 0;
  716         while (n--) {
  717                 /*
  718                  * We allocate an uninitialized mbuf + cluster, mbuf is
  719                  * initialized after rx.
  720                  */
  721                 if (q->zone == zone_pack) {
  722                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  723                                 break;
  724                         cl = m->m_ext.ext_buf;                  
  725                 } else {
  726                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  727                                 break;
  728                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  729                                 uma_zfree(q->zone, cl);
  730                                 break;
  731                         }
  732                 }
  733                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  734                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  735                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  736                                 uma_zfree(q->zone, cl);
  737                                 goto done;
  738                         }
  739                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  740                 }
  741 #if !defined(__i386__) && !defined(__amd64__)
  742                 err = bus_dmamap_load(q->entry_tag, sd->map,
  743                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  744                 
  745                 if (err != 0 || cb_arg.error) {
  746                         if (q->zone == zone_pack)
  747                                 uma_zfree(q->zone, cl);
  748                         m_free(m);
  749                         goto done;
  750                 }
  751 #else
  752                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  753 #endif          
  754                 sd->flags |= RX_SW_DESC_INUSE;
  755                 sd->rxsd_cl = cl;
  756                 sd->m = m;
  757                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  758                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  759                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  760                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  761 
  762                 d++;
  763                 sd++;
  764 
  765                 if (++q->pidx == q->size) {
  766                         q->pidx = 0;
  767                         q->gen ^= 1;
  768                         sd = q->sdesc;
  769                         d = q->desc;
  770                 }
  771                 q->credits++;
  772                 q->db_pending++;
  773         }
  774 
  775 done:
  776         if (q->db_pending >= 32) {
  777                 q->db_pending = 0;
  778                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  779         }
  780 }
  781 
  782 
  783 /**
  784  *      free_rx_bufs - free the Rx buffers on an SGE free list
  785  *      @sc: the controle softc
  786  *      @q: the SGE free list to clean up
  787  *
  788  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  789  *      this queue should be stopped before calling this function.
  790  */
  791 static void
  792 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  793 {
  794         u_int cidx = q->cidx;
  795 
  796         while (q->credits--) {
  797                 struct rx_sw_desc *d = &q->sdesc[cidx];
  798 
  799                 if (d->flags & RX_SW_DESC_INUSE) {
  800                         bus_dmamap_unload(q->entry_tag, d->map);
  801                         bus_dmamap_destroy(q->entry_tag, d->map);
  802                         if (q->zone == zone_pack) {
  803                                 m_init(d->m, zone_pack, MCLBYTES,
  804                                     M_NOWAIT, MT_DATA, M_EXT);
  805                                 uma_zfree(zone_pack, d->m);
  806                         } else {
  807                                 m_init(d->m, zone_mbuf, MLEN,
  808                                     M_NOWAIT, MT_DATA, 0);
  809                                 uma_zfree(zone_mbuf, d->m);
  810                                 uma_zfree(q->zone, d->rxsd_cl);
  811                         }                       
  812                 }
  813                 
  814                 d->rxsd_cl = NULL;
  815                 d->m = NULL;
  816                 if (++cidx == q->size)
  817                         cidx = 0;
  818         }
  819 }
  820 
  821 static __inline void
  822 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  823 {
  824         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  825 }
  826 
  827 static __inline void
  828 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  829 {
  830         uint32_t reclaimable = fl->size - fl->credits;
  831 
  832         if (reclaimable > 0)
  833                 refill_fl(adap, fl, min(max, reclaimable));
  834 }
  835 
  836 /**
  837  *      recycle_rx_buf - recycle a receive buffer
  838  *      @adapter: the adapter
  839  *      @q: the SGE free list
  840  *      @idx: index of buffer to recycle
  841  *
  842  *      Recycles the specified buffer on the given free list by adding it at
  843  *      the next available slot on the list.
  844  */
  845 static void
  846 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  847 {
  848         struct rx_desc *from = &q->desc[idx];
  849         struct rx_desc *to   = &q->desc[q->pidx];
  850 
  851         q->sdesc[q->pidx] = q->sdesc[idx];
  852         to->addr_lo = from->addr_lo;        // already big endian
  853         to->addr_hi = from->addr_hi;        // likewise
  854         wmb();  /* necessary ? */
  855         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  856         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  857         q->credits++;
  858 
  859         if (++q->pidx == q->size) {
  860                 q->pidx = 0;
  861                 q->gen ^= 1;
  862         }
  863         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  864 }
  865 
  866 static void
  867 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  868 {
  869         uint32_t *addr;
  870 
  871         addr = arg;
  872         *addr = segs[0].ds_addr;
  873 }
  874 
  875 static int
  876 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  877     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  878     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  879 {
  880         size_t len = nelem * elem_size;
  881         void *s = NULL;
  882         void *p = NULL;
  883         int err;
  884 
  885         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  886                                       BUS_SPACE_MAXADDR_32BIT,
  887                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  888                                       len, 0, NULL, NULL, tag)) != 0) {
  889                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  890                 return (ENOMEM);
  891         }
  892 
  893         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  894                                     map)) != 0) {
  895                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  896                 return (ENOMEM);
  897         }
  898 
  899         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  900         bzero(p, len);
  901         *(void **)desc = p;
  902 
  903         if (sw_size) {
  904                 len = nelem * sw_size;
  905                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  906                 *(void **)sdesc = s;
  907         }
  908         if (parent_entry_tag == NULL)
  909                 return (0);
  910             
  911         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  912                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  913                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  914                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  915                                       NULL, NULL, entry_tag)) != 0) {
  916                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  917                 return (ENOMEM);
  918         }
  919         return (0);
  920 }
  921 
  922 static void
  923 sge_slow_intr_handler(void *arg, int ncount)
  924 {
  925         adapter_t *sc = arg;
  926 
  927         t3_slow_intr_handler(sc);
  928         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  929         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  930 }
  931 
  932 /**
  933  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  934  *      @data: the SGE queue set to maintain
  935  *
  936  *      Runs periodically from a timer to perform maintenance of an SGE queue
  937  *      set.  It performs two tasks:
  938  *
  939  *      a) Cleans up any completed Tx descriptors that may still be pending.
  940  *      Normal descriptor cleanup happens when new packets are added to a Tx
  941  *      queue so this timer is relatively infrequent and does any cleanup only
  942  *      if the Tx queue has not seen any new packets in a while.  We make a
  943  *      best effort attempt to reclaim descriptors, in that we don't wait
  944  *      around if we cannot get a queue's lock (which most likely is because
  945  *      someone else is queueing new packets and so will also handle the clean
  946  *      up).  Since control queues use immediate data exclusively we don't
  947  *      bother cleaning them up here.
  948  *
  949  *      b) Replenishes Rx queues that have run out due to memory shortage.
  950  *      Normally new Rx buffers are added when existing ones are consumed but
  951  *      when out of memory a queue can become empty.  We try to add only a few
  952  *      buffers here, the queue will be replenished fully as these new buffers
  953  *      are used up if memory shortage has subsided.
  954  *      
  955  *      c) Return coalesced response queue credits in case a response queue is
  956  *      starved.
  957  *
  958  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  959  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  960  */
  961 static void
  962 sge_timer_cb(void *arg)
  963 {
  964         adapter_t *sc = arg;
  965         if ((sc->flags & USING_MSIX) == 0) {
  966                 
  967                 struct port_info *pi;
  968                 struct sge_qset *qs;
  969                 struct sge_txq  *txq;
  970                 int i, j;
  971                 int reclaim_ofl, refill_rx;
  972 
  973                 if (sc->open_device_map == 0) 
  974                         return;
  975 
  976                 for (i = 0; i < sc->params.nports; i++) {
  977                         pi = &sc->port[i];
  978                         for (j = 0; j < pi->nqsets; j++) {
  979                                 qs = &sc->sge.qs[pi->first_qset + j];
  980                                 txq = &qs->txq[0];
  981                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  982                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  983                                     (qs->fl[1].credits < qs->fl[1].size));
  984                                 if (reclaim_ofl || refill_rx) {
  985                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  986                                         break;
  987                                 }
  988                         }
  989                 }
  990         }
  991         
  992         if (sc->params.nports > 2) {
  993                 int i;
  994 
  995                 for_each_port(sc, i) {
  996                         struct port_info *pi = &sc->port[i];
  997 
  998                         t3_write_reg(sc, A_SG_KDOORBELL, 
  999                                      F_SELEGRCNTX | 
 1000                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1001                 }
 1002         }       
 1003         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1004             sc->open_device_map != 0)
 1005                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1006 }
 1007 
 1008 /*
 1009  * This is meant to be a catch-all function to keep sge state private
 1010  * to sge.c
 1011  *
 1012  */
 1013 int
 1014 t3_sge_init_adapter(adapter_t *sc)
 1015 {
 1016         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1017         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1018         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1019         return (0);
 1020 }
 1021 
 1022 int
 1023 t3_sge_reset_adapter(adapter_t *sc)
 1024 {
 1025         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1026         return (0);
 1027 }
 1028 
 1029 int
 1030 t3_sge_init_port(struct port_info *pi)
 1031 {
 1032         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1033         return (0);
 1034 }
 1035 
 1036 /**
 1037  *      refill_rspq - replenish an SGE response queue
 1038  *      @adapter: the adapter
 1039  *      @q: the response queue to replenish
 1040  *      @credits: how many new responses to make available
 1041  *
 1042  *      Replenishes a response queue by making the supplied number of responses
 1043  *      available to HW.
 1044  */
 1045 static __inline void
 1046 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1047 {
 1048 
 1049         /* mbufs are allocated on demand when a rspq entry is processed. */
 1050         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1051                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1052 }
 1053 
 1054 static void
 1055 sge_txq_reclaim_handler(void *arg, int ncount)
 1056 {
 1057         struct sge_qset *qs = arg;
 1058         int i;
 1059 
 1060         for (i = 0; i < 3; i++)
 1061                 reclaim_completed_tx(qs, 16, i);
 1062 }
 1063 
 1064 static void
 1065 sge_timer_reclaim(void *arg, int ncount)
 1066 {
 1067         struct port_info *pi = arg;
 1068         int i, nqsets = pi->nqsets;
 1069         adapter_t *sc = pi->adapter;
 1070         struct sge_qset *qs;
 1071         struct mtx *lock;
 1072         
 1073         KASSERT((sc->flags & USING_MSIX) == 0,
 1074             ("can't call timer reclaim for msi-x"));
 1075 
 1076         for (i = 0; i < nqsets; i++) {
 1077                 qs = &sc->sge.qs[pi->first_qset + i];
 1078 
 1079                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1080                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1081                             &sc->sge.qs[0].rspq.lock;
 1082 
 1083                 if (mtx_trylock(lock)) {
 1084                         /* XXX currently assume that we are *NOT* polling */
 1085                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1086 
 1087                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1088                                 __refill_fl(sc, &qs->fl[0]);
 1089                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1090                                 __refill_fl(sc, &qs->fl[1]);
 1091                         
 1092                         if (status & (1 << qs->rspq.cntxt_id)) {
 1093                                 if (qs->rspq.credits) {
 1094                                         refill_rspq(sc, &qs->rspq, 1);
 1095                                         qs->rspq.credits--;
 1096                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1097                                             1 << qs->rspq.cntxt_id);
 1098                                 }
 1099                         }
 1100                         mtx_unlock(lock);
 1101                 }
 1102         }
 1103 }
 1104 
 1105 /**
 1106  *      init_qset_cntxt - initialize an SGE queue set context info
 1107  *      @qs: the queue set
 1108  *      @id: the queue set id
 1109  *
 1110  *      Initializes the TIDs and context ids for the queues of a queue set.
 1111  */
 1112 static void
 1113 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1114 {
 1115 
 1116         qs->rspq.cntxt_id = id;
 1117         qs->fl[0].cntxt_id = 2 * id;
 1118         qs->fl[1].cntxt_id = 2 * id + 1;
 1119         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1120         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1121         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1122         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1123         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1124 
 1125         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1126         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1127         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1128 }
 1129 
 1130 
 1131 static void
 1132 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1133 {
 1134         txq->in_use += ndesc;
 1135         /*
 1136          * XXX we don't handle stopping of queue
 1137          * presumably start handles this when we bump against the end
 1138          */
 1139         txqs->gen = txq->gen;
 1140         txq->unacked += ndesc;
 1141         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1142         txq->unacked &= 31;
 1143         txqs->pidx = txq->pidx;
 1144         txq->pidx += ndesc;
 1145 #ifdef INVARIANTS
 1146         if (((txqs->pidx > txq->cidx) &&
 1147                 (txq->pidx < txqs->pidx) &&
 1148                 (txq->pidx >= txq->cidx)) ||
 1149             ((txqs->pidx < txq->cidx) &&
 1150                 (txq->pidx >= txq-> cidx)) ||
 1151             ((txqs->pidx < txq->cidx) &&
 1152                 (txq->cidx < txqs->pidx)))
 1153                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1154                     txqs->pidx, txq->pidx, txq->cidx);
 1155 #endif
 1156         if (txq->pidx >= txq->size) {
 1157                 txq->pidx -= txq->size;
 1158                 txq->gen ^= 1;
 1159         }
 1160 
 1161 }
 1162 
 1163 /**
 1164  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1165  *      @m: the packet mbufs
 1166  *      @nsegs: the number of segments 
 1167  *
 1168  *      Returns the number of Tx descriptors needed for the given Ethernet
 1169  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1170  */
 1171 static __inline unsigned int
 1172 calc_tx_descs(const struct mbuf *m, int nsegs)
 1173 {
 1174         unsigned int flits;
 1175 
 1176         if (m->m_pkthdr.len <= PIO_LEN)
 1177                 return 1;
 1178 
 1179         flits = sgl_len(nsegs) + 2;
 1180         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1181                 flits++;
 1182 
 1183         return flits_to_desc(flits);
 1184 }
 1185 
 1186 /**
 1187  *      make_sgl - populate a scatter/gather list for a packet
 1188  *      @sgp: the SGL to populate
 1189  *      @segs: the packet dma segments
 1190  *      @nsegs: the number of segments
 1191  *
 1192  *      Generates a scatter/gather list for the buffers that make up a packet
 1193  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1194  *      appropriately.
 1195  */
 1196 static __inline void
 1197 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1198 {
 1199         int i, idx;
 1200         
 1201         for (idx = 0, i = 0; i < nsegs; i++) {
 1202                 /*
 1203                  * firmware doesn't like empty segments
 1204                  */
 1205                 if (segs[i].ds_len == 0)
 1206                         continue;
 1207                 if (i && idx == 0) 
 1208                         ++sgp;
 1209                 
 1210                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1211                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1212                 idx ^= 1;
 1213         }
 1214         
 1215         if (idx) {
 1216                 sgp->len[idx] = 0;
 1217                 sgp->addr[idx] = 0;
 1218         }
 1219 }
 1220         
 1221 /**
 1222  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1223  *      @adap: the adapter
 1224  *      @q: the Tx queue
 1225  *
 1226  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1227  *      where the HW is going to sleep just after we checked, however,
 1228  *      then the interrupt handler will detect the outstanding TX packet
 1229  *      and ring the doorbell for us.
 1230  *
 1231  *      When GTS is disabled we unconditionally ring the doorbell.
 1232  */
 1233 static __inline void
 1234 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1235 {
 1236 #if USE_GTS
 1237         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1238         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1239                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1240 #ifdef T3_TRACE
 1241                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1242                           q->cntxt_id);
 1243 #endif
 1244                 t3_write_reg(adap, A_SG_KDOORBELL,
 1245                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1246         }
 1247 #else
 1248         if (mustring || ++q->db_pending >= 32) {
 1249                 wmb();            /* write descriptors before telling HW */
 1250                 t3_write_reg(adap, A_SG_KDOORBELL,
 1251                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1252                 q->db_pending = 0;
 1253         }
 1254 #endif
 1255 }
 1256 
 1257 static __inline void
 1258 wr_gen2(struct tx_desc *d, unsigned int gen)
 1259 {
 1260 #if SGE_NUM_GENBITS == 2
 1261         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1262 #endif
 1263 }
 1264 
 1265 /**
 1266  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1267  *      @ndesc: number of Tx descriptors spanned by the SGL
 1268  *      @txd: first Tx descriptor to be written
 1269  *      @txqs: txq state (generation and producer index)
 1270  *      @txq: the SGE Tx queue
 1271  *      @sgl: the SGL
 1272  *      @flits: number of flits to the start of the SGL in the first descriptor
 1273  *      @sgl_flits: the SGL size in flits
 1274  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1275  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1276  *
 1277  *      Write a work request header and an associated SGL.  If the SGL is
 1278  *      small enough to fit into one Tx descriptor it has already been written
 1279  *      and we just need to write the WR header.  Otherwise we distribute the
 1280  *      SGL across the number of descriptors it spans.
 1281  */
 1282 static void
 1283 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1284     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1285     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1286 {
 1287 
 1288         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1289         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1290         
 1291         if (__predict_true(ndesc == 1)) {
 1292                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1293                     V_WR_SGLSFLT(flits)) | wr_hi,
 1294                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1295                     wr_lo);
 1296 
 1297                 wr_gen2(txd, txqs->gen);
 1298                 
 1299         } else {
 1300                 unsigned int ogen = txqs->gen;
 1301                 const uint64_t *fp = (const uint64_t *)sgl;
 1302                 struct work_request_hdr *wp = wrp;
 1303                 
 1304                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1305                     V_WR_SGLSFLT(flits)) | wr_hi;
 1306                 
 1307                 while (sgl_flits) {
 1308                         unsigned int avail = WR_FLITS - flits;
 1309 
 1310                         if (avail > sgl_flits)
 1311                                 avail = sgl_flits;
 1312                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1313                         sgl_flits -= avail;
 1314                         ndesc--;
 1315                         if (!sgl_flits)
 1316                                 break;
 1317                         
 1318                         fp += avail;
 1319                         txd++;
 1320                         txsd++;
 1321                         if (++txqs->pidx == txq->size) {
 1322                                 txqs->pidx = 0;
 1323                                 txqs->gen ^= 1;
 1324                                 txd = txq->desc;
 1325                                 txsd = txq->sdesc;
 1326                         }
 1327 
 1328                         /*
 1329                          * when the head of the mbuf chain
 1330                          * is freed all clusters will be freed
 1331                          * with it
 1332                          */
 1333                         wrp = (struct work_request_hdr *)txd;
 1334                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1335                             V_WR_SGLSFLT(1)) | wr_hi;
 1336                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1337                                     sgl_flits + 1)) |
 1338                             V_WR_GEN(txqs->gen)) | wr_lo;
 1339                         wr_gen2(txd, txqs->gen);
 1340                         flits = 1;
 1341                 }
 1342                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1343                 wmb();
 1344                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1345                 wr_gen2((struct tx_desc *)wp, ogen);
 1346         }
 1347 }
 1348 
 1349 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1350 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1351 
 1352 #define GET_VTAG(cntrl, m) \
 1353 do { \
 1354         if ((m)->m_flags & M_VLANTAG)                                               \
 1355                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1356 } while (0)
 1357 
 1358 static int
 1359 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1360 {
 1361         adapter_t *sc;
 1362         struct mbuf *m0;
 1363         struct sge_txq *txq;
 1364         struct txq_state txqs;
 1365         struct port_info *pi;
 1366         unsigned int ndesc, flits, cntrl, mlen;
 1367         int err, nsegs, tso_info = 0;
 1368 
 1369         struct work_request_hdr *wrp;
 1370         struct tx_sw_desc *txsd;
 1371         struct sg_ent *sgp, *sgl;
 1372         uint32_t wr_hi, wr_lo, sgl_flits; 
 1373         bus_dma_segment_t segs[TX_MAX_SEGS];
 1374 
 1375         struct tx_desc *txd;
 1376                 
 1377         pi = qs->port;
 1378         sc = pi->adapter;
 1379         txq = &qs->txq[TXQ_ETH];
 1380         txd = &txq->desc[txq->pidx];
 1381         txsd = &txq->sdesc[txq->pidx];
 1382         sgl = txq->txq_sgl;
 1383 
 1384         prefetch(txd);
 1385         m0 = *m;
 1386 
 1387         mtx_assert(&qs->lock, MA_OWNED);
 1388         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1389         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1390         
 1391         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1392             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1393                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1394 
 1395         if (m0->m_nextpkt != NULL) {
 1396                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1397                 ndesc = 1;
 1398                 mlen = 0;
 1399         } else {
 1400                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1401                     &m0, segs, &nsegs))) {
 1402                         if (cxgb_debug)
 1403                                 printf("failed ... err=%d\n", err);
 1404                         return (err);
 1405                 }
 1406                 mlen = m0->m_pkthdr.len;
 1407                 ndesc = calc_tx_descs(m0, nsegs);
 1408         }
 1409         txq_prod(txq, ndesc, &txqs);
 1410 
 1411         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1412         txsd->m = m0;
 1413 
 1414         if (m0->m_nextpkt != NULL) {
 1415                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1416                 int i, fidx;
 1417 
 1418                 if (nsegs > 7)
 1419                         panic("trying to coalesce %d packets in to one WR", nsegs);
 1420                 txq->txq_coalesced += nsegs;
 1421                 wrp = (struct work_request_hdr *)txd;
 1422                 flits = nsegs*2 + 1;
 1423 
 1424                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1425                         struct cpl_tx_pkt_batch_entry *cbe;
 1426                         uint64_t flit;
 1427                         uint32_t *hflit = (uint32_t *)&flit;
 1428                         int cflags = m0->m_pkthdr.csum_flags;
 1429 
 1430                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1431                         GET_VTAG(cntrl, m0);
 1432                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1433                         if (__predict_false(!(cflags & CSUM_IP)))
 1434                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1435                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1436                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1437                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1438 
 1439                         hflit[0] = htonl(cntrl);
 1440                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1441                         flit |= htobe64(1 << 24);
 1442                         cbe = &cpl_batch->pkt_entry[i];
 1443                         cbe->cntrl = hflit[0];
 1444                         cbe->len = hflit[1];
 1445                         cbe->addr = htobe64(segs[i].ds_addr);
 1446                 }
 1447 
 1448                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1449                     V_WR_SGLSFLT(flits)) |
 1450                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1451                 wr_lo = htonl(V_WR_LEN(flits) |
 1452                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1453                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1454                 wmb();
 1455                 ETHER_BPF_MTAP(pi->ifp, m0);
 1456                 wr_gen2(txd, txqs.gen);
 1457                 check_ring_tx_db(sc, txq, 0);
 1458                 return (0);             
 1459         } else if (tso_info) {
 1460                 uint16_t eth_type;
 1461                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1462                 struct ether_header *eh;
 1463                 void *l3hdr;
 1464                 struct tcphdr *tcp;
 1465 
 1466                 txd->flit[2] = 0;
 1467                 GET_VTAG(cntrl, m0);
 1468                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1469                 hdr->cntrl = htonl(cntrl);
 1470                 hdr->len = htonl(mlen | 0x80000000);
 1471 
 1472                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1473                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1474                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1475                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1476                         panic("tx tso packet too small");
 1477                 }
 1478 
 1479                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1480                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1481                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1482                         if (__predict_false(m0 == NULL)) {
 1483                                 /* XXX panic probably an overreaction */
 1484                                 panic("couldn't fit header into mbuf");
 1485                         }
 1486                 }
 1487 
 1488                 eh = mtod(m0, struct ether_header *);
 1489                 eth_type = eh->ether_type;
 1490                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1491                         struct ether_vlan_header *evh = (void *)eh;
 1492 
 1493                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1494                         l3hdr = evh + 1;
 1495                         eth_type = evh->evl_proto;
 1496                 } else {
 1497                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1498                         l3hdr = eh + 1;
 1499                 }
 1500 
 1501                 if (eth_type == htons(ETHERTYPE_IP)) {
 1502                         struct ip *ip = l3hdr;
 1503 
 1504                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1505                         tcp = (struct tcphdr *)(ip + 1);
 1506                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1507                         struct ip6_hdr *ip6 = l3hdr;
 1508 
 1509                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1510                             ("%s: CSUM_TSO with ip6_nxt %d",
 1511                             __func__, ip6->ip6_nxt));
 1512 
 1513                         tso_info |= F_LSO_IPV6;
 1514                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1515                         tcp = (struct tcphdr *)(ip6 + 1);
 1516                 } else
 1517                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1518 
 1519                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1520                 hdr->lso_info = htonl(tso_info);
 1521 
 1522                 if (__predict_false(mlen <= PIO_LEN)) {
 1523                         /*
 1524                          * pkt not undersized but fits in PIO_LEN
 1525                          * Indicates a TSO bug at the higher levels.
 1526                          */
 1527                         txsd->m = NULL;
 1528                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1529                         flits = (mlen + 7) / 8 + 3;
 1530                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1531                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1532                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1533                         wr_lo = htonl(V_WR_LEN(flits) |
 1534                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1535                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1536                         wmb();
 1537                         ETHER_BPF_MTAP(pi->ifp, m0);
 1538                         wr_gen2(txd, txqs.gen);
 1539                         check_ring_tx_db(sc, txq, 0);
 1540                         m_freem(m0);
 1541                         return (0);
 1542                 }
 1543                 flits = 3;      
 1544         } else {
 1545                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1546                 
 1547                 GET_VTAG(cntrl, m0);
 1548                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1549                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1550                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1551                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1552                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1553                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1554                 cpl->cntrl = htonl(cntrl);
 1555                 cpl->len = htonl(mlen | 0x80000000);
 1556 
 1557                 if (mlen <= PIO_LEN) {
 1558                         txsd->m = NULL;
 1559                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1560                         flits = (mlen + 7) / 8 + 2;
 1561                         
 1562                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1563                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1564                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1565                         wr_lo = htonl(V_WR_LEN(flits) |
 1566                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1567                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1568                         wmb();
 1569                         ETHER_BPF_MTAP(pi->ifp, m0);
 1570                         wr_gen2(txd, txqs.gen);
 1571                         check_ring_tx_db(sc, txq, 0);
 1572                         m_freem(m0);
 1573                         return (0);
 1574                 }
 1575                 flits = 2;
 1576         }
 1577         wrp = (struct work_request_hdr *)txd;
 1578         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1579         make_sgl(sgp, segs, nsegs);
 1580 
 1581         sgl_flits = sgl_len(nsegs);
 1582 
 1583         ETHER_BPF_MTAP(pi->ifp, m0);
 1584 
 1585         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1586         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1587         wr_lo = htonl(V_WR_TID(txq->token));
 1588         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1589             sgl_flits, wr_hi, wr_lo);
 1590         check_ring_tx_db(sc, txq, 0);
 1591 
 1592         return (0);
 1593 }
 1594 
 1595 void
 1596 cxgb_tx_watchdog(void *arg)
 1597 {
 1598         struct sge_qset *qs = arg;
 1599         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1600 
 1601         if (qs->coalescing != 0 &&
 1602             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1603             TXQ_RING_EMPTY(qs))
 1604                 qs->coalescing = 0; 
 1605         else if (qs->coalescing == 0 &&
 1606             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1607                 qs->coalescing = 1;
 1608         if (TXQ_TRYLOCK(qs)) {
 1609                 qs->qs_flags |= QS_FLUSHING;
 1610                 cxgb_start_locked(qs);
 1611                 qs->qs_flags &= ~QS_FLUSHING;
 1612                 TXQ_UNLOCK(qs);
 1613         }
 1614         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1615                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1616                     qs, txq->txq_watchdog.c_cpu);
 1617 }
 1618 
 1619 static void
 1620 cxgb_tx_timeout(void *arg)
 1621 {
 1622         struct sge_qset *qs = arg;
 1623         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1624 
 1625         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1626                 qs->coalescing = 1;     
 1627         if (TXQ_TRYLOCK(qs)) {
 1628                 qs->qs_flags |= QS_TIMEOUT;
 1629                 cxgb_start_locked(qs);
 1630                 qs->qs_flags &= ~QS_TIMEOUT;
 1631                 TXQ_UNLOCK(qs);
 1632         }
 1633 }
 1634 
 1635 static void
 1636 cxgb_start_locked(struct sge_qset *qs)
 1637 {
 1638         struct mbuf *m_head = NULL;
 1639         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1640         struct port_info *pi = qs->port;
 1641         struct ifnet *ifp = pi->ifp;
 1642 
 1643         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1644                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1645 
 1646         if (!pi->link_config.link_ok) {
 1647                 TXQ_RING_FLUSH(qs);
 1648                 return;
 1649         }
 1650         TXQ_LOCK_ASSERT(qs);
 1651         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1652             pi->link_config.link_ok) {
 1653                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1654 
 1655                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1656                         break;
 1657 
 1658                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1659                         break;
 1660                 /*
 1661                  *  Encapsulation can modify our pointer, and or make it
 1662                  *  NULL on failure.  In that event, we can't requeue.
 1663                  */
 1664                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1665                         break;
 1666 
 1667                 m_head = NULL;
 1668         }
 1669 
 1670         if (txq->db_pending)
 1671                 check_ring_tx_db(pi->adapter, txq, 1);
 1672 
 1673         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1674             pi->link_config.link_ok)
 1675                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1676                     qs, txq->txq_timer.c_cpu);
 1677         if (m_head != NULL)
 1678                 m_freem(m_head);
 1679 }
 1680 
 1681 static int
 1682 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1683 {
 1684         struct port_info *pi = qs->port;
 1685         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1686         struct buf_ring *br = txq->txq_mr;
 1687         int error, avail;
 1688 
 1689         avail = txq->size - txq->in_use;
 1690         TXQ_LOCK_ASSERT(qs);
 1691 
 1692         /*
 1693          * We can only do a direct transmit if the following are true:
 1694          * - we aren't coalescing (ring < 3/4 full)
 1695          * - the link is up -- checked in caller
 1696          * - there are no packets enqueued already
 1697          * - there is space in hardware transmit queue 
 1698          */
 1699         if (check_pkt_coalesce(qs) == 0 &&
 1700             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1701                 if (t3_encap(qs, &m)) {
 1702                         if (m != NULL &&
 1703                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1704                                 return (error);
 1705                 } else {
 1706                         if (txq->db_pending)
 1707                                 check_ring_tx_db(pi->adapter, txq, 1);
 1708 
 1709                         /*
 1710                          * We've bypassed the buf ring so we need to update
 1711                          * the stats directly
 1712                          */
 1713                         txq->txq_direct_packets++;
 1714                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1715                 }
 1716         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1717                 return (error);
 1718 
 1719         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1720         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1721             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1722                 cxgb_start_locked(qs);
 1723         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1724                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1725                     qs, txq->txq_timer.c_cpu);
 1726         return (0);
 1727 }
 1728 
 1729 int
 1730 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1731 {
 1732         struct sge_qset *qs;
 1733         struct port_info *pi = ifp->if_softc;
 1734         int error, qidx = pi->first_qset;
 1735 
 1736         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1737             ||(!pi->link_config.link_ok)) {
 1738                 m_freem(m);
 1739                 return (0);
 1740         }
 1741         
 1742         if (m->m_flags & M_FLOWID)
 1743                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1744 
 1745         qs = &pi->adapter->sge.qs[qidx];
 1746         
 1747         if (TXQ_TRYLOCK(qs)) {
 1748                 /* XXX running */
 1749                 error = cxgb_transmit_locked(ifp, qs, m);
 1750                 TXQ_UNLOCK(qs);
 1751         } else
 1752                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1753         return (error);
 1754 }
 1755 
 1756 void
 1757 cxgb_qflush(struct ifnet *ifp)
 1758 {
 1759         /*
 1760          * flush any enqueued mbufs in the buf_rings
 1761          * and in the transmit queues
 1762          * no-op for now
 1763          */
 1764         return;
 1765 }
 1766 
 1767 /**
 1768  *      write_imm - write a packet into a Tx descriptor as immediate data
 1769  *      @d: the Tx descriptor to write
 1770  *      @m: the packet
 1771  *      @len: the length of packet data to write as immediate data
 1772  *      @gen: the generation bit value to write
 1773  *
 1774  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1775  *      contains a work request at its beginning.  We must write the packet
 1776  *      carefully so the SGE doesn't read accidentally before it's written in
 1777  *      its entirety.
 1778  */
 1779 static __inline void
 1780 write_imm(struct tx_desc *d, caddr_t src,
 1781           unsigned int len, unsigned int gen)
 1782 {
 1783         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1784         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1785         uint32_t wr_hi, wr_lo;
 1786 
 1787         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1788             ("%s: invalid len %d", __func__, len));
 1789         
 1790         memcpy(&to[1], &from[1], len - sizeof(*from));
 1791         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1792             V_WR_BCNTLFLT(len & 7));
 1793         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1794         set_wr_hdr(to, wr_hi, wr_lo);
 1795         wmb();
 1796         wr_gen2(d, gen);
 1797 }
 1798 
 1799 /**
 1800  *      check_desc_avail - check descriptor availability on a send queue
 1801  *      @adap: the adapter
 1802  *      @q: the TX queue
 1803  *      @m: the packet needing the descriptors
 1804  *      @ndesc: the number of Tx descriptors needed
 1805  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1806  *
 1807  *      Checks if the requested number of Tx descriptors is available on an
 1808  *      SGE send queue.  If the queue is already suspended or not enough
 1809  *      descriptors are available the packet is queued for later transmission.
 1810  *      Must be called with the Tx queue locked.
 1811  *
 1812  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1813  *      enough descriptors and the packet has been queued, and 2 if the caller
 1814  *      needs to retry because there weren't enough descriptors at the
 1815  *      beginning of the call but some freed up in the mean time.
 1816  */
 1817 static __inline int
 1818 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1819                  struct mbuf *m, unsigned int ndesc,
 1820                  unsigned int qid)
 1821 {
 1822         /* 
 1823          * XXX We currently only use this for checking the control queue
 1824          * the control queue is only used for binding qsets which happens
 1825          * at init time so we are guaranteed enough descriptors
 1826          */
 1827         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1828 addq_exit:      mbufq_tail(&q->sendq, m);
 1829                 return 1;
 1830         }
 1831         if (__predict_false(q->size - q->in_use < ndesc)) {
 1832 
 1833                 struct sge_qset *qs = txq_to_qset(q, qid);
 1834 
 1835                 setbit(&qs->txq_stopped, qid);
 1836                 if (should_restart_tx(q) &&
 1837                     test_and_clear_bit(qid, &qs->txq_stopped))
 1838                         return 2;
 1839 
 1840                 q->stops++;
 1841                 goto addq_exit;
 1842         }
 1843         return 0;
 1844 }
 1845 
 1846 
 1847 /**
 1848  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1849  *      @q: the SGE control Tx queue
 1850  *
 1851  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1852  *      that send only immediate data (presently just the control queues) and
 1853  *      thus do not have any mbufs
 1854  */
 1855 static __inline void
 1856 reclaim_completed_tx_imm(struct sge_txq *q)
 1857 {
 1858         unsigned int reclaim = q->processed - q->cleaned;
 1859 
 1860         q->in_use -= reclaim;
 1861         q->cleaned += reclaim;
 1862 }
 1863 
 1864 /**
 1865  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1866  *      @adap: the adapter
 1867  *      @q: the control queue
 1868  *      @m: the packet
 1869  *
 1870  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1871  *      a control queue must fit entirely as immediate data in a single Tx
 1872  *      descriptor and have no page fragments.
 1873  */
 1874 static int
 1875 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1876 {
 1877         int ret;
 1878         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1879         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1880         
 1881         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1882 
 1883         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1884         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1885 
 1886         TXQ_LOCK(qs);
 1887 again:  reclaim_completed_tx_imm(q);
 1888 
 1889         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1890         if (__predict_false(ret)) {
 1891                 if (ret == 1) {
 1892                         TXQ_UNLOCK(qs);
 1893                         return (ENOSPC);
 1894                 }
 1895                 goto again;
 1896         }
 1897         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1898         
 1899         q->in_use++;
 1900         if (++q->pidx >= q->size) {
 1901                 q->pidx = 0;
 1902                 q->gen ^= 1;
 1903         }
 1904         TXQ_UNLOCK(qs);
 1905         wmb();
 1906         t3_write_reg(adap, A_SG_KDOORBELL,
 1907             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1908 
 1909         m_free(m);
 1910         return (0);
 1911 }
 1912 
 1913 
 1914 /**
 1915  *      restart_ctrlq - restart a suspended control queue
 1916  *      @qs: the queue set cotaining the control queue
 1917  *
 1918  *      Resumes transmission on a suspended Tx control queue.
 1919  */
 1920 static void
 1921 restart_ctrlq(void *data, int npending)
 1922 {
 1923         struct mbuf *m;
 1924         struct sge_qset *qs = (struct sge_qset *)data;
 1925         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1926         adapter_t *adap = qs->port->adapter;
 1927 
 1928         TXQ_LOCK(qs);
 1929 again:  reclaim_completed_tx_imm(q);
 1930 
 1931         while (q->in_use < q->size &&
 1932                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1933 
 1934                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1935                 m_free(m);
 1936 
 1937                 if (++q->pidx >= q->size) {
 1938                         q->pidx = 0;
 1939                         q->gen ^= 1;
 1940                 }
 1941                 q->in_use++;
 1942         }
 1943         if (!mbufq_empty(&q->sendq)) {
 1944                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1945 
 1946                 if (should_restart_tx(q) &&
 1947                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1948                         goto again;
 1949                 q->stops++;
 1950         }
 1951         TXQ_UNLOCK(qs);
 1952         t3_write_reg(adap, A_SG_KDOORBELL,
 1953                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1954 }
 1955 
 1956 
 1957 /*
 1958  * Send a management message through control queue 0
 1959  */
 1960 int
 1961 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1962 {
 1963         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1964 }
 1965 
 1966 /**
 1967  *      free_qset - free the resources of an SGE queue set
 1968  *      @sc: the controller owning the queue set
 1969  *      @q: the queue set
 1970  *
 1971  *      Release the HW and SW resources associated with an SGE queue set, such
 1972  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1973  *      queue set must be quiesced prior to calling this.
 1974  */
 1975 static void
 1976 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1977 {
 1978         int i;
 1979         
 1980         reclaim_completed_tx(q, 0, TXQ_ETH);
 1981         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 1982                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 1983         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 1984                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 1985                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 1986         }
 1987 
 1988         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 1989                 if (q->fl[i].desc) {
 1990                         mtx_lock_spin(&sc->sge.reg_lock);
 1991                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 1992                         mtx_unlock_spin(&sc->sge.reg_lock);
 1993                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 1994                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 1995                                         q->fl[i].desc_map);
 1996                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 1997                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 1998                 }
 1999                 if (q->fl[i].sdesc) {
 2000                         free_rx_bufs(sc, &q->fl[i]);
 2001                         free(q->fl[i].sdesc, M_DEVBUF);
 2002                 }
 2003         }
 2004 
 2005         mtx_unlock(&q->lock);
 2006         MTX_DESTROY(&q->lock);
 2007         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2008                 if (q->txq[i].desc) {
 2009                         mtx_lock_spin(&sc->sge.reg_lock);
 2010                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2011                         mtx_unlock_spin(&sc->sge.reg_lock);
 2012                         bus_dmamap_unload(q->txq[i].desc_tag,
 2013                                         q->txq[i].desc_map);
 2014                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2015                                         q->txq[i].desc_map);
 2016                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2017                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2018                 }
 2019                 if (q->txq[i].sdesc) {
 2020                         free(q->txq[i].sdesc, M_DEVBUF);
 2021                 }
 2022         }
 2023 
 2024         if (q->rspq.desc) {
 2025                 mtx_lock_spin(&sc->sge.reg_lock);
 2026                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2027                 mtx_unlock_spin(&sc->sge.reg_lock);
 2028                 
 2029                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2030                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2031                                 q->rspq.desc_map);
 2032                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2033                 MTX_DESTROY(&q->rspq.lock);
 2034         }
 2035 
 2036 #if defined(INET6) || defined(INET)
 2037         tcp_lro_free(&q->lro.ctrl);
 2038 #endif
 2039 
 2040         bzero(q, sizeof(*q));
 2041 }
 2042 
 2043 /**
 2044  *      t3_free_sge_resources - free SGE resources
 2045  *      @sc: the adapter softc
 2046  *
 2047  *      Frees resources used by the SGE queue sets.
 2048  */
 2049 void
 2050 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2051 {
 2052         int i;
 2053 
 2054         for (i = 0; i < nqsets; ++i) {
 2055                 TXQ_LOCK(&sc->sge.qs[i]);
 2056                 t3_free_qset(sc, &sc->sge.qs[i]);
 2057         }
 2058 }
 2059 
 2060 /**
 2061  *      t3_sge_start - enable SGE
 2062  *      @sc: the controller softc
 2063  *
 2064  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2065  *      transfers.
 2066  */
 2067 void
 2068 t3_sge_start(adapter_t *sc)
 2069 {
 2070         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2071 }
 2072 
 2073 /**
 2074  *      t3_sge_stop - disable SGE operation
 2075  *      @sc: the adapter
 2076  *
 2077  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
 2078  *      from error interrupts) or from normal process context.  In the latter
 2079  *      case it also disables any pending queue restart tasklets.  Note that
 2080  *      if it is called in interrupt context it cannot disable the restart
 2081  *      tasklets as it cannot wait, however the tasklets will have no effect
 2082  *      since the doorbells are disabled and the driver will call this again
 2083  *      later from process context, at which time the tasklets will be stopped
 2084  *      if they are still running.
 2085  */
 2086 void
 2087 t3_sge_stop(adapter_t *sc)
 2088 {
 2089         int i, nqsets;
 2090         
 2091         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2092 
 2093         if (sc->tq == NULL)
 2094                 return;
 2095         
 2096         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2097                 nqsets += sc->port[i].nqsets;
 2098 #ifdef notyet
 2099         /*
 2100          * 
 2101          * XXX
 2102          */
 2103         for (i = 0; i < nqsets; ++i) {
 2104                 struct sge_qset *qs = &sc->sge.qs[i];
 2105                 
 2106                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2107                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2108         }
 2109 #endif
 2110 }
 2111 
 2112 /**
 2113  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2114  *      @adapter: the adapter
 2115  *      @q: the Tx queue to reclaim descriptors from
 2116  *      @reclaimable: the number of descriptors to reclaim
 2117  *      @m_vec_size: maximum number of buffers to reclaim
 2118  *      @desc_reclaimed: returns the number of descriptors reclaimed
 2119  *
 2120  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2121  *      Tx buffers.  Called with the Tx queue lock held.
 2122  *
 2123  *      Returns number of buffers of reclaimed   
 2124  */
 2125 void
 2126 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2127 {
 2128         struct tx_sw_desc *txsd;
 2129         unsigned int cidx, mask;
 2130         struct sge_txq *q = &qs->txq[queue];
 2131 
 2132 #ifdef T3_TRACE
 2133         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2134                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2135 #endif
 2136         cidx = q->cidx;
 2137         mask = q->size - 1;
 2138         txsd = &q->sdesc[cidx];
 2139 
 2140         mtx_assert(&qs->lock, MA_OWNED);
 2141         while (reclaimable--) {
 2142                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2143                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2144 
 2145                 if (txsd->m != NULL) {
 2146                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2147                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2148                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2149                         }
 2150                         m_freem_list(txsd->m);
 2151                         txsd->m = NULL;
 2152                 } else
 2153                         q->txq_skipped++;
 2154                 
 2155                 ++txsd;
 2156                 if (++cidx == q->size) {
 2157                         cidx = 0;
 2158                         txsd = q->sdesc;
 2159                 }
 2160         }
 2161         q->cidx = cidx;
 2162 
 2163 }
 2164 
 2165 /**
 2166  *      is_new_response - check if a response is newly written
 2167  *      @r: the response descriptor
 2168  *      @q: the response queue
 2169  *
 2170  *      Returns true if a response descriptor contains a yet unprocessed
 2171  *      response.
 2172  */
 2173 static __inline int
 2174 is_new_response(const struct rsp_desc *r,
 2175     const struct sge_rspq *q)
 2176 {
 2177         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2178 }
 2179 
 2180 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2181 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2182                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2183                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2184                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2185 
 2186 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2187 #define NOMEM_INTR_DELAY 2500
 2188 
 2189 #ifdef TCP_OFFLOAD
 2190 /**
 2191  *      write_ofld_wr - write an offload work request
 2192  *      @adap: the adapter
 2193  *      @m: the packet to send
 2194  *      @q: the Tx queue
 2195  *      @pidx: index of the first Tx descriptor to write
 2196  *      @gen: the generation value to use
 2197  *      @ndesc: number of descriptors the packet will occupy
 2198  *
 2199  *      Write an offload work request to send the supplied packet.  The packet
 2200  *      data already carry the work request with most fields populated.
 2201  */
 2202 static void
 2203 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2204     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2205 {
 2206         unsigned int sgl_flits, flits;
 2207         int i, idx, nsegs, wrlen;
 2208         struct work_request_hdr *from;
 2209         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2210         struct tx_desc *d = &q->desc[pidx];
 2211         struct txq_state txqs;
 2212         struct sglist_seg *segs;
 2213         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2214         struct sglist *sgl;
 2215 
 2216         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2217         wrlen = m->m_len - sizeof(*oh);
 2218 
 2219         if (!(oh->flags & F_HDR_SGL)) {
 2220                 write_imm(d, (caddr_t)from, wrlen, gen);
 2221 
 2222                 /*
 2223                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2224                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2225                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2226                  */
 2227                 if (!(oh->flags & F_HDR_DF))
 2228                         m_free(m);
 2229                 return;
 2230         }
 2231 
 2232         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2233 
 2234         sgl = oh->sgl;
 2235         flits = wrlen / 8;
 2236         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2237 
 2238         nsegs = sgl->sg_nseg;
 2239         segs = sgl->sg_segs;
 2240         for (idx = 0, i = 0; i < nsegs; i++) {
 2241                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2242                 if (i && idx == 0) 
 2243                         ++sgp;
 2244                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2245                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2246                 idx ^= 1;
 2247         }
 2248         if (idx) {
 2249                 sgp->len[idx] = 0;
 2250                 sgp->addr[idx] = 0;
 2251         }
 2252 
 2253         sgl_flits = sgl_len(nsegs);
 2254         txqs.gen = gen;
 2255         txqs.pidx = pidx;
 2256         txqs.compl = 0;
 2257 
 2258         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2259             from->wrh_hi, from->wrh_lo);
 2260 }
 2261 
 2262 /**
 2263  *      ofld_xmit - send a packet through an offload queue
 2264  *      @adap: the adapter
 2265  *      @q: the Tx offload queue
 2266  *      @m: the packet
 2267  *
 2268  *      Send an offload packet through an SGE offload queue.
 2269  */
 2270 static int
 2271 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2272 {
 2273         int ret;
 2274         unsigned int ndesc;
 2275         unsigned int pidx, gen;
 2276         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2277         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2278 
 2279         ndesc = G_HDR_NDESC(oh->flags);
 2280 
 2281         TXQ_LOCK(qs);
 2282 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2283         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2284         if (__predict_false(ret)) {
 2285                 if (ret == 1) {
 2286                         TXQ_UNLOCK(qs);
 2287                         return (EINTR);
 2288                 }
 2289                 goto again;
 2290         }
 2291 
 2292         gen = q->gen;
 2293         q->in_use += ndesc;
 2294         pidx = q->pidx;
 2295         q->pidx += ndesc;
 2296         if (q->pidx >= q->size) {
 2297                 q->pidx -= q->size;
 2298                 q->gen ^= 1;
 2299         }
 2300 
 2301         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2302         check_ring_tx_db(adap, q, 1);
 2303         TXQ_UNLOCK(qs);
 2304 
 2305         return (0);
 2306 }
 2307 
 2308 /**
 2309  *      restart_offloadq - restart a suspended offload queue
 2310  *      @qs: the queue set cotaining the offload queue
 2311  *
 2312  *      Resumes transmission on a suspended Tx offload queue.
 2313  */
 2314 static void
 2315 restart_offloadq(void *data, int npending)
 2316 {
 2317         struct mbuf *m;
 2318         struct sge_qset *qs = data;
 2319         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2320         adapter_t *adap = qs->port->adapter;
 2321         int cleaned;
 2322                 
 2323         TXQ_LOCK(qs);
 2324 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2325 
 2326         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2327                 unsigned int gen, pidx;
 2328                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2329                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2330 
 2331                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2332                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2333                         if (should_restart_tx(q) &&
 2334                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2335                                 goto again;
 2336                         q->stops++;
 2337                         break;
 2338                 }
 2339 
 2340                 gen = q->gen;
 2341                 q->in_use += ndesc;
 2342                 pidx = q->pidx;
 2343                 q->pidx += ndesc;
 2344                 if (q->pidx >= q->size) {
 2345                         q->pidx -= q->size;
 2346                         q->gen ^= 1;
 2347                 }
 2348                 
 2349                 (void)mbufq_dequeue(&q->sendq);
 2350                 TXQ_UNLOCK(qs);
 2351                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2352                 TXQ_LOCK(qs);
 2353         }
 2354 #if USE_GTS
 2355         set_bit(TXQ_RUNNING, &q->flags);
 2356         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2357 #endif
 2358         TXQ_UNLOCK(qs);
 2359         wmb();
 2360         t3_write_reg(adap, A_SG_KDOORBELL,
 2361                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2362 }
 2363 
 2364 /**
 2365  *      t3_offload_tx - send an offload packet
 2366  *      @m: the packet
 2367  *
 2368  *      Sends an offload packet.  We use the packet priority to select the
 2369  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2370  *      should be sent as regular or control, bits 1-3 select the queue set.
 2371  */
 2372 int
 2373 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2374 {
 2375         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2376         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2377 
 2378         if (oh->flags & F_HDR_CTRL) {
 2379                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2380                 return (ctrl_xmit(sc, qs, m));
 2381         } else
 2382                 return (ofld_xmit(sc, qs, m));
 2383 }
 2384 #endif
 2385 
 2386 static void
 2387 restart_tx(struct sge_qset *qs)
 2388 {
 2389         struct adapter *sc = qs->port->adapter;
 2390 
 2391         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2392             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2393             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2394                 qs->txq[TXQ_OFLD].restarts++;
 2395                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2396         }
 2397 
 2398         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2399             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2400             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2401                 qs->txq[TXQ_CTRL].restarts++;
 2402                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2403         }
 2404 }
 2405 
 2406 /**
 2407  *      t3_sge_alloc_qset - initialize an SGE queue set
 2408  *      @sc: the controller softc
 2409  *      @id: the queue set id
 2410  *      @nports: how many Ethernet ports will be using this queue set
 2411  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2412  *      @p: configuration parameters for this queue set
 2413  *      @ntxq: number of Tx queues for the queue set
 2414  *      @pi: port info for queue set
 2415  *
 2416  *      Allocate resources and initialize an SGE queue set.  A queue set
 2417  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2418  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2419  *      queue, offload queue, and control queue.
 2420  */
 2421 int
 2422 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2423                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2424 {
 2425         struct sge_qset *q = &sc->sge.qs[id];
 2426         int i, ret = 0;
 2427 
 2428         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2429         q->port = pi;
 2430         q->adap = sc;
 2431 
 2432         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2433             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2434                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2435                 goto err;
 2436         }
 2437         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2438             M_NOWAIT | M_ZERO)) == NULL) {
 2439                 device_printf(sc->dev, "failed to allocate ifq\n");
 2440                 goto err;
 2441         }
 2442         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2443         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2444         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2445         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2446         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2447 
 2448         init_qset_cntxt(q, id);
 2449         q->idx = id;
 2450         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2451                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2452                     &q->fl[0].desc, &q->fl[0].sdesc,
 2453                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2454                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2455                 printf("error %d from alloc ring fl0\n", ret);
 2456                 goto err;
 2457         }
 2458 
 2459         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2460                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2461                     &q->fl[1].desc, &q->fl[1].sdesc,
 2462                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2463                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2464                 printf("error %d from alloc ring fl1\n", ret);
 2465                 goto err;
 2466         }
 2467 
 2468         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2469                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2470                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2471                     NULL, NULL)) != 0) {
 2472                 printf("error %d from alloc ring rspq\n", ret);
 2473                 goto err;
 2474         }
 2475 
 2476         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2477             device_get_unit(sc->dev), irq_vec_idx);
 2478         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2479 
 2480         for (i = 0; i < ntxq; ++i) {
 2481                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2482 
 2483                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2484                             sizeof(struct tx_desc), sz,
 2485                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2486                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2487                             &q->txq[i].desc_map,
 2488                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2489                         printf("error %d from alloc ring tx %i\n", ret, i);
 2490                         goto err;
 2491                 }
 2492                 mbufq_init(&q->txq[i].sendq);
 2493                 q->txq[i].gen = 1;
 2494                 q->txq[i].size = p->txq_size[i];
 2495         }
 2496 
 2497 #ifdef TCP_OFFLOAD
 2498         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2499 #endif
 2500         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2501         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2502         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2503 
 2504         q->fl[0].gen = q->fl[1].gen = 1;
 2505         q->fl[0].size = p->fl_size;
 2506         q->fl[1].size = p->jumbo_size;
 2507 
 2508         q->rspq.gen = 1;
 2509         q->rspq.cidx = 0;
 2510         q->rspq.size = p->rspq_size;
 2511 
 2512         q->txq[TXQ_ETH].stop_thres = nports *
 2513             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2514 
 2515         q->fl[0].buf_size = MCLBYTES;
 2516         q->fl[0].zone = zone_pack;
 2517         q->fl[0].type = EXT_PACKET;
 2518 
 2519         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2520                 q->fl[1].zone = zone_jumbo16;
 2521                 q->fl[1].type = EXT_JUMBO16;
 2522         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2523                 q->fl[1].zone = zone_jumbo9;
 2524                 q->fl[1].type = EXT_JUMBO9;             
 2525         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2526                 q->fl[1].zone = zone_jumbop;
 2527                 q->fl[1].type = EXT_JUMBOP;
 2528         } else {
 2529                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2530                 ret = EDOOFUS;
 2531                 goto err;
 2532         }
 2533         q->fl[1].buf_size = p->jumbo_buf_size;
 2534 
 2535         /* Allocate and setup the lro_ctrl structure */
 2536         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2537 #if defined(INET6) || defined(INET)
 2538         ret = tcp_lro_init(&q->lro.ctrl);
 2539         if (ret) {
 2540                 printf("error %d from tcp_lro_init\n", ret);
 2541                 goto err;
 2542         }
 2543 #endif
 2544         q->lro.ctrl.ifp = pi->ifp;
 2545 
 2546         mtx_lock_spin(&sc->sge.reg_lock);
 2547         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2548                                    q->rspq.phys_addr, q->rspq.size,
 2549                                    q->fl[0].buf_size, 1, 0);
 2550         if (ret) {
 2551                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2552                 goto err_unlock;
 2553         }
 2554 
 2555         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2556                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2557                                           q->fl[i].phys_addr, q->fl[i].size,
 2558                                           q->fl[i].buf_size, p->cong_thres, 1,
 2559                                           0);
 2560                 if (ret) {
 2561                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2562                         goto err_unlock;
 2563                 }
 2564         }
 2565 
 2566         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2567                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2568                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2569                                  1, 0);
 2570         if (ret) {
 2571                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2572                 goto err_unlock;
 2573         }
 2574 
 2575         if (ntxq > 1) {
 2576                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2577                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2578                                          q->txq[TXQ_OFLD].phys_addr,
 2579                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2580                 if (ret) {
 2581                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2582                         goto err_unlock;
 2583                 }
 2584         }
 2585 
 2586         if (ntxq > 2) {
 2587                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2588                                          SGE_CNTXT_CTRL, id,
 2589                                          q->txq[TXQ_CTRL].phys_addr,
 2590                                          q->txq[TXQ_CTRL].size,
 2591                                          q->txq[TXQ_CTRL].token, 1, 0);
 2592                 if (ret) {
 2593                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2594                         goto err_unlock;
 2595                 }
 2596         }
 2597 
 2598         mtx_unlock_spin(&sc->sge.reg_lock);
 2599         t3_update_qset_coalesce(q, p);
 2600 
 2601         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2602         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2603         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2604 
 2605         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2606                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2607 
 2608         return (0);
 2609 
 2610 err_unlock:
 2611         mtx_unlock_spin(&sc->sge.reg_lock);
 2612 err:    
 2613         TXQ_LOCK(q);
 2614         t3_free_qset(sc, q);
 2615 
 2616         return (ret);
 2617 }
 2618 
 2619 /*
 2620  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2621  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2622  * will also be taken into account here.
 2623  */
 2624 void
 2625 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2626 {
 2627         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2628         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2629         struct ifnet *ifp = pi->ifp;
 2630         
 2631         if (cpl->vlan_valid) {
 2632                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2633                 m->m_flags |= M_VLANTAG;
 2634         } 
 2635 
 2636         m->m_pkthdr.rcvif = ifp;
 2637         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2638         /*
 2639          * adjust after conversion to mbuf chain
 2640          */
 2641         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2642         m->m_len -= (sizeof(*cpl) + ethpad);
 2643         m->m_data += (sizeof(*cpl) + ethpad);
 2644 
 2645         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2646                 struct ether_header *eh = mtod(m, void *);
 2647                 uint16_t eh_type;
 2648 
 2649                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2650                         struct ether_vlan_header *evh = mtod(m, void *);
 2651 
 2652                         eh_type = evh->evl_proto;
 2653                 } else
 2654                         eh_type = eh->ether_type;
 2655 
 2656                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2657                     eh_type == htons(ETHERTYPE_IP)) {
 2658                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2659                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2660                         m->m_pkthdr.csum_data = 0xffff;
 2661                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2662                     eh_type == htons(ETHERTYPE_IPV6)) {
 2663                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2664                             CSUM_PSEUDO_HDR);
 2665                         m->m_pkthdr.csum_data = 0xffff;
 2666                 }
 2667         }
 2668 }
 2669 
 2670 /**
 2671  *      get_packet - return the next ingress packet buffer from a free list
 2672  *      @adap: the adapter that received the packet
 2673  *      @drop_thres: # of remaining buffers before we start dropping packets
 2674  *      @qs: the qset that the SGE free list holding the packet belongs to
 2675  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2676  *      @r: response descriptor 
 2677  *
 2678  *      Get the next packet from a free list and complete setup of the
 2679  *      sk_buff.  If the packet is small we make a copy and recycle the
 2680  *      original buffer, otherwise we use the original buffer itself.  If a
 2681  *      positive drop threshold is supplied packets are dropped and their
 2682  *      buffers recycled if (a) the number of remaining buffers is under the
 2683  *      threshold and the packet is too big to copy, or (b) the packet should
 2684  *      be copied but there is no memory for the copy.
 2685  */
 2686 static int
 2687 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2688     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2689 {
 2690 
 2691         unsigned int len_cq =  ntohl(r->len_cq);
 2692         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2693         int mask, cidx = fl->cidx;
 2694         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2695         uint32_t len = G_RSPD_LEN(len_cq);
 2696         uint32_t flags = M_EXT;
 2697         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2698         caddr_t cl;
 2699         struct mbuf *m;
 2700         int ret = 0;
 2701 
 2702         mask = fl->size - 1;
 2703         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2704         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2705         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2706         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2707 
 2708         fl->credits--;
 2709         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2710         
 2711         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2712             sopeop == RSPQ_SOP_EOP) {
 2713                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2714                         goto skip_recycle;
 2715                 cl = mtod(m, void *);
 2716                 memcpy(cl, sd->rxsd_cl, len);
 2717                 recycle_rx_buf(adap, fl, fl->cidx);
 2718                 m->m_pkthdr.len = m->m_len = len;
 2719                 m->m_flags = 0;
 2720                 mh->mh_head = mh->mh_tail = m;
 2721                 ret = 1;
 2722                 goto done;
 2723         } else {
 2724         skip_recycle:
 2725                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2726                 cl = sd->rxsd_cl;
 2727                 m = sd->m;
 2728 
 2729                 if ((sopeop == RSPQ_SOP_EOP) ||
 2730                     (sopeop == RSPQ_SOP))
 2731                         flags |= M_PKTHDR;
 2732                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2733                 if (fl->zone == zone_pack) {
 2734                         /*
 2735                          * restore clobbered data pointer
 2736                          */
 2737                         m->m_data = m->m_ext.ext_buf;
 2738                 } else {
 2739                         m_cljset(m, cl, fl->type);
 2740                 }
 2741                 m->m_len = len;
 2742         }               
 2743         switch(sopeop) {
 2744         case RSPQ_SOP_EOP:
 2745                 ret = 1;
 2746                 /* FALLTHROUGH */
 2747         case RSPQ_SOP:
 2748                 mh->mh_head = mh->mh_tail = m;
 2749                 m->m_pkthdr.len = len;
 2750                 break;
 2751         case RSPQ_EOP:
 2752                 ret = 1;
 2753                 /* FALLTHROUGH */
 2754         case RSPQ_NSOP_NEOP:
 2755                 if (mh->mh_tail == NULL) {
 2756                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2757                         m_freem(m);
 2758                         break;
 2759                 }
 2760                 mh->mh_tail->m_next = m;
 2761                 mh->mh_tail = m;
 2762                 mh->mh_head->m_pkthdr.len += len;
 2763                 break;
 2764         }
 2765         if (cxgb_debug)
 2766                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2767 done:
 2768         if (++fl->cidx == fl->size)
 2769                 fl->cidx = 0;
 2770 
 2771         return (ret);
 2772 }
 2773 
 2774 /**
 2775  *      handle_rsp_cntrl_info - handles control information in a response
 2776  *      @qs: the queue set corresponding to the response
 2777  *      @flags: the response control flags
 2778  *
 2779  *      Handles the control information of an SGE response, such as GTS
 2780  *      indications and completion credits for the queue set's Tx queues.
 2781  *      HW coalesces credits, we don't do any extra SW coalescing.
 2782  */
 2783 static __inline void
 2784 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2785 {
 2786         unsigned int credits;
 2787 
 2788 #if USE_GTS
 2789         if (flags & F_RSPD_TXQ0_GTS)
 2790                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2791 #endif
 2792         credits = G_RSPD_TXQ0_CR(flags);
 2793         if (credits) 
 2794                 qs->txq[TXQ_ETH].processed += credits;
 2795 
 2796         credits = G_RSPD_TXQ2_CR(flags);
 2797         if (credits)
 2798                 qs->txq[TXQ_CTRL].processed += credits;
 2799 
 2800 # if USE_GTS
 2801         if (flags & F_RSPD_TXQ1_GTS)
 2802                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2803 # endif
 2804         credits = G_RSPD_TXQ1_CR(flags);
 2805         if (credits)
 2806                 qs->txq[TXQ_OFLD].processed += credits;
 2807 
 2808 }
 2809 
 2810 static void
 2811 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2812     unsigned int sleeping)
 2813 {
 2814         ;
 2815 }
 2816 
 2817 /**
 2818  *      process_responses - process responses from an SGE response queue
 2819  *      @adap: the adapter
 2820  *      @qs: the queue set to which the response queue belongs
 2821  *      @budget: how many responses can be processed in this round
 2822  *
 2823  *      Process responses from an SGE response queue up to the supplied budget.
 2824  *      Responses include received packets as well as credits and other events
 2825  *      for the queues that belong to the response queue's queue set.
 2826  *      A negative budget is effectively unlimited.
 2827  *
 2828  *      Additionally choose the interrupt holdoff time for the next interrupt
 2829  *      on this queue.  If the system is under memory shortage use a fairly
 2830  *      long delay to help recovery.
 2831  */
 2832 static int
 2833 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2834 {
 2835         struct sge_rspq *rspq = &qs->rspq;
 2836         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2837         int budget_left = budget;
 2838         unsigned int sleeping = 0;
 2839 #if defined(INET6) || defined(INET)
 2840         int lro_enabled = qs->lro.enabled;
 2841         int skip_lro;
 2842         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2843 #endif
 2844         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2845 #ifdef DEBUG    
 2846         static int last_holdoff = 0;
 2847         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2848                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2849                 last_holdoff = rspq->holdoff_tmr;
 2850         }
 2851 #endif
 2852         rspq->next_holdoff = rspq->holdoff_tmr;
 2853 
 2854         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2855                 int eth, eop = 0, ethpad = 0;
 2856                 uint32_t flags = ntohl(r->flags);
 2857                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2858                 uint8_t opcode = r->rss_hdr.opcode;
 2859                 
 2860                 eth = (opcode == CPL_RX_PKT);
 2861                 
 2862                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2863                         struct mbuf *m;
 2864 
 2865                         if (cxgb_debug)
 2866                                 printf("async notification\n");
 2867 
 2868                         if (mh->mh_head == NULL) {
 2869                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2870                                 m = mh->mh_head;
 2871                         } else {
 2872                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2873                         }
 2874                         if (m == NULL)
 2875                                 goto no_mem;
 2876 
 2877                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2878                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2879                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
 2880                         opcode = CPL_ASYNC_NOTIF;
 2881                         eop = 1;
 2882                         rspq->async_notif++;
 2883                         goto skip;
 2884                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2885                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2886 
 2887                         if (m == NULL) {        
 2888                 no_mem:
 2889                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2890                                 budget_left--;
 2891                                 break;
 2892                         }
 2893                         if (mh->mh_head == NULL)
 2894                                 mh->mh_head = m;
 2895                         else 
 2896                                 mh->mh_tail->m_next = m;
 2897                         mh->mh_tail = m;
 2898 
 2899                         get_imm_packet(adap, r, m);
 2900                         mh->mh_head->m_pkthdr.len += m->m_len;
 2901                         eop = 1;
 2902                         rspq->imm_data++;
 2903                 } else if (r->len_cq) {
 2904                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2905                         
 2906                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2907                         if (eop) {
 2908                                 if (r->rss_hdr.hash_type && !adap->timestamp)
 2909                                         mh->mh_head->m_flags |= M_FLOWID;
 2910                                 mh->mh_head->m_pkthdr.flowid = rss_hash;
 2911                         }
 2912                         
 2913                         ethpad = 2;
 2914                 } else {
 2915                         rspq->pure_rsps++;
 2916                 }
 2917         skip:
 2918                 if (flags & RSPD_CTRL_MASK) {
 2919                         sleeping |= flags & RSPD_GTS_MASK;
 2920                         handle_rsp_cntrl_info(qs, flags);
 2921                 }
 2922 
 2923                 if (!eth && eop) {
 2924                         rspq->offload_pkts++;
 2925 #ifdef TCP_OFFLOAD
 2926                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2927 #else
 2928                         m_freem(mh->mh_head);
 2929 #endif
 2930                         mh->mh_head = NULL;
 2931                 } else if (eth && eop) {
 2932                         struct mbuf *m = mh->mh_head;
 2933 
 2934                         t3_rx_eth(adap, m, ethpad);
 2935 
 2936                         /*
 2937                          * The T304 sends incoming packets on any qset.  If LRO
 2938                          * is also enabled, we could end up sending packet up
 2939                          * lro_ctrl->ifp's input.  That is incorrect.
 2940                          *
 2941                          * The mbuf's rcvif was derived from the cpl header and
 2942                          * is accurate.  Skip LRO and just use that.
 2943                          */
 2944 #if defined(INET6) || defined(INET)
 2945                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2946 
 2947                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2948                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2949                             ) {
 2950                                 /* successfully queue'd for LRO */
 2951                         } else
 2952 #endif
 2953                         {
 2954                                 /*
 2955                                  * LRO not enabled, packet unsuitable for LRO,
 2956                                  * or unable to queue.  Pass it up right now in
 2957                                  * either case.
 2958                                  */
 2959                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2960                                 (*ifp->if_input)(ifp, m);
 2961                         }
 2962                         mh->mh_head = NULL;
 2963 
 2964                 }
 2965 
 2966                 r++;
 2967                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2968                         rspq->cidx = 0;
 2969                         rspq->gen ^= 1;
 2970                         r = rspq->desc;
 2971                 }
 2972 
 2973                 if (++rspq->credits >= 64) {
 2974                         refill_rspq(adap, rspq, rspq->credits);
 2975                         rspq->credits = 0;
 2976                 }
 2977                 __refill_fl_lt(adap, &qs->fl[0], 32);
 2978                 __refill_fl_lt(adap, &qs->fl[1], 32);
 2979                 --budget_left;
 2980         }
 2981 
 2982 #if defined(INET6) || defined(INET)
 2983         /* Flush LRO */
 2984         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 2985                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 2986                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 2987                 tcp_lro_flush(lro_ctrl, queued);
 2988         }
 2989 #endif
 2990 
 2991         if (sleeping)
 2992                 check_ring_db(adap, qs, sleeping);
 2993 
 2994         mb();  /* commit Tx queue processed updates */
 2995         if (__predict_false(qs->txq_stopped > 1))
 2996                 restart_tx(qs);
 2997 
 2998         __refill_fl_lt(adap, &qs->fl[0], 512);
 2999         __refill_fl_lt(adap, &qs->fl[1], 512);
 3000         budget -= budget_left;
 3001         return (budget);
 3002 }
 3003 
 3004 /*
 3005  * A helper function that processes responses and issues GTS.
 3006  */
 3007 static __inline int
 3008 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3009 {
 3010         int work;
 3011         static int last_holdoff = 0;
 3012         
 3013         work = process_responses(adap, rspq_to_qset(rq), -1);
 3014 
 3015         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3016                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3017                 last_holdoff = rq->next_holdoff;
 3018         }
 3019         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3020             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3021         
 3022         return (work);
 3023 }
 3024 
 3025 
 3026 /*
 3027  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3028  * Handles data events from SGE response queues as well as error and other
 3029  * async events as they all use the same interrupt pin.  We use one SGE
 3030  * response queue per port in this mode and protect all response queues with
 3031  * queue 0's lock.
 3032  */
 3033 void
 3034 t3b_intr(void *data)
 3035 {
 3036         uint32_t i, map;
 3037         adapter_t *adap = data;
 3038         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3039         
 3040         t3_write_reg(adap, A_PL_CLI, 0);
 3041         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3042 
 3043         if (!map) 
 3044                 return;
 3045 
 3046         if (__predict_false(map & F_ERRINTR)) {
 3047                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3048                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3049                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3050         }
 3051 
 3052         mtx_lock(&q0->lock);
 3053         for_each_port(adap, i)
 3054             if (map & (1 << i))
 3055                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3056         mtx_unlock(&q0->lock);
 3057 }
 3058 
 3059 /*
 3060  * The MSI interrupt handler.  This needs to handle data events from SGE
 3061  * response queues as well as error and other async events as they all use
 3062  * the same MSI vector.  We use one SGE response queue per port in this mode
 3063  * and protect all response queues with queue 0's lock.
 3064  */
 3065 void
 3066 t3_intr_msi(void *data)
 3067 {
 3068         adapter_t *adap = data;
 3069         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3070         int i, new_packets = 0;
 3071 
 3072         mtx_lock(&q0->lock);
 3073 
 3074         for_each_port(adap, i)
 3075             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3076                     new_packets = 1;
 3077         mtx_unlock(&q0->lock);
 3078         if (new_packets == 0) {
 3079                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3080                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3081                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3082         }
 3083 }
 3084 
 3085 void
 3086 t3_intr_msix(void *data)
 3087 {
 3088         struct sge_qset *qs = data;
 3089         adapter_t *adap = qs->port->adapter;
 3090         struct sge_rspq *rspq = &qs->rspq;
 3091 
 3092         if (process_responses_gts(adap, rspq) == 0)
 3093                 rspq->unhandled_irqs++;
 3094 }
 3095 
 3096 #define QDUMP_SBUF_SIZE         32 * 400
 3097 static int
 3098 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3099 {
 3100         struct sge_rspq *rspq;
 3101         struct sge_qset *qs;
 3102         int i, err, dump_end, idx;
 3103         struct sbuf *sb;
 3104         struct rsp_desc *rspd;
 3105         uint32_t data[4];
 3106         
 3107         rspq = arg1;
 3108         qs = rspq_to_qset(rspq);
 3109         if (rspq->rspq_dump_count == 0) 
 3110                 return (0);
 3111         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3112                 log(LOG_WARNING,
 3113                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3114                 rspq->rspq_dump_count = 0;
 3115                 return (EINVAL);
 3116         }
 3117         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3118                 log(LOG_WARNING,
 3119                     "dump start of %d is greater than queue size\n",
 3120                     rspq->rspq_dump_start);
 3121                 rspq->rspq_dump_start = 0;
 3122                 return (EINVAL);
 3123         }
 3124         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3125         if (err)
 3126                 return (err);
 3127         err = sysctl_wire_old_buffer(req, 0);
 3128         if (err)
 3129                 return (err);
 3130         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3131 
 3132         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3133             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3134             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3135         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3136             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3137         
 3138         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3139             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3140         
 3141         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3142         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3143                 idx = i & (RSPQ_Q_SIZE-1);
 3144                 
 3145                 rspd = &rspq->desc[idx];
 3146                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3147                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3148                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3149                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3150                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3151                     be32toh(rspd->len_cq), rspd->intr_gen);
 3152         }
 3153 
 3154         err = sbuf_finish(sb);
 3155         /* Output a trailing NUL. */
 3156         if (err == 0)
 3157                 err = SYSCTL_OUT(req, "", 1);
 3158         sbuf_delete(sb);
 3159         return (err);
 3160 }       
 3161 
 3162 static int
 3163 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3164 {
 3165         struct sge_txq *txq;
 3166         struct sge_qset *qs;
 3167         int i, j, err, dump_end;
 3168         struct sbuf *sb;
 3169         struct tx_desc *txd;
 3170         uint32_t *WR, wr_hi, wr_lo, gen;
 3171         uint32_t data[4];
 3172         
 3173         txq = arg1;
 3174         qs = txq_to_qset(txq, TXQ_ETH);
 3175         if (txq->txq_dump_count == 0) {
 3176                 return (0);
 3177         }
 3178         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3179                 log(LOG_WARNING,
 3180                     "dump count is too large %d\n", txq->txq_dump_count);
 3181                 txq->txq_dump_count = 1;
 3182                 return (EINVAL);
 3183         }
 3184         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3185                 log(LOG_WARNING,
 3186                     "dump start of %d is greater than queue size\n",
 3187                     txq->txq_dump_start);
 3188                 txq->txq_dump_start = 0;
 3189                 return (EINVAL);
 3190         }
 3191         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3192         if (err)
 3193                 return (err);
 3194         err = sysctl_wire_old_buffer(req, 0);
 3195         if (err)
 3196                 return (err);
 3197         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3198 
 3199         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3200             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3201             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3202         sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
 3203             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3204             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3205         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3206             txq->txq_dump_start,
 3207             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3208 
 3209         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3210         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3211                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3212                 WR = (uint32_t *)txd->flit;
 3213                 wr_hi = ntohl(WR[0]);
 3214                 wr_lo = ntohl(WR[1]);           
 3215                 gen = G_WR_GEN(wr_lo);
 3216                 
 3217                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3218                     wr_hi, wr_lo, gen);
 3219                 for (j = 2; j < 30; j += 4) 
 3220                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3221                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3222 
 3223         }
 3224         err = sbuf_finish(sb);
 3225         /* Output a trailing NUL. */
 3226         if (err == 0)
 3227                 err = SYSCTL_OUT(req, "", 1);
 3228         sbuf_delete(sb);
 3229         return (err);
 3230 }
 3231 
 3232 static int
 3233 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3234 {
 3235         struct sge_txq *txq;
 3236         struct sge_qset *qs;
 3237         int i, j, err, dump_end;
 3238         struct sbuf *sb;
 3239         struct tx_desc *txd;
 3240         uint32_t *WR, wr_hi, wr_lo, gen;
 3241         
 3242         txq = arg1;
 3243         qs = txq_to_qset(txq, TXQ_CTRL);
 3244         if (txq->txq_dump_count == 0) {
 3245                 return (0);
 3246         }
 3247         if (txq->txq_dump_count > 256) {
 3248                 log(LOG_WARNING,
 3249                     "dump count is too large %d\n", txq->txq_dump_count);
 3250                 txq->txq_dump_count = 1;
 3251                 return (EINVAL);
 3252         }
 3253         if (txq->txq_dump_start > 255) {
 3254                 log(LOG_WARNING,
 3255                     "dump start of %d is greater than queue size\n",
 3256                     txq->txq_dump_start);
 3257                 txq->txq_dump_start = 0;
 3258                 return (EINVAL);
 3259         }
 3260 
 3261         err = sysctl_wire_old_buffer(req, 0);
 3262         if (err != 0)
 3263                 return (err);
 3264         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3265         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3266             txq->txq_dump_start,
 3267             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3268 
 3269         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3270         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3271                 txd = &txq->desc[i & (255)];
 3272                 WR = (uint32_t *)txd->flit;
 3273                 wr_hi = ntohl(WR[0]);
 3274                 wr_lo = ntohl(WR[1]);           
 3275                 gen = G_WR_GEN(wr_lo);
 3276                 
 3277                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3278                     wr_hi, wr_lo, gen);
 3279                 for (j = 2; j < 30; j += 4) 
 3280                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3281                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3282 
 3283         }
 3284         err = sbuf_finish(sb);
 3285         /* Output a trailing NUL. */
 3286         if (err == 0)
 3287                 err = SYSCTL_OUT(req, "", 1);
 3288         sbuf_delete(sb);
 3289         return (err);
 3290 }
 3291 
 3292 static int
 3293 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3294 {
 3295         adapter_t *sc = arg1;
 3296         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3297         int coalesce_usecs;     
 3298         struct sge_qset *qs;
 3299         int i, j, err, nqsets = 0;
 3300         struct mtx *lock;
 3301 
 3302         if ((sc->flags & FULL_INIT_DONE) == 0)
 3303                 return (ENXIO);
 3304                 
 3305         coalesce_usecs = qsp->coalesce_usecs;
 3306         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3307 
 3308         if (err != 0) {
 3309                 return (err);
 3310         }
 3311         if (coalesce_usecs == qsp->coalesce_usecs)
 3312                 return (0);
 3313 
 3314         for (i = 0; i < sc->params.nports; i++) 
 3315                 for (j = 0; j < sc->port[i].nqsets; j++)
 3316                         nqsets++;
 3317 
 3318         coalesce_usecs = max(1, coalesce_usecs);
 3319 
 3320         for (i = 0; i < nqsets; i++) {
 3321                 qs = &sc->sge.qs[i];
 3322                 qsp = &sc->params.sge.qset[i];
 3323                 qsp->coalesce_usecs = coalesce_usecs;
 3324                 
 3325                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3326                             &sc->sge.qs[0].rspq.lock;
 3327 
 3328                 mtx_lock(lock);
 3329                 t3_update_qset_coalesce(qs, qsp);
 3330                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3331                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3332                 mtx_unlock(lock);
 3333         }
 3334 
 3335         return (0);
 3336 }
 3337 
 3338 static int
 3339 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3340 {
 3341         adapter_t *sc = arg1;
 3342         int rc, timestamp;
 3343 
 3344         if ((sc->flags & FULL_INIT_DONE) == 0)
 3345                 return (ENXIO);
 3346 
 3347         timestamp = sc->timestamp;
 3348         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3349 
 3350         if (rc != 0)
 3351                 return (rc);
 3352 
 3353         if (timestamp != sc->timestamp) {
 3354                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3355                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3356                 sc->timestamp = timestamp;
 3357         }
 3358 
 3359         return (0);
 3360 }
 3361 
 3362 void
 3363 t3_add_attach_sysctls(adapter_t *sc)
 3364 {
 3365         struct sysctl_ctx_list *ctx;
 3366         struct sysctl_oid_list *children;
 3367 
 3368         ctx = device_get_sysctl_ctx(sc->dev);
 3369         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3370 
 3371         /* random information */
 3372         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3373             "firmware_version",
 3374             CTLFLAG_RD, sc->fw_version,
 3375             0, "firmware version");
 3376         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3377             "hw_revision",
 3378             CTLFLAG_RD, &sc->params.rev,
 3379             0, "chip model");
 3380         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3381             "port_types",
 3382             CTLFLAG_RD, sc->port_types,
 3383             0, "type of ports");
 3384         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3385             "enable_debug",
 3386             CTLFLAG_RW, &cxgb_debug,
 3387             0, "enable verbose debugging output");
 3388         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3389             CTLFLAG_RD, &sc->tunq_coalesce,
 3390             "#tunneled packets freed");
 3391         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3392             "txq_overrun",
 3393             CTLFLAG_RD, &txq_fills,
 3394             0, "#times txq overrun");
 3395         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3396             "core_clock",
 3397             CTLFLAG_RD, &sc->params.vpd.cclk,
 3398             0, "core clock frequency (in KHz)");
 3399 }
 3400 
 3401 
 3402 static const char *rspq_name = "rspq";
 3403 static const char *txq_names[] =
 3404 {
 3405         "txq_eth",
 3406         "txq_ofld",
 3407         "txq_ctrl"      
 3408 };
 3409 
 3410 static int
 3411 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3412 {
 3413         struct port_info *p = arg1;
 3414         uint64_t *parg;
 3415 
 3416         if (!p)
 3417                 return (EINVAL);
 3418 
 3419         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3420         PORT_LOCK(p);
 3421         t3_mac_update_stats(&p->mac);
 3422         PORT_UNLOCK(p);
 3423 
 3424         return (sysctl_handle_64(oidp, parg, 0, req));
 3425 }
 3426 
 3427 void
 3428 t3_add_configured_sysctls(adapter_t *sc)
 3429 {
 3430         struct sysctl_ctx_list *ctx;
 3431         struct sysctl_oid_list *children;
 3432         int i, j;
 3433         
 3434         ctx = device_get_sysctl_ctx(sc->dev);
 3435         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3436 
 3437         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3438             "intr_coal",
 3439             CTLTYPE_INT|CTLFLAG_RW, sc,
 3440             0, t3_set_coalesce_usecs,
 3441             "I", "interrupt coalescing timer (us)");
 3442 
 3443         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3444             "pkt_timestamp",
 3445             CTLTYPE_INT | CTLFLAG_RW, sc,
 3446             0, t3_pkt_timestamp,
 3447             "I", "provide packet timestamp instead of connection hash");
 3448 
 3449         for (i = 0; i < sc->params.nports; i++) {
 3450                 struct port_info *pi = &sc->port[i];
 3451                 struct sysctl_oid *poid;
 3452                 struct sysctl_oid_list *poidlist;
 3453                 struct mac_stats *mstats = &pi->mac.stats;
 3454                 
 3455                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3456                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3457                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3458                 poidlist = SYSCTL_CHILDREN(poid);
 3459                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3460                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3461                     0, "#queue sets");
 3462 
 3463                 for (j = 0; j < pi->nqsets; j++) {
 3464                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3465                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3466                                           *ctrlqpoid, *lropoid;
 3467                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3468                                                *txqpoidlist, *ctrlqpoidlist,
 3469                                                *lropoidlist;
 3470                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3471                         
 3472                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3473                         
 3474                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3475                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3476                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3477 
 3478                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3479                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3480                                         "freelist #0 empty");
 3481                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3482                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3483                                         "freelist #1 empty");
 3484 
 3485                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3486                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3487                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3488 
 3489                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3490                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3491                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3492 
 3493                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3494                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3495                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3496 
 3497                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3498                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3499                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3500 
 3501                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3502                             CTLFLAG_RD, &qs->rspq.size,
 3503                             0, "#entries in response queue");
 3504                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3505                             CTLFLAG_RD, &qs->rspq.cidx,
 3506                             0, "consumer index");
 3507                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3508                             CTLFLAG_RD, &qs->rspq.credits,
 3509                             0, "#credits");
 3510                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3511                             CTLFLAG_RD, &qs->rspq.starved,
 3512                             0, "#times starved");
 3513                         SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3514                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3515                             "physical_address_of the queue");
 3516                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3517                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3518                             0, "start rspq dump entry");
 3519                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3520                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3521                             0, "#rspq entries to dump");
 3522                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3523                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3524                             0, t3_dump_rspq, "A", "dump of the response queue");
 3525 
 3526                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3527                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3528                             "#tunneled packets dropped");
 3529                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3530                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3531                             0, "#tunneled packets waiting to be sent");
 3532 #if 0                   
 3533                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3534                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3535                             0, "#tunneled packets queue producer index");
 3536                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3537                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3538                             0, "#tunneled packets queue consumer index");
 3539 #endif                  
 3540                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3541                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3542                             0, "#tunneled packets processed by the card");
 3543                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3544                             CTLFLAG_RD, &txq->cleaned,
 3545                             0, "#tunneled packets cleaned");
 3546                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3547                             CTLFLAG_RD, &txq->in_use,
 3548                             0, "#tunneled packet slots in use");
 3549                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
 3550                             CTLFLAG_RD, &txq->txq_frees,
 3551                             "#tunneled packets freed");
 3552                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3553                             CTLFLAG_RD, &txq->txq_skipped,
 3554                             0, "#tunneled packet descriptors skipped");
 3555                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3556                             CTLFLAG_RD, &txq->txq_coalesced,
 3557                             "#tunneled packets coalesced");
 3558                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3559                             CTLFLAG_RD, &txq->txq_enqueued,
 3560                             0, "#tunneled packets enqueued to hardware");
 3561                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3562                             CTLFLAG_RD, &qs->txq_stopped,
 3563                             0, "tx queues stopped");
 3564                         SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3565                             CTLFLAG_RD, &txq->phys_addr,
 3566                             "physical_address_of the queue");
 3567                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3568                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3569                             0, "txq generation");
 3570                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3571                             CTLFLAG_RD, &txq->cidx,
 3572                             0, "hardware queue cidx");                  
 3573                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3574                             CTLFLAG_RD, &txq->pidx,
 3575                             0, "hardware queue pidx");
 3576                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3577                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3578                             0, "txq start idx for dump");
 3579                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3580                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3581                             0, "txq #entries to dump");                 
 3582                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3583                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3584                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3585 
 3586                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3587                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3588                             0, "ctrlq start idx for dump");
 3589                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3590                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3591                             0, "ctrl #entries to dump");                        
 3592                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3593                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3594                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3595 
 3596                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3597                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3598                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3599                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3600                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3601                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3602                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3603                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3604                 }
 3605 
 3606                 /* Now add a node for mac stats. */
 3607                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3608                     CTLFLAG_RD, NULL, "MAC statistics");
 3609                 poidlist = SYSCTL_CHILDREN(poid);
 3610 
 3611                 /*
 3612                  * We (ab)use the length argument (arg2) to pass on the offset
 3613                  * of the data that we are interested in.  This is only required
 3614                  * for the quad counters that are updated from the hardware (we
 3615                  * make sure that we return the latest value).
 3616                  * sysctl_handle_macstat first updates *all* the counters from
 3617                  * the hardware, and then returns the latest value of the
 3618                  * requested counter.  Best would be to update only the
 3619                  * requested counter from hardware, but t3_mac_update_stats()
 3620                  * hides all the register details and we don't want to dive into
 3621                  * all that here.
 3622                  */
 3623 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3624     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3625     sysctl_handle_macstat, "QU", 0)
 3626                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3627                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3628                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3629                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3630                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3631                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3632                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3633                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3634                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3635                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3636                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3637                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3638                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3639                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3640                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3641                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3647                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3648                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3649                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3650                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3651                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3652                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3653                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3654                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3655                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3656                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3657                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3658                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3659                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3660                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3661                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3662                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3663                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3671                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3672 #undef CXGB_SYSCTL_ADD_QUAD
 3673 
 3674 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3675     CTLFLAG_RD, &mstats->a, 0)
 3676                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3677                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3678                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3679                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3680                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3681                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3682                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3683                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3684                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3685                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3686 #undef CXGB_SYSCTL_ADD_ULONG
 3687         }
 3688 }
 3689         
 3690 /**
 3691  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3692  *      @qs: the queue set
 3693  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3694  *      @idx: the descriptor index in the queue
 3695  *      @data: where to dump the descriptor contents
 3696  *
 3697  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3698  *      size of the descriptor.
 3699  */
 3700 int
 3701 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3702                 unsigned char *data)
 3703 {
 3704         if (qnum >= 6)
 3705                 return (EINVAL);
 3706 
 3707         if (qnum < 3) {
 3708                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3709                         return -EINVAL;
 3710                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3711                 return sizeof(struct tx_desc);
 3712         }
 3713 
 3714         if (qnum == 3) {
 3715                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3716                         return (EINVAL);
 3717                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3718                 return sizeof(struct rsp_desc);
 3719         }
 3720 
 3721         qnum -= 4;
 3722         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3723                 return (EINVAL);
 3724         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3725         return sizeof(struct rx_desc);
 3726 }

Cache object: 2ac0452e59b5dccd43f523ba32087dba


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.