The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/10.0/sys/dev/cxgb/cxgb_sge.c 254804 2013-08-24 19:51:18Z andre $");
   32 
   33 #include "opt_inet6.h"
   34 #include "opt_inet.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/kernel.h>
   39 #include <sys/module.h>
   40 #include <sys/bus.h>
   41 #include <sys/conf.h>
   42 #include <machine/bus.h>
   43 #include <machine/resource.h>
   44 #include <sys/bus_dma.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/bpf.h>    
   60 #include <net/ethernet.h>
   61 #include <net/if.h>
   62 #include <net/if_vlan_var.h>
   63 
   64 #include <netinet/in_systm.h>
   65 #include <netinet/in.h>
   66 #include <netinet/ip.h>
   67 #include <netinet/ip6.h>
   68 #include <netinet/tcp.h>
   69 
   70 #include <dev/pci/pcireg.h>
   71 #include <dev/pci/pcivar.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/pmap.h>
   75 
   76 #include <cxgb_include.h>
   77 #include <sys/mvec.h>
   78 
   /*
    * File-scope integers with external linkage.  Neither is read or written
    * in the portion of the file reviewed here.
    * NOTE(review): confirm their users elsewhere before changing them.
    */
   79 int     txq_fills = 0;
   80 int     multiq_tx_enable = 1;
   81 
   82 #ifdef TCP_OFFLOAD
      /* Build-time check: NUM_CPL_HANDLERS must be at least NUM_CPL_CMDS. */
   83 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
   84 #endif
   85 
   86 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
      /*
       * Size of the per-queue mbuf ring; defaults to TX_ETH_Q_SIZE.  Published
       * as the loader tunable hw.cxgb.txq_mr_size and as a CTLFLAG_RDTUN sysctl
       * under _hw_cxgb.
       */
   87 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   88 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   90     "size of per-queue mbuf ring");
   91 
      /*
       * When non-zero, check_pkt_coalesce() returns 1 immediately without
       * looking at the state of the transmit queue.  Read/write sysctl and
       * loader tunable hw.cxgb.tx_coalesce_force.
       */
   92 static int cxgb_tx_coalesce_force = 0;
   93 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   94 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   95     &cxgb_tx_coalesce_force, 0,
   96     "coalesce small packets into a single work request regardless of ring state");
   97 
   /*
    * Defaults and limits for the Tx coalescing and Tx reclaim thresholds, all
    * expressed as fractions of TX_ETH_Q_SIZE.
    *
    * Every expansion is fully parenthesized so that the macros keep their
    * value when used inside a larger expression (for example "MACRO + 1",
    * where '+' would otherwise bind tighter than the shift).
    */
   #define COALESCE_START_DEFAULT          (TX_ETH_Q_SIZE >> 1)
   #define COALESCE_START_MAX              (TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
   #define COALESCE_STOP_DEFAULT           (TX_ETH_Q_SIZE >> 2)
   #define COALESCE_STOP_MIN               (TX_ETH_Q_SIZE >> 5)
   #define TX_RECLAIM_DEFAULT              (TX_ETH_Q_SIZE >> 5)
   #define TX_RECLAIM_MAX                  (TX_ETH_Q_SIZE >> 2)
   #define TX_RECLAIM_MIN                  (TX_ETH_Q_SIZE >> 6)
  105 
  106 
      /*
       * Tx queue usage (txq->in_use) at or above which check_pkt_coalesce()
       * sets the queue set's fill flag.  check_pkt_coalesce() caps the value
       * at COALESCE_START_MAX.
       */
  107 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  108 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  109     &cxgb_tx_coalesce_enable_start);
  110 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  111     &cxgb_tx_coalesce_enable_start, 0,
  112     "coalesce enable threshold");
      /*
       * Tx queue usage at or below which check_pkt_coalesce() may clear the
       * fill flag again (it additionally requires an empty software ring and
       * qs->coalescing == 0).
       */
  113 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  114 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  116     &cxgb_tx_coalesce_enable_stop, 0,
  117     "coalesce disable threshold");
      /*
       * Tx reclaim threshold.  reclaim_completed_tx() resets it to
       * TX_RECLAIM_DEFAULT whenever it is found outside
       * [TX_RECLAIM_MIN, TX_RECLAIM_MAX].
       */
  118 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  119 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  120 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  121     &cxgb_tx_reclaim_threshold, 0,
  122     "tx cleaning minimum threshold");
  123 
  124 /*
  125  * XXX don't re-enable this until TOE stops assuming
  126  * we have an m_ext.
       *
       * Initialized to 0.  No reader of this flag is visible in the portion of
       * the file reviewed here -- NOTE(review): confirm where it is consumed.
  127  */
  128 static int recycle_enable = 0;
  129 
      /*
       * Defined outside this file.  t3_sge_prep() uses cxgb_use_16k_clusters
       * (when it is not -1) to pick the jumbo buffer size, and the nmbjumbo*
       * counts to size the jumbo free lists.
       */
  130 extern int cxgb_use_16k_clusters;
  131 extern int nmbjumbop;
  132 extern int nmbjumbo9;
  133 extern int nmbjumbo16;
  134 
      /*
       * Compile-time constants.  None of them is referenced in the portion of
       * the file reviewed here; the notes below are inferred from the names.
       * NOTE(review): confirm against the code that uses them.
       */
  135 #define USE_GTS 0
  136 
  137 #define SGE_RX_SM_BUF_SIZE      1536    /* presumably bytes */
  138 #define SGE_RX_DROP_THRES       16
  139 #define SGE_RX_COPY_THRES       128
  140 
  141 /*
  142  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  143  * frequently as Tx buffers are usually reclaimed by new Tx packets.
       * The value is half of hz.
  144  */
  145 #define TX_RECLAIM_PERIOD       (hz >> 1)
  146 
  147 /* 
  148  * Values for sge_txq.flags.  Each enumerator is a distinct single bit, so
       * the values can be OR-ed together.
  149  */
  150 enum {
  151         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  152         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  153 };
  154 
      /* Tx descriptor: TX_DESC_FLITS 64-bit flits, laid out without padding. */
  155 struct tx_desc {
  156         uint64_t        flit[TX_DESC_FLITS];
  157 } __packed;
  158 
      /*
       * Free-list (Rx buffer) descriptor.  refill_fl() stores every field in
       * big-endian byte order.
       */
  159 struct rx_desc {
  160         uint32_t        addr_lo;        /* low 32 bits of the buffer bus address */
  161         uint32_t        len_gen;        /* written as V_FLD_GEN1(generation) */
  162         uint32_t        gen2;           /* written as V_FLD_GEN2(generation) */
  163         uint32_t        addr_hi;        /* high 32 bits of the buffer bus address */
  164 } __packed;
  165 
  166 struct rsp_desc {               /* response queue descriptor */
  167         struct rss_header       rss_hdr;        /* rss_hdr.opcode selects the CPL type in get_imm_packet() */
  168         uint32_t                flags;
  169         uint32_t                len_cq;
  170         uint8_t                 imm_data[47];   /* immediate data, copied out by get_imm_packet() */
  171         uint8_t                 intr_gen;
  172 } __packed;
  173 
      /*
       * Bits for the flags member of the software descriptors.  Bit 2 is not
       * assigned.  refill_fl() sets RX_SW_DESC_MAP_CREATED once a DMA map has
       * been created for a slot and RX_SW_DESC_INUSE once a buffer is attached;
       * free_rx_bufs() tests RX_SW_DESC_INUSE.  The TX_* bits are not used in
       * the portion of the file reviewed here.
       */
  174 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  175 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  176 #define RX_SW_DESC_INUSE        (1 << 3)
  177 #define TX_SW_DESC_MAPPED       (1 << 4)
  178 
      /*
       * The four combinations of F_RSPD_SOP and F_RSPD_EOP, each passed
       * through G_RSPD_SOP_EOP().  Presumably SOP/EOP mean start/end of packet
       * -- NOTE(review): confirm against the register definitions.
       */
  179 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  180 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  181 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  182 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  183 
  184 struct tx_sw_desc {                /* SW state per Tx descriptor */
  185         struct mbuf     *m;             /* presumably the mbuf being sent -- confirm */
  186         bus_dmamap_t    map;            /* DMA map for this slot */
  187         int             flags;          /* presumably TX_SW_DESC_* bits -- confirm */
  188 };
  189 
  190 struct rx_sw_desc {                /* SW state per Rx descriptor */
  191         caddr_t         rxsd_cl;        /* cluster address recorded by refill_fl() */
  192         struct mbuf     *m;             /* mbuf header allocated alongside the cluster */
  193         bus_dmamap_t    map;            /* DMA map, created on first use of the slot */
  194         int             flags;          /* RX_SW_DESC_* bits */
  195 };
  196 
      /*
       * Not referenced in the portion of the file reviewed here.
       * NOTE(review): field meanings below are guessed from the names.
       */
  197 struct txq_state {
  198         unsigned int    compl;          /* presumably a completion-request value */
  199         unsigned int    gen;            /* presumably the generation bit */
  200         unsigned int    pidx;           /* presumably the producer index */
  201 };
  202 
      /* Result record filled in by refill_fl_cb() for refill_fl(). */
  203 struct refill_fl_cb_arg {
  204         int               error;        /* error code handed to the callback */
  205         bus_dma_segment_t seg;          /* copy of the first DMA segment */
  206         int               nseg;         /* segment count handed to the callback */
  207 };
  208 
  209 
  210 /*
  211  * Maps a number of flits to the number of Tx descriptors that can hold them.
  212  * The formula is
  213  *
  214  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  215  *
  216  * HW allows up to 4 descriptors to be combined into a WR.
       *
       * The table is indexed by flit count (entry 0 is for zero flits) and is
       * read through flits_to_desc(), which performs no bounds check.  Its
       * length depends on SGE_NUM_GENBITS.
  217  */
  218 static uint8_t flit_desc_map[] = {
  219         0,
  220 #if SGE_NUM_GENBITS == 1
  221         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  222         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  223         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  224         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  225 #elif SGE_NUM_GENBITS == 2
  226         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  227         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  228         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  229         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  230 #else
  231 # error "SGE_NUM_GENBITS must be 1 or 2"
  232 #endif
  233 };
  234 
      /*
       * Shorthands that take a queue set pointer.  The first four operate on
       * the queue set mutex (qs->lock); the TXQ_RING_* macros forward to the
       * drbr_* routines for the buf ring of the Ethernet Tx queue
       * (qs->txq[TXQ_ETH].txq_mr) on the port's ifnet.
       */
  235 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  236 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  237 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  238 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  239 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  241         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  242 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
      /* Dequeue the head packet only if func(packet, arg) accepts it. */
  243 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  244         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  245 #define TXQ_RING_DEQUEUE(qs) \
  246         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  247 
      /* Global with external linkage; not read in the portion reviewed here. */
  248 int cxgb_debug = 0;
  249 
      /* Forward declarations for file-local routines defined further down. */
  250 static void sge_timer_cb(void *arg);
  251 static void sge_timer_reclaim(void *arg, int ncount);
  252 static void sge_txq_reclaim_handler(void *arg, int ncount);
  253 static void cxgb_start_locked(struct sge_qset *qs);
  254 
  255 /*
  256  * XXX need to cope with bursty scheduling by looking at a wider
  257  * window than we are now for determining the need for coalescing
  258  *
  259  */
  260 static __inline uint64_t
  261 check_pkt_coalesce(struct sge_qset *qs) 
  262 { 
  263         struct adapter *sc; 
  264         struct sge_txq *txq; 
  265         uint8_t *fill;
  266 
  267         if (__predict_false(cxgb_tx_coalesce_force))
  268                 return (1);
  269         txq = &qs->txq[TXQ_ETH]; 
  270         sc = qs->port->adapter; 
  271         fill = &sc->tunq_fill[qs->idx];
  272 
  273         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  274                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  275         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  276                 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
  277         /*
  278          * if the hardware transmit queue is more than 1/8 full
  279          * we mark it as coalescing - we drop back from coalescing
  280          * when we go below 1/32 full and there are no packets enqueued, 
  281          * this provides us with some degree of hysteresis
  282          */
  283         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  284             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  285                 *fill = 0; 
  286         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  287                 *fill = 1; 
  288 
  289         return (sc->tunq_coalesce);
  290 } 
  291 
  292 #ifdef __LP64__
  293 static void
  294 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  295 {
  296         uint64_t wr_hilo;
  297 #if _BYTE_ORDER == _LITTLE_ENDIAN
  298         wr_hilo = wr_hi;
  299         wr_hilo |= (((uint64_t)wr_lo)<<32);
  300 #else
  301         wr_hilo = wr_lo;
  302         wr_hilo |= (((uint64_t)wr_hi)<<32);
  303 #endif  
  304         wrp->wrh_hilo = wr_hilo;
  305 }
  306 #else
  307 static void
  308 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  309 {
  310 
  311         wrp->wrh_hi = wr_hi;
  312         wmb();
  313         wrp->wrh_lo = wr_lo;
  314 }
  315 #endif
  316 
      /* Running totals kept by coalesce_check() while cxgb_dequeue() builds a batch. */
  317 struct coalesce_info {
  318         int count;      /* packets accepted so far */
  319         int nbytes;     /* sum of m_len of the accepted packets */
  320 };
  321 
  322 static int
  323 coalesce_check(struct mbuf *m, void *arg)
  324 {
  325         struct coalesce_info *ci = arg;
  326         int *count = &ci->count;
  327         int *nbytes = &ci->nbytes;
  328 
  329         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  330                 (*count < 7) && (m->m_next == NULL))) {
  331                 *count += 1;
  332                 *nbytes += m->m_len;
  333                 return (1);
  334         }
  335         return (0);
  336 }
  337 
  338 static struct mbuf *
  339 cxgb_dequeue(struct sge_qset *qs)
  340 {
  341         struct mbuf *m, *m_head, *m_tail;
  342         struct coalesce_info ci;
  343 
  344         
  345         if (check_pkt_coalesce(qs) == 0) 
  346                 return TXQ_RING_DEQUEUE(qs);
  347 
  348         m_head = m_tail = NULL;
  349         ci.count = ci.nbytes = 0;
  350         do {
  351                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  352                 if (m_head == NULL) {
  353                         m_tail = m_head = m;
  354                 } else if (m != NULL) {
  355                         m_tail->m_nextpkt = m;
  356                         m_tail = m;
  357                 }
  358         } while (m != NULL);
  359         if (ci.count > 7)
  360                 panic("trying to coalesce %d packets in to one WR", ci.count);
  361         return (m_head);
  362 }
  363         
  364 /**
  365  *      reclaim_completed_tx - reclaims completed Tx descriptors
  366  *      @adapter: the adapter
  367  *      @q: the Tx queue to reclaim completed descriptors from
  368  *
  369  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  370  *      and frees the associated buffers if possible.  Called with the Tx
  371  *      queue's lock held.
  372  */
  373 static __inline int
  374 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  375 {
  376         struct sge_txq *q = &qs->txq[queue];
  377         int reclaim = desc_reclaimable(q);
  378 
  379         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  380             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  381                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  382 
  383         if (reclaim < reclaim_min)
  384                 return (0);
  385 
  386         mtx_assert(&qs->lock, MA_OWNED);
  387         if (reclaim > 0) {
  388                 t3_free_tx_desc(qs, reclaim, queue);
  389                 q->cleaned += reclaim;
  390                 q->in_use -= reclaim;
  391         }
  392         if (isset(&qs->txq_stopped, TXQ_ETH))
  393                 clrbit(&qs->txq_stopped, TXQ_ETH);
  394 
  395         return (reclaim);
  396 }
  397 
  398 /**
  399  *      should_restart_tx - are there enough resources to restart a Tx queue?
  400  *      @q: the Tx queue
  401  *
  402  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  403  */
  404 static __inline int
  405 should_restart_tx(const struct sge_txq *q)
  406 {
  407         unsigned int r = q->processed - q->cleaned;
  408 
  409         return q->in_use - r < (q->size >> 1);
  410 }
  411 
  412 /**
  413  *      t3_sge_init - initialize SGE
  414  *      @adap: the adapter
  415  *      @p: the SGE parameters
  416  *
  417  *      Performs SGE initialization needed every time after a chip reset.
  418  *      We do not initialize any of the queue sets here, instead the driver
  419  *      top-level must request those individually.  We also do not enable DMA
  420  *      here, that should be done after the queues have been set up.
  421  */
  422 void
  423 t3_sge_init(adapter_t *adap, struct sge_params *p)
  424 {
  425         u_int ctrl, ups;
  426 
  427         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  428 
  429         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  430                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  431                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  432                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  433 #if SGE_NUM_GENBITS == 1
  434         ctrl |= F_EGRGENCTRL;
  435 #endif
  436         if (adap->params.rev > 0) {
  437                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  438                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  439         }
  440         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  441         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  442                      V_LORCQDRBTHRSH(512));
  443         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  444         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  445                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  446         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  447                      adap->params.rev < T3_REV_C ? 1000 : 500);
  448         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  449         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  450         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  451         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  452         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  453 }
  454 
  455 
  /**
   *      sgl_len - calculates the size of an SGL of the given capacity
   *      @n: the number of SGL entries
   *
   *      Returns the number of flits needed for a scatter/gather list with
   *      @n entries: three flits for every two entries, with an odd final
   *      entry taking two flits.
   */
  static __inline unsigned int
  sgl_len(unsigned int n)
  {
          unsigned int flits = (3 * n) / 2;

          if (n & 1)
                  flits++;
          return (flits);
  }
  468 
  469 /**
  470  *      get_imm_packet - return the next ingress packet buffer from a response
  471  *      @resp: the response descriptor containing the packet data
  472  *
  473  *      Return a packet containing the immediate data of the given response.
  474  */
  475 static int
  476 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  477 {
  478 
  479         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  480                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  481                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  482         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  483                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  484                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  485         } else
  486                 m->m_len = IMMED_PKT_SIZE;
  487         m->m_ext.ext_buf = NULL;
  488         m->m_ext.ext_type = 0;
  489         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  490         return (0);     
  491 }
  492 
  493 static __inline u_int
  494 flits_to_desc(u_int n)
  495 {
  496         return (flit_desc_map[n]);
  497 }
  498 
      /*
       * Groups of A_SG_INT_CAUSE bits tested by t3_sge_err_intr_handler():
       * the parity error bits, the framing error bits, and the set that makes
       * the handler call t3_fatal_err() (parity, framing, response queue
       * credit overflow, response queue disabled).
       */
  499 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  500                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  501                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  502                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  503                     F_HIRCQPARITYERROR)
  504 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  505 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  506                       F_RSPQDISABLED)
  507 
  508 /**
  509  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  510  *      @adapter: the adapter
  511  *
  512  *      Interrupt handler for SGE asynchronous (non-data) events.
  513  */
  514 void
  515 t3_sge_err_intr_handler(adapter_t *adapter)
  516 {
  517         unsigned int v, status;
  518 
  519         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  520         if (status & SGE_PARERR)
  521                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  522                          status & SGE_PARERR);
  523         if (status & SGE_FRAMINGERR)
  524                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  525                          status & SGE_FRAMINGERR);
  526         if (status & F_RSPQCREDITOVERFOW)
  527                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  528 
  529         if (status & F_RSPQDISABLED) {
  530                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  531 
  532                 CH_ALERT(adapter,
  533                          "packet delivered to disabled response queue (0x%x)\n",
  534                          (v >> S_RSPQ0DISABLED) & 0xff);
  535         }
  536 
  537         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  538         if (status & SGE_FATALERR)
  539                 t3_fatal_err(adapter);
  540 }
  541 
  542 void
  543 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  544 {
  545         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  546 
  547         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  548         nqsets *= adap->params.nports;
  549 
  550         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  551 
  552         while (!powerof2(fl_q_size))
  553                 fl_q_size--;
  554 
  555         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  556             is_offload(adap);
  557 
  558 #if __FreeBSD_version >= 700111
  559         if (use_16k) {
  560                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  561                 jumbo_buf_size = MJUM16BYTES;
  562         } else {
  563                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  564                 jumbo_buf_size = MJUM9BYTES;
  565         }
  566 #else
  567         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  568         jumbo_buf_size = MJUMPAGESIZE;
  569 #endif
  570         while (!powerof2(jumbo_q_size))
  571                 jumbo_q_size--;
  572 
  573         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  574                 device_printf(adap->dev,
  575                     "Insufficient clusters and/or jumbo buffers.\n");
  576 
  577         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  578 
  579         for (i = 0; i < SGE_QSETS; ++i) {
  580                 struct qset_params *q = p->qset + i;
  581 
  582                 if (adap->params.nports > 2) {
  583                         q->coalesce_usecs = 50;
  584                 } else {
  585 #ifdef INVARIANTS                       
  586                         q->coalesce_usecs = 10;
  587 #else
  588                         q->coalesce_usecs = 5;
  589 #endif                  
  590                 }
  591                 q->polling = 0;
  592                 q->rspq_size = RSPQ_Q_SIZE;
  593                 q->fl_size = fl_q_size;
  594                 q->jumbo_size = jumbo_q_size;
  595                 q->jumbo_buf_size = jumbo_buf_size;
  596                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  597                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  598                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  599                 q->cong_thres = 0;
  600         }
  601 }
  602 
  603 int
  604 t3_sge_alloc(adapter_t *sc)
  605 {
  606 
  607         /* The parent tag. */
  608         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  609                                 1, 0,                   /* algnmnt, boundary */
  610                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  611                                 BUS_SPACE_MAXADDR,      /* highaddr */
  612                                 NULL, NULL,             /* filter, filterarg */
  613                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  614                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  615                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  616                                 0,                      /* flags */
  617                                 NULL, NULL,             /* lock, lockarg */
  618                                 &sc->parent_dmat)) {
  619                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  620                 return (ENOMEM);
  621         }
  622 
  623         /*
  624          * DMA tag for normal sized RX frames
  625          */
  626         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  627                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  628                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  629                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  630                 return (ENOMEM);
  631         }
  632 
  633         /* 
  634          * DMA tag for jumbo sized RX frames.
  635          */
  636         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  637                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  638                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  639                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  640                 return (ENOMEM);
  641         }
  642 
  643         /* 
  644          * DMA tag for TX frames.
  645          */
  646         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  647                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  648                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  649                 NULL, NULL, &sc->tx_dmat)) {
  650                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  651                 return (ENOMEM);
  652         }
  653 
  654         return (0);
  655 }
  656 
  657 int
  658 t3_sge_free(struct adapter * sc)
  659 {
  660 
  661         if (sc->tx_dmat != NULL)
  662                 bus_dma_tag_destroy(sc->tx_dmat);
  663 
  664         if (sc->rx_jumbo_dmat != NULL)
  665                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  666 
  667         if (sc->rx_dmat != NULL)
  668                 bus_dma_tag_destroy(sc->rx_dmat);
  669 
  670         if (sc->parent_dmat != NULL)
  671                 bus_dma_tag_destroy(sc->parent_dmat);
  672 
  673         return (0);
  674 }
  675 
  676 void
  677 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  678 {
  679 
  680         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  681         qs->rspq.polling = 0 /* p->polling */;
  682 }
  683 
  #if !defined(__i386__) && !defined(__amd64__)
  /*
   * bus_dmamap_load() callback used by refill_fl(): records the error code,
   * the segment count and a copy of the first segment in the
   * struct refill_fl_cb_arg passed as arg.
   */
  static void
  refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  {
          struct refill_fl_cb_arg *result = arg;

          result->nseg = nseg;
          result->seg = segs[0];
          result->error = error;
  }
  #endif
  696 /**
  697  *      refill_fl - refill an SGE free-buffer list
  698  *      @sc: the controller softc
  699  *      @q: the free-list to refill
  700  *      @n: the number of new buffers to allocate
  701  *
  702  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  703  *      The caller must assure that @n does not exceed the queue's capacity.
  704  */
  705 static void
  706 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  707 {
  708         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  709         struct rx_desc *d = &q->desc[q->pidx];
  710         struct refill_fl_cb_arg cb_arg;
  711         struct mbuf *m;
  712         caddr_t cl;
  713         int err;
  714         
  715         cb_arg.error = 0;
  716         while (n--) {
  717                 /*
  718                  * We allocate an uninitialized mbuf + cluster, mbuf is
  719                  * initialized after rx.
  720                  */
  721                 if (q->zone == zone_pack) {
  722                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  723                                 break;
  724                         cl = m->m_ext.ext_buf;                  
  725                 } else {
  726                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  727                                 break;
  728                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  729                                 uma_zfree(q->zone, cl);
  730                                 break;
  731                         }
  732                 }
  733                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  734                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  735                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  736                                 uma_zfree(q->zone, cl);
  737                                 goto done;
  738                         }
  739                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  740                 }
  741 #if !defined(__i386__) && !defined(__amd64__)
  742                 err = bus_dmamap_load(q->entry_tag, sd->map,
  743                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  744                 
  745                 if (err != 0 || cb_arg.error) {
  746                         if (q->zone == zone_pack)
  747                                 uma_zfree(q->zone, cl);
  748                         m_free(m);
  749                         goto done;
  750                 }
  751 #else
  752                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  753 #endif          
  754                 sd->flags |= RX_SW_DESC_INUSE;
  755                 sd->rxsd_cl = cl;
  756                 sd->m = m;
  757                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  758                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  759                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  760                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  761 
  762                 d++;
  763                 sd++;
  764 
  765                 if (++q->pidx == q->size) {
  766                         q->pidx = 0;
  767                         q->gen ^= 1;
  768                         sd = q->sdesc;
  769                         d = q->desc;
  770                 }
  771                 q->credits++;
  772                 q->db_pending++;
  773         }
  774 
  775 done:
  776         if (q->db_pending >= 32) {
  777                 q->db_pending = 0;
  778                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  779         }
  780 }
  781 
  782 
  783 /**
  784  *      free_rx_bufs - free the Rx buffers on an SGE free list
  785  *      @sc: the controle softc
  786  *      @q: the SGE free list to clean up
  787  *
  788  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  789  *      this queue should be stopped before calling this function.
  790  */
  791 static void
  792 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  793 {
  794         u_int cidx = q->cidx;
  795 
  796         while (q->credits--) {
  797                 struct rx_sw_desc *d = &q->sdesc[cidx];
  798 
  799                 if (d->flags & RX_SW_DESC_INUSE) {
  800                         bus_dmamap_unload(q->entry_tag, d->map);
  801                         bus_dmamap_destroy(q->entry_tag, d->map);
  802                         if (q->zone == zone_pack) {
  803                                 m_init(d->m, zone_pack, MCLBYTES,
  804                                     M_NOWAIT, MT_DATA, M_EXT);
  805                                 uma_zfree(zone_pack, d->m);
  806                         } else {
  807                                 m_init(d->m, zone_mbuf, MLEN,
  808                                     M_NOWAIT, MT_DATA, 0);
  809                                 uma_zfree(zone_mbuf, d->m);
  810                                 uma_zfree(q->zone, d->rxsd_cl);
  811                         }                       
  812                 }
  813                 
  814                 d->rxsd_cl = NULL;
  815                 d->m = NULL;
  816                 if (++cidx == q->size)
  817                         cidx = 0;
  818         }
  819 }
  820 
  821 static __inline void
  822 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  823 {
  824         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  825 }
  826 
  827 static __inline void
  828 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  829 {
  830         uint32_t reclaimable = fl->size - fl->credits;
  831 
  832         if (reclaimable > 0)
  833                 refill_fl(adap, fl, min(max, reclaimable));
  834 }
  835 
  836 /**
  837  *      recycle_rx_buf - recycle a receive buffer
  838  *      @adapter: the adapter
  839  *      @q: the SGE free list
  840  *      @idx: index of buffer to recycle
  841  *
  842  *      Recycles the specified buffer on the given free list by adding it at
  843  *      the next available slot on the list.
  844  */
  845 static void
  846 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  847 {
  848         struct rx_desc *from = &q->desc[idx];
  849         struct rx_desc *to   = &q->desc[q->pidx];
  850 
  851         q->sdesc[q->pidx] = q->sdesc[idx];
  852         to->addr_lo = from->addr_lo;        // already big endian
  853         to->addr_hi = from->addr_hi;        // likewise
  854         wmb();  /* necessary ? */
  855         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  856         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  857         q->credits++;
  858 
  859         if (++q->pidx == q->size) {
  860                 q->pidx = 0;
  861                 q->gen ^= 1;
  862         }
  863         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  864 }
  865 
  866 static void
  867 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  868 {
  869         uint32_t *addr;
  870 
  871         addr = arg;
  872         *addr = segs[0].ds_addr;
  873 }
  874 
  875 static int
  876 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  877     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  878     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  879 {
  880         size_t len = nelem * elem_size;
  881         void *s = NULL;
  882         void *p = NULL;
  883         int err;
  884 
  885         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  886                                       BUS_SPACE_MAXADDR_32BIT,
  887                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  888                                       len, 0, NULL, NULL, tag)) != 0) {
  889                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  890                 return (ENOMEM);
  891         }
  892 
  893         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  894                                     map)) != 0) {
  895                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  896                 return (ENOMEM);
  897         }
  898 
  899         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  900         bzero(p, len);
  901         *(void **)desc = p;
  902 
  903         if (sw_size) {
  904                 len = nelem * sw_size;
  905                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  906                 *(void **)sdesc = s;
  907         }
  908         if (parent_entry_tag == NULL)
  909                 return (0);
  910             
  911         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  912                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  913                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  914                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  915                                       NULL, NULL, entry_tag)) != 0) {
  916                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  917                 return (ENOMEM);
  918         }
  919         return (0);
  920 }
  921 
  922 static void
  923 sge_slow_intr_handler(void *arg, int ncount)
  924 {
  925         adapter_t *sc = arg;
  926 
  927         t3_slow_intr_handler(sc);
  928         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  929         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  930 }
  931 
  932 /**
  933  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  934  *      @data: the SGE queue set to maintain
  935  *
  936  *      Runs periodically from a timer to perform maintenance of an SGE queue
  937  *      set.  It performs two tasks:
  938  *
  939  *      a) Cleans up any completed Tx descriptors that may still be pending.
  940  *      Normal descriptor cleanup happens when new packets are added to a Tx
  941  *      queue so this timer is relatively infrequent and does any cleanup only
  942  *      if the Tx queue has not seen any new packets in a while.  We make a
  943  *      best effort attempt to reclaim descriptors, in that we don't wait
  944  *      around if we cannot get a queue's lock (which most likely is because
  945  *      someone else is queueing new packets and so will also handle the clean
  946  *      up).  Since control queues use immediate data exclusively we don't
  947  *      bother cleaning them up here.
  948  *
  949  *      b) Replenishes Rx queues that have run out due to memory shortage.
  950  *      Normally new Rx buffers are added when existing ones are consumed but
  951  *      when out of memory a queue can become empty.  We try to add only a few
  952  *      buffers here, the queue will be replenished fully as these new buffers
  953  *      are used up if memory shortage has subsided.
  954  *      
  955  *      c) Return coalesced response queue credits in case a response queue is
  956  *      starved.
  957  *
  958  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  959  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  960  */
  961 static void
  962 sge_timer_cb(void *arg)
  963 {
  964         adapter_t *sc = arg;
  965         if ((sc->flags & USING_MSIX) == 0) {
  966                 
  967                 struct port_info *pi;
  968                 struct sge_qset *qs;
  969                 struct sge_txq  *txq;
  970                 int i, j;
  971                 int reclaim_ofl, refill_rx;
  972 
  973                 if (sc->open_device_map == 0) 
  974                         return;
  975 
  976                 for (i = 0; i < sc->params.nports; i++) {
  977                         pi = &sc->port[i];
  978                         for (j = 0; j < pi->nqsets; j++) {
  979                                 qs = &sc->sge.qs[pi->first_qset + j];
  980                                 txq = &qs->txq[0];
  981                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  982                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  983                                     (qs->fl[1].credits < qs->fl[1].size));
  984                                 if (reclaim_ofl || refill_rx) {
  985                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  986                                         break;
  987                                 }
  988                         }
  989                 }
  990         }
  991         
  992         if (sc->params.nports > 2) {
  993                 int i;
  994 
  995                 for_each_port(sc, i) {
  996                         struct port_info *pi = &sc->port[i];
  997 
  998                         t3_write_reg(sc, A_SG_KDOORBELL, 
  999                                      F_SELEGRCNTX | 
 1000                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1001                 }
 1002         }       
 1003         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1004             sc->open_device_map != 0)
 1005                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1006 }
 1007 
 1008 /*
 1009  * This is meant to be a catch-all function to keep sge state private
 1010  * to sge.c
 1011  *
 1012  */
 1013 int
 1014 t3_sge_init_adapter(adapter_t *sc)
 1015 {
 1016         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1017         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1018         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1019         return (0);
 1020 }
 1021 
 1022 int
 1023 t3_sge_reset_adapter(adapter_t *sc)
 1024 {
 1025         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1026         return (0);
 1027 }
 1028 
 1029 int
 1030 t3_sge_init_port(struct port_info *pi)
 1031 {
 1032         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1033         return (0);
 1034 }
 1035 
 1036 /**
 1037  *      refill_rspq - replenish an SGE response queue
 1038  *      @adapter: the adapter
 1039  *      @q: the response queue to replenish
 1040  *      @credits: how many new responses to make available
 1041  *
 1042  *      Replenishes a response queue by making the supplied number of responses
 1043  *      available to HW.
 1044  */
 1045 static __inline void
 1046 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1047 {
 1048 
 1049         /* mbufs are allocated on demand when a rspq entry is processed. */
 1050         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1051                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1052 }
 1053 
 1054 static void
 1055 sge_txq_reclaim_handler(void *arg, int ncount)
 1056 {
 1057         struct sge_qset *qs = arg;
 1058         int i;
 1059 
 1060         for (i = 0; i < 3; i++)
 1061                 reclaim_completed_tx(qs, 16, i);
 1062 }
 1063 
 1064 static void
 1065 sge_timer_reclaim(void *arg, int ncount)
 1066 {
 1067         struct port_info *pi = arg;
 1068         int i, nqsets = pi->nqsets;
 1069         adapter_t *sc = pi->adapter;
 1070         struct sge_qset *qs;
 1071         struct mtx *lock;
 1072         
 1073         KASSERT((sc->flags & USING_MSIX) == 0,
 1074             ("can't call timer reclaim for msi-x"));
 1075 
 1076         for (i = 0; i < nqsets; i++) {
 1077                 qs = &sc->sge.qs[pi->first_qset + i];
 1078 
 1079                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1080                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1081                             &sc->sge.qs[0].rspq.lock;
 1082 
 1083                 if (mtx_trylock(lock)) {
 1084                         /* XXX currently assume that we are *NOT* polling */
 1085                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1086 
 1087                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1088                                 __refill_fl(sc, &qs->fl[0]);
 1089                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1090                                 __refill_fl(sc, &qs->fl[1]);
 1091                         
 1092                         if (status & (1 << qs->rspq.cntxt_id)) {
 1093                                 if (qs->rspq.credits) {
 1094                                         refill_rspq(sc, &qs->rspq, 1);
 1095                                         qs->rspq.credits--;
 1096                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1097                                             1 << qs->rspq.cntxt_id);
 1098                                 }
 1099                         }
 1100                         mtx_unlock(lock);
 1101                 }
 1102         }
 1103 }
 1104 
 1105 /**
 1106  *      init_qset_cntxt - initialize an SGE queue set context info
 1107  *      @qs: the queue set
 1108  *      @id: the queue set id
 1109  *
 1110  *      Initializes the TIDs and context ids for the queues of a queue set.
 1111  */
 1112 static void
 1113 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1114 {
 1115 
 1116         qs->rspq.cntxt_id = id;
 1117         qs->fl[0].cntxt_id = 2 * id;
 1118         qs->fl[1].cntxt_id = 2 * id + 1;
 1119         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1120         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1121         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1122         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1123         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1124 
 1125         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1126         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1127         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1128 }
 1129 
 1130 
 1131 static void
 1132 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1133 {
 1134         txq->in_use += ndesc;
 1135         /*
 1136          * XXX we don't handle stopping of queue
 1137          * presumably start handles this when we bump against the end
 1138          */
 1139         txqs->gen = txq->gen;
 1140         txq->unacked += ndesc;
 1141         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1142         txq->unacked &= 31;
 1143         txqs->pidx = txq->pidx;
 1144         txq->pidx += ndesc;
 1145 #ifdef INVARIANTS
 1146         if (((txqs->pidx > txq->cidx) &&
 1147                 (txq->pidx < txqs->pidx) &&
 1148                 (txq->pidx >= txq->cidx)) ||
 1149             ((txqs->pidx < txq->cidx) &&
 1150                 (txq->pidx >= txq-> cidx)) ||
 1151             ((txqs->pidx < txq->cidx) &&
 1152                 (txq->cidx < txqs->pidx)))
 1153                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1154                     txqs->pidx, txq->pidx, txq->cidx);
 1155 #endif
 1156         if (txq->pidx >= txq->size) {
 1157                 txq->pidx -= txq->size;
 1158                 txq->gen ^= 1;
 1159         }
 1160 
 1161 }
 1162 
 1163 /**
 1164  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1165  *      @m: the packet mbufs
 1166  *      @nsegs: the number of segments 
 1167  *
 1168  *      Returns the number of Tx descriptors needed for the given Ethernet
 1169  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1170  */
 1171 static __inline unsigned int
 1172 calc_tx_descs(const struct mbuf *m, int nsegs)
 1173 {
 1174         unsigned int flits;
 1175 
 1176         if (m->m_pkthdr.len <= PIO_LEN)
 1177                 return 1;
 1178 
 1179         flits = sgl_len(nsegs) + 2;
 1180         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1181                 flits++;
 1182 
 1183         return flits_to_desc(flits);
 1184 }
 1185 
 1186 /**
 1187  *      make_sgl - populate a scatter/gather list for a packet
 1188  *      @sgp: the SGL to populate
 1189  *      @segs: the packet dma segments
 1190  *      @nsegs: the number of segments
 1191  *
 1192  *      Generates a scatter/gather list for the buffers that make up a packet
 1193  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1194  *      appropriately.
 1195  */
 1196 static __inline void
 1197 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1198 {
 1199         int i, idx;
 1200         
 1201         for (idx = 0, i = 0; i < nsegs; i++) {
 1202                 /*
 1203                  * firmware doesn't like empty segments
 1204                  */
 1205                 if (segs[i].ds_len == 0)
 1206                         continue;
 1207                 if (i && idx == 0) 
 1208                         ++sgp;
 1209                 
 1210                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1211                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1212                 idx ^= 1;
 1213         }
 1214         
 1215         if (idx) {
 1216                 sgp->len[idx] = 0;
 1217                 sgp->addr[idx] = 0;
 1218         }
 1219 }
 1220         
 1221 /**
 1222  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1223  *      @adap: the adapter
 1224  *      @q: the Tx queue
 1225  *
 1226  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1227  *      where the HW is going to sleep just after we checked, however,
 1228  *      then the interrupt handler will detect the outstanding TX packet
 1229  *      and ring the doorbell for us.
 1230  *
 1231  *      When GTS is disabled we unconditionally ring the doorbell.
 1232  */
 1233 static __inline void
 1234 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1235 {
 1236 #if USE_GTS
 1237         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1238         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1239                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1240 #ifdef T3_TRACE
 1241                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1242                           q->cntxt_id);
 1243 #endif
 1244                 t3_write_reg(adap, A_SG_KDOORBELL,
 1245                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1246         }
 1247 #else
 1248         if (mustring || ++q->db_pending >= 32) {
 1249                 wmb();            /* write descriptors before telling HW */
 1250                 t3_write_reg(adap, A_SG_KDOORBELL,
 1251                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1252                 q->db_pending = 0;
 1253         }
 1254 #endif
 1255 }
 1256 
 1257 static __inline void
 1258 wr_gen2(struct tx_desc *d, unsigned int gen)
 1259 {
 1260 #if SGE_NUM_GENBITS == 2
 1261         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1262 #endif
 1263 }
 1264 
 1265 /**
 1266  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1267  *      @ndesc: number of Tx descriptors spanned by the SGL
 1268  *      @txd: first Tx descriptor to be written
 1269  *      @txqs: txq state (generation and producer index)
 1270  *      @txq: the SGE Tx queue
 1271  *      @sgl: the SGL
 1272  *      @flits: number of flits to the start of the SGL in the first descriptor
 1273  *      @sgl_flits: the SGL size in flits
 1274  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1275  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1276  *
 1277  *      Write a work request header and an associated SGL.  If the SGL is
 1278  *      small enough to fit into one Tx descriptor it has already been written
 1279  *      and we just need to write the WR header.  Otherwise we distribute the
 1280  *      SGL across the number of descriptors it spans.
 1281  */
 1282 static void
 1283 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1284     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1285     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1286 {
 1287 
 1288         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1289         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1290         
 1291         if (__predict_true(ndesc == 1)) {
 1292                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1293                     V_WR_SGLSFLT(flits)) | wr_hi,
 1294                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1295                     wr_lo);
 1296 
 1297                 wr_gen2(txd, txqs->gen);
 1298                 
 1299         } else {
 1300                 unsigned int ogen = txqs->gen;
 1301                 const uint64_t *fp = (const uint64_t *)sgl;
 1302                 struct work_request_hdr *wp = wrp;
 1303                 
 1304                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1305                     V_WR_SGLSFLT(flits)) | wr_hi;
 1306                 
 1307                 while (sgl_flits) {
 1308                         unsigned int avail = WR_FLITS - flits;
 1309 
 1310                         if (avail > sgl_flits)
 1311                                 avail = sgl_flits;
 1312                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1313                         sgl_flits -= avail;
 1314                         ndesc--;
 1315                         if (!sgl_flits)
 1316                                 break;
 1317                         
 1318                         fp += avail;
 1319                         txd++;
 1320                         txsd++;
 1321                         if (++txqs->pidx == txq->size) {
 1322                                 txqs->pidx = 0;
 1323                                 txqs->gen ^= 1;
 1324                                 txd = txq->desc;
 1325                                 txsd = txq->sdesc;
 1326                         }
 1327 
 1328                         /*
 1329                          * when the head of the mbuf chain
 1330                          * is freed all clusters will be freed
 1331                          * with it
 1332                          */
 1333                         wrp = (struct work_request_hdr *)txd;
 1334                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1335                             V_WR_SGLSFLT(1)) | wr_hi;
 1336                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1337                                     sgl_flits + 1)) |
 1338                             V_WR_GEN(txqs->gen)) | wr_lo;
 1339                         wr_gen2(txd, txqs->gen);
 1340                         flits = 1;
 1341                 }
 1342                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1343                 wmb();
 1344                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1345                 wr_gen2((struct tx_desc *)wp, ogen);
 1346         }
 1347 }
 1348 
 1349 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1350 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1351 
 1352 #define GET_VTAG(cntrl, m) \
 1353 do { \
 1354         if ((m)->m_flags & M_VLANTAG)                                               \
 1355                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1356 } while (0)
 1357 
 1358 static int
 1359 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1360 {
 1361         adapter_t *sc;
 1362         struct mbuf *m0;
 1363         struct sge_txq *txq;
 1364         struct txq_state txqs;
 1365         struct port_info *pi;
 1366         unsigned int ndesc, flits, cntrl, mlen;
 1367         int err, nsegs, tso_info = 0;
 1368 
 1369         struct work_request_hdr *wrp;
 1370         struct tx_sw_desc *txsd;
 1371         struct sg_ent *sgp, *sgl;
 1372         uint32_t wr_hi, wr_lo, sgl_flits; 
 1373         bus_dma_segment_t segs[TX_MAX_SEGS];
 1374 
 1375         struct tx_desc *txd;
 1376                 
 1377         pi = qs->port;
 1378         sc = pi->adapter;
 1379         txq = &qs->txq[TXQ_ETH];
 1380         txd = &txq->desc[txq->pidx];
 1381         txsd = &txq->sdesc[txq->pidx];
 1382         sgl = txq->txq_sgl;
 1383 
 1384         prefetch(txd);
 1385         m0 = *m;
 1386 
 1387         mtx_assert(&qs->lock, MA_OWNED);
 1388         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1389         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1390         
 1391         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1392             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1393                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1394 
 1395         if (m0->m_nextpkt != NULL) {
 1396                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1397                 ndesc = 1;
 1398                 mlen = 0;
 1399         } else {
 1400                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1401                     &m0, segs, &nsegs))) {
 1402                         if (cxgb_debug)
 1403                                 printf("failed ... err=%d\n", err);
 1404                         return (err);
 1405                 }
 1406                 mlen = m0->m_pkthdr.len;
 1407                 ndesc = calc_tx_descs(m0, nsegs);
 1408         }
 1409         txq_prod(txq, ndesc, &txqs);
 1410 
 1411         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1412         txsd->m = m0;
 1413 
 1414         if (m0->m_nextpkt != NULL) {
 1415                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1416                 int i, fidx;
 1417 
 1418                 if (nsegs > 7)
 1419                         panic("trying to coalesce %d packets in to one WR", nsegs);
 1420                 txq->txq_coalesced += nsegs;
 1421                 wrp = (struct work_request_hdr *)txd;
 1422                 flits = nsegs*2 + 1;
 1423 
 1424                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1425                         struct cpl_tx_pkt_batch_entry *cbe;
 1426                         uint64_t flit;
 1427                         uint32_t *hflit = (uint32_t *)&flit;
 1428                         int cflags = m0->m_pkthdr.csum_flags;
 1429 
 1430                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1431                         GET_VTAG(cntrl, m0);
 1432                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1433                         if (__predict_false(!(cflags & CSUM_IP)))
 1434                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1435                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1436                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1437                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1438 
 1439                         hflit[0] = htonl(cntrl);
 1440                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1441                         flit |= htobe64(1 << 24);
 1442                         cbe = &cpl_batch->pkt_entry[i];
 1443                         cbe->cntrl = hflit[0];
 1444                         cbe->len = hflit[1];
 1445                         cbe->addr = htobe64(segs[i].ds_addr);
 1446                 }
 1447 
 1448                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1449                     V_WR_SGLSFLT(flits)) |
 1450                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1451                 wr_lo = htonl(V_WR_LEN(flits) |
 1452                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1453                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1454                 wmb();
 1455                 ETHER_BPF_MTAP(pi->ifp, m0);
 1456                 wr_gen2(txd, txqs.gen);
 1457                 check_ring_tx_db(sc, txq, 0);
 1458                 return (0);             
 1459         } else if (tso_info) {
 1460                 uint16_t eth_type;
 1461                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1462                 struct ether_header *eh;
 1463                 void *l3hdr;
 1464                 struct tcphdr *tcp;
 1465 
 1466                 txd->flit[2] = 0;
 1467                 GET_VTAG(cntrl, m0);
 1468                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1469                 hdr->cntrl = htonl(cntrl);
 1470                 hdr->len = htonl(mlen | 0x80000000);
 1471 
 1472                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1473                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
 1474                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1475                             (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
 1476                         panic("tx tso packet too small");
 1477                 }
 1478 
 1479                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1480                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1481                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1482                         if (__predict_false(m0 == NULL)) {
 1483                                 /* XXX panic probably an overreaction */
 1484                                 panic("couldn't fit header into mbuf");
 1485                         }
 1486                 }
 1487 
 1488                 eh = mtod(m0, struct ether_header *);
 1489                 eth_type = eh->ether_type;
 1490                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1491                         struct ether_vlan_header *evh = (void *)eh;
 1492 
 1493                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1494                         l3hdr = evh + 1;
 1495                         eth_type = evh->evl_proto;
 1496                 } else {
 1497                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1498                         l3hdr = eh + 1;
 1499                 }
 1500 
 1501                 if (eth_type == htons(ETHERTYPE_IP)) {
 1502                         struct ip *ip = l3hdr;
 1503 
 1504                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1505                         tcp = (struct tcphdr *)(ip + 1);
 1506                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1507                         struct ip6_hdr *ip6 = l3hdr;
 1508 
 1509                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1510                             ("%s: CSUM_TSO with ip6_nxt %d",
 1511                             __func__, ip6->ip6_nxt));
 1512 
 1513                         tso_info |= F_LSO_IPV6;
 1514                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1515                         tcp = (struct tcphdr *)(ip6 + 1);
 1516                 } else
 1517                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1518 
 1519                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1520                 hdr->lso_info = htonl(tso_info);
 1521 
 1522                 if (__predict_false(mlen <= PIO_LEN)) {
 1523                         /*
 1524                          * pkt not undersized but fits in PIO_LEN
 1525                          * Indicates a TSO bug at the higher levels.
 1526                          */
 1527                         txsd->m = NULL;
 1528                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1529                         flits = (mlen + 7) / 8 + 3;
 1530                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1531                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1532                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1533                         wr_lo = htonl(V_WR_LEN(flits) |
 1534                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1535                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1536                         wmb();
 1537                         ETHER_BPF_MTAP(pi->ifp, m0);
 1538                         wr_gen2(txd, txqs.gen);
 1539                         check_ring_tx_db(sc, txq, 0);
 1540                         m_freem(m0);
 1541                         return (0);
 1542                 }
 1543                 flits = 3;      
 1544         } else {
 1545                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1546                 
 1547                 GET_VTAG(cntrl, m0);
 1548                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1549                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1550                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1551                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1552                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1553                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1554                 cpl->cntrl = htonl(cntrl);
 1555                 cpl->len = htonl(mlen | 0x80000000);
 1556 
 1557                 if (mlen <= PIO_LEN) {
 1558                         txsd->m = NULL;
 1559                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1560                         flits = (mlen + 7) / 8 + 2;
 1561                         
 1562                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1563                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1564                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1565                         wr_lo = htonl(V_WR_LEN(flits) |
 1566                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1567                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1568                         wmb();
 1569                         ETHER_BPF_MTAP(pi->ifp, m0);
 1570                         wr_gen2(txd, txqs.gen);
 1571                         check_ring_tx_db(sc, txq, 0);
 1572                         m_freem(m0);
 1573                         return (0);
 1574                 }
 1575                 flits = 2;
 1576         }
 1577         wrp = (struct work_request_hdr *)txd;
 1578         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1579         make_sgl(sgp, segs, nsegs);
 1580 
 1581         sgl_flits = sgl_len(nsegs);
 1582 
 1583         ETHER_BPF_MTAP(pi->ifp, m0);
 1584 
 1585         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1586         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1587         wr_lo = htonl(V_WR_TID(txq->token));
 1588         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1589             sgl_flits, wr_hi, wr_lo);
 1590         check_ring_tx_db(sc, txq, 0);
 1591 
 1592         return (0);
 1593 }
 1594 
 1595 void
 1596 cxgb_tx_watchdog(void *arg)
 1597 {
 1598         struct sge_qset *qs = arg;
 1599         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1600 
 1601         if (qs->coalescing != 0 &&
 1602             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1603             TXQ_RING_EMPTY(qs))
 1604                 qs->coalescing = 0; 
 1605         else if (qs->coalescing == 0 &&
 1606             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1607                 qs->coalescing = 1;
 1608         if (TXQ_TRYLOCK(qs)) {
 1609                 qs->qs_flags |= QS_FLUSHING;
 1610                 cxgb_start_locked(qs);
 1611                 qs->qs_flags &= ~QS_FLUSHING;
 1612                 TXQ_UNLOCK(qs);
 1613         }
 1614         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1615                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1616                     qs, txq->txq_watchdog.c_cpu);
 1617 }
 1618 
 1619 static void
 1620 cxgb_tx_timeout(void *arg)
 1621 {
 1622         struct sge_qset *qs = arg;
 1623         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1624 
 1625         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1626                 qs->coalescing = 1;     
 1627         if (TXQ_TRYLOCK(qs)) {
 1628                 qs->qs_flags |= QS_TIMEOUT;
 1629                 cxgb_start_locked(qs);
 1630                 qs->qs_flags &= ~QS_TIMEOUT;
 1631                 TXQ_UNLOCK(qs);
 1632         }
 1633 }
 1634 
 1635 static void
 1636 cxgb_start_locked(struct sge_qset *qs)
 1637 {
 1638         struct mbuf *m_head = NULL;
 1639         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1640         struct port_info *pi = qs->port;
 1641         struct ifnet *ifp = pi->ifp;
 1642 
 1643         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1644                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1645 
 1646         if (!pi->link_config.link_ok) {
 1647                 TXQ_RING_FLUSH(qs);
 1648                 return;
 1649         }
 1650         TXQ_LOCK_ASSERT(qs);
 1651         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1652             pi->link_config.link_ok) {
 1653                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1654 
 1655                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1656                         break;
 1657 
 1658                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1659                         break;
 1660                 /*
 1661                  *  Encapsulation can modify our pointer, and or make it
 1662                  *  NULL on failure.  In that event, we can't requeue.
 1663                  */
 1664                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1665                         break;
 1666 
 1667                 m_head = NULL;
 1668         }
 1669 
 1670         if (txq->db_pending)
 1671                 check_ring_tx_db(pi->adapter, txq, 1);
 1672 
 1673         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1674             pi->link_config.link_ok)
 1675                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1676                     qs, txq->txq_timer.c_cpu);
 1677         if (m_head != NULL)
 1678                 m_freem(m_head);
 1679 }
 1680 
 1681 static int
 1682 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1683 {
 1684         struct port_info *pi = qs->port;
 1685         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1686         struct buf_ring *br = txq->txq_mr;
 1687         int error, avail;
 1688 
 1689         avail = txq->size - txq->in_use;
 1690         TXQ_LOCK_ASSERT(qs);
 1691 
 1692         /*
 1693          * We can only do a direct transmit if the following are true:
 1694          * - we aren't coalescing (ring < 3/4 full)
 1695          * - the link is up -- checked in caller
 1696          * - there are no packets enqueued already
 1697          * - there is space in hardware transmit queue 
 1698          */
 1699         if (check_pkt_coalesce(qs) == 0 &&
 1700             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1701                 if (t3_encap(qs, &m)) {
 1702                         if (m != NULL &&
 1703                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1704                                 return (error);
 1705                 } else {
 1706                         if (txq->db_pending)
 1707                                 check_ring_tx_db(pi->adapter, txq, 1);
 1708 
 1709                         /*
 1710                          * We've bypassed the buf ring so we need to update
 1711                          * the stats directly
 1712                          */
 1713                         txq->txq_direct_packets++;
 1714                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1715                 }
 1716         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1717                 return (error);
 1718 
 1719         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1720         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1721             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1722                 cxgb_start_locked(qs);
 1723         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1724                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1725                     qs, txq->txq_timer.c_cpu);
 1726         return (0);
 1727 }
 1728 
 1729 int
 1730 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1731 {
 1732         struct sge_qset *qs;
 1733         struct port_info *pi = ifp->if_softc;
 1734         int error, qidx = pi->first_qset;
 1735 
 1736         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1737             ||(!pi->link_config.link_ok)) {
 1738                 m_freem(m);
 1739                 return (0);
 1740         }
 1741         
 1742         if (m->m_flags & M_FLOWID)
 1743                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1744 
 1745         qs = &pi->adapter->sge.qs[qidx];
 1746         
 1747         if (TXQ_TRYLOCK(qs)) {
 1748                 /* XXX running */
 1749                 error = cxgb_transmit_locked(ifp, qs, m);
 1750                 TXQ_UNLOCK(qs);
 1751         } else
 1752                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1753         return (error);
 1754 }
 1755 
 1756 void
 1757 cxgb_qflush(struct ifnet *ifp)
 1758 {
 1759         /*
 1760          * flush any enqueued mbufs in the buf_rings
 1761          * and in the transmit queues
 1762          * no-op for now
 1763          */
 1764         return;
 1765 }
 1766 
 1767 /**
 1768  *      write_imm - write a packet into a Tx descriptor as immediate data
 1769  *      @d: the Tx descriptor to write
 1770  *      @m: the packet
 1771  *      @len: the length of packet data to write as immediate data
 1772  *      @gen: the generation bit value to write
 1773  *
 1774  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1775  *      contains a work request at its beginning.  We must write the packet
 1776  *      carefully so the SGE doesn't read accidentally before it's written in
 1777  *      its entirety.
 1778  */
 1779 static __inline void
 1780 write_imm(struct tx_desc *d, caddr_t src,
 1781           unsigned int len, unsigned int gen)
 1782 {
 1783         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1784         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1785         uint32_t wr_hi, wr_lo;
 1786 
 1787         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1788             ("%s: invalid len %d", __func__, len));
 1789         
 1790         memcpy(&to[1], &from[1], len - sizeof(*from));
 1791         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1792             V_WR_BCNTLFLT(len & 7));
 1793         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1794         set_wr_hdr(to, wr_hi, wr_lo);
 1795         wmb();
 1796         wr_gen2(d, gen);
 1797 }
 1798 
 1799 /**
 1800  *      check_desc_avail - check descriptor availability on a send queue
 1801  *      @adap: the adapter
 1802  *      @q: the TX queue
 1803  *      @m: the packet needing the descriptors
 1804  *      @ndesc: the number of Tx descriptors needed
 1805  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1806  *
 1807  *      Checks if the requested number of Tx descriptors is available on an
 1808  *      SGE send queue.  If the queue is already suspended or not enough
 1809  *      descriptors are available the packet is queued for later transmission.
 1810  *      Must be called with the Tx queue locked.
 1811  *
 1812  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1813  *      enough descriptors and the packet has been queued, and 2 if the caller
 1814  *      needs to retry because there weren't enough descriptors at the
 1815  *      beginning of the call but some freed up in the mean time.
 1816  */
 1817 static __inline int
 1818 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1819                  struct mbuf *m, unsigned int ndesc,
 1820                  unsigned int qid)
 1821 {
 1822         /* 
 1823          * XXX We currently only use this for checking the control queue
 1824          * the control queue is only used for binding qsets which happens
 1825          * at init time so we are guaranteed enough descriptors
 1826          */
 1827         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1828 addq_exit:      mbufq_tail(&q->sendq, m);
 1829                 return 1;
 1830         }
 1831         if (__predict_false(q->size - q->in_use < ndesc)) {
 1832 
 1833                 struct sge_qset *qs = txq_to_qset(q, qid);
 1834 
 1835                 setbit(&qs->txq_stopped, qid);
 1836                 if (should_restart_tx(q) &&
 1837                     test_and_clear_bit(qid, &qs->txq_stopped))
 1838                         return 2;
 1839 
 1840                 q->stops++;
 1841                 goto addq_exit;
 1842         }
 1843         return 0;
 1844 }
 1845 
 1846 
 1847 /**
 1848  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1849  *      @q: the SGE control Tx queue
 1850  *
 1851  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1852  *      that send only immediate data (presently just the control queues) and
 1853  *      thus do not have any mbufs
 1854  */
 1855 static __inline void
 1856 reclaim_completed_tx_imm(struct sge_txq *q)
 1857 {
 1858         unsigned int reclaim = q->processed - q->cleaned;
 1859 
 1860         q->in_use -= reclaim;
 1861         q->cleaned += reclaim;
 1862 }
 1863 
 1864 /**
 1865  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1866  *      @adap: the adapter
 1867  *      @q: the control queue
 1868  *      @m: the packet
 1869  *
 1870  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1871  *      a control queue must fit entirely as immediate data in a single Tx
 1872  *      descriptor and have no page fragments.
 1873  */
 1874 static int
 1875 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1876 {
 1877         int ret;
 1878         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1879         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1880         
 1881         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1882 
 1883         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1884         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1885 
 1886         TXQ_LOCK(qs);
 1887 again:  reclaim_completed_tx_imm(q);
 1888 
 1889         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1890         if (__predict_false(ret)) {
 1891                 if (ret == 1) {
 1892                         TXQ_UNLOCK(qs);
 1893                         return (ENOSPC);
 1894                 }
 1895                 goto again;
 1896         }
 1897         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1898         
 1899         q->in_use++;
 1900         if (++q->pidx >= q->size) {
 1901                 q->pidx = 0;
 1902                 q->gen ^= 1;
 1903         }
 1904         TXQ_UNLOCK(qs);
 1905         wmb();
 1906         t3_write_reg(adap, A_SG_KDOORBELL,
 1907             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1908 
 1909         m_free(m);
 1910         return (0);
 1911 }
 1912 
 1913 
 1914 /**
 1915  *      restart_ctrlq - restart a suspended control queue
 1916  *      @qs: the queue set cotaining the control queue
 1917  *
 1918  *      Resumes transmission on a suspended Tx control queue.
 1919  */
 1920 static void
 1921 restart_ctrlq(void *data, int npending)
 1922 {
 1923         struct mbuf *m;
 1924         struct sge_qset *qs = (struct sge_qset *)data;
 1925         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1926         adapter_t *adap = qs->port->adapter;
 1927 
 1928         TXQ_LOCK(qs);
 1929 again:  reclaim_completed_tx_imm(q);
 1930 
 1931         while (q->in_use < q->size &&
 1932                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1933 
 1934                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1935                 m_free(m);
 1936 
 1937                 if (++q->pidx >= q->size) {
 1938                         q->pidx = 0;
 1939                         q->gen ^= 1;
 1940                 }
 1941                 q->in_use++;
 1942         }
 1943         if (!mbufq_empty(&q->sendq)) {
 1944                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1945 
 1946                 if (should_restart_tx(q) &&
 1947                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1948                         goto again;
 1949                 q->stops++;
 1950         }
 1951         TXQ_UNLOCK(qs);
 1952         t3_write_reg(adap, A_SG_KDOORBELL,
 1953                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1954 }
 1955 
 1956 
 1957 /*
 1958  * Send a management message through control queue 0
 1959  */
 1960 int
 1961 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1962 {
 1963         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1964 }
 1965 
 1966 /**
 1967  *      free_qset - free the resources of an SGE queue set
 1968  *      @sc: the controller owning the queue set
 1969  *      @q: the queue set
 1970  *
 1971  *      Release the HW and SW resources associated with an SGE queue set, such
 1972  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1973  *      queue set must be quiesced prior to calling this.
 1974  */
 1975 static void
 1976 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1977 {
 1978         int i;
 1979         
 1980         reclaim_completed_tx(q, 0, TXQ_ETH);
 1981         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 1982                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 1983         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 1984                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 1985                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 1986         }
 1987 
 1988         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 1989                 if (q->fl[i].desc) {
 1990                         mtx_lock_spin(&sc->sge.reg_lock);
 1991                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 1992                         mtx_unlock_spin(&sc->sge.reg_lock);
 1993                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 1994                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 1995                                         q->fl[i].desc_map);
 1996                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 1997                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 1998                 }
 1999                 if (q->fl[i].sdesc) {
 2000                         free_rx_bufs(sc, &q->fl[i]);
 2001                         free(q->fl[i].sdesc, M_DEVBUF);
 2002                 }
 2003         }
 2004 
 2005         mtx_unlock(&q->lock);
 2006         MTX_DESTROY(&q->lock);
 2007         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2008                 if (q->txq[i].desc) {
 2009                         mtx_lock_spin(&sc->sge.reg_lock);
 2010                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2011                         mtx_unlock_spin(&sc->sge.reg_lock);
 2012                         bus_dmamap_unload(q->txq[i].desc_tag,
 2013                                         q->txq[i].desc_map);
 2014                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2015                                         q->txq[i].desc_map);
 2016                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2017                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2018                 }
 2019                 if (q->txq[i].sdesc) {
 2020                         free(q->txq[i].sdesc, M_DEVBUF);
 2021                 }
 2022         }
 2023 
 2024         if (q->rspq.desc) {
 2025                 mtx_lock_spin(&sc->sge.reg_lock);
 2026                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2027                 mtx_unlock_spin(&sc->sge.reg_lock);
 2028                 
 2029                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2030                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2031                                 q->rspq.desc_map);
 2032                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2033                 MTX_DESTROY(&q->rspq.lock);
 2034         }
 2035 
 2036 #if defined(INET6) || defined(INET)
 2037         tcp_lro_free(&q->lro.ctrl);
 2038 #endif
 2039 
 2040         bzero(q, sizeof(*q));
 2041 }
 2042 
 2043 /**
 2044  *      t3_free_sge_resources - free SGE resources
 2045  *      @sc: the adapter softc
 2046  *
 2047  *      Frees resources used by the SGE queue sets.
 2048  */
 2049 void
 2050 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2051 {
 2052         int i;
 2053 
 2054         for (i = 0; i < nqsets; ++i) {
 2055                 TXQ_LOCK(&sc->sge.qs[i]);
 2056                 t3_free_qset(sc, &sc->sge.qs[i]);
 2057         }
 2058 }
 2059 
 2060 /**
 2061  *      t3_sge_start - enable SGE
 2062  *      @sc: the controller softc
 2063  *
 2064  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2065  *      transfers.
 2066  */
 2067 void
 2068 t3_sge_start(adapter_t *sc)
 2069 {
 2070         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2071 }
 2072 
 2073 /**
 2074  *      t3_sge_stop - disable SGE operation
 2075  *      @sc: the adapter
 2076  *
 2077  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
 2078  *      from error interrupts) or from normal process context.  In the latter
 2079  *      case it also disables any pending queue restart tasklets.  Note that
 2080  *      if it is called in interrupt context it cannot disable the restart
 2081  *      tasklets as it cannot wait, however the tasklets will have no effect
 2082  *      since the doorbells are disabled and the driver will call this again
 2083  *      later from process context, at which time the tasklets will be stopped
 2084  *      if they are still running.
 2085  */
 2086 void
 2087 t3_sge_stop(adapter_t *sc)
 2088 {
 2089         int i, nqsets;
 2090         
 2091         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2092 
 2093         if (sc->tq == NULL)
 2094                 return;
 2095         
 2096         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2097                 nqsets += sc->port[i].nqsets;
 2098 #ifdef notyet
 2099         /*
 2100          * 
 2101          * XXX
 2102          */
 2103         for (i = 0; i < nqsets; ++i) {
 2104                 struct sge_qset *qs = &sc->sge.qs[i];
 2105                 
 2106                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2107                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2108         }
 2109 #endif
 2110 }
 2111 
 2112 /**
 2113  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2114  *      @adapter: the adapter
 2115  *      @q: the Tx queue to reclaim descriptors from
 2116  *      @reclaimable: the number of descriptors to reclaim
 2117  *      @m_vec_size: maximum number of buffers to reclaim
 2118  *      @desc_reclaimed: returns the number of descriptors reclaimed
 2119  *
 2120  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2121  *      Tx buffers.  Called with the Tx queue lock held.
 2122  *
 2123  *      Returns number of buffers of reclaimed   
 2124  */
 2125 void
 2126 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2127 {
 2128         struct tx_sw_desc *txsd;
 2129         unsigned int cidx, mask;
 2130         struct sge_txq *q = &qs->txq[queue];
 2131 
 2132 #ifdef T3_TRACE
 2133         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2134                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2135 #endif
 2136         cidx = q->cidx;
 2137         mask = q->size - 1;
 2138         txsd = &q->sdesc[cidx];
 2139 
 2140         mtx_assert(&qs->lock, MA_OWNED);
 2141         while (reclaimable--) {
 2142                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2143                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2144 
 2145                 if (txsd->m != NULL) {
 2146                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2147                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2148                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2149                         }
 2150                         m_freem_list(txsd->m);
 2151                         txsd->m = NULL;
 2152                 } else
 2153                         q->txq_skipped++;
 2154                 
 2155                 ++txsd;
 2156                 if (++cidx == q->size) {
 2157                         cidx = 0;
 2158                         txsd = q->sdesc;
 2159                 }
 2160         }
 2161         q->cidx = cidx;
 2162 
 2163 }
 2164 
 2165 /**
 2166  *      is_new_response - check if a response is newly written
 2167  *      @r: the response descriptor
 2168  *      @q: the response queue
 2169  *
 2170  *      Returns true if a response descriptor contains a yet unprocessed
 2171  *      response.
 2172  */
 2173 static __inline int
 2174 is_new_response(const struct rsp_desc *r,
 2175     const struct sge_rspq *q)
 2176 {
 2177         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2178 }
 2179 
 2180 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2181 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2182                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2183                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2184                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2185 
 2186 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2187 #define NOMEM_INTR_DELAY 2500
 2188 
 2189 #ifdef TCP_OFFLOAD
 2190 /**
 2191  *      write_ofld_wr - write an offload work request
 2192  *      @adap: the adapter
 2193  *      @m: the packet to send
 2194  *      @q: the Tx queue
 2195  *      @pidx: index of the first Tx descriptor to write
 2196  *      @gen: the generation value to use
 2197  *      @ndesc: number of descriptors the packet will occupy
 2198  *
 2199  *      Write an offload work request to send the supplied packet.  The packet
 2200  *      data already carry the work request with most fields populated.
 2201  */
 2202 static void
 2203 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2204     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2205 {
 2206         unsigned int sgl_flits, flits;
 2207         int i, idx, nsegs, wrlen;
 2208         struct work_request_hdr *from;
 2209         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2210         struct tx_desc *d = &q->desc[pidx];
 2211         struct txq_state txqs;
 2212         struct sglist_seg *segs;
 2213         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2214         struct sglist *sgl;
 2215 
 2216         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2217         wrlen = m->m_len - sizeof(*oh);
 2218 
 2219         if (!(oh->flags & F_HDR_SGL)) {
 2220                 write_imm(d, (caddr_t)from, wrlen, gen);
 2221 
 2222                 /*
 2223                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2224                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2225                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2226                  */
 2227                 if (!(oh->flags & F_HDR_DF))
 2228                         m_free(m);
 2229                 return;
 2230         }
 2231 
 2232         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2233 
 2234         sgl = oh->sgl;
 2235         flits = wrlen / 8;
 2236         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2237 
 2238         nsegs = sgl->sg_nseg;
 2239         segs = sgl->sg_segs;
 2240         for (idx = 0, i = 0; i < nsegs; i++) {
 2241                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2242                 if (i && idx == 0) 
 2243                         ++sgp;
 2244                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2245                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2246                 idx ^= 1;
 2247         }
 2248         if (idx) {
 2249                 sgp->len[idx] = 0;
 2250                 sgp->addr[idx] = 0;
 2251         }
 2252 
 2253         sgl_flits = sgl_len(nsegs);
 2254         txqs.gen = gen;
 2255         txqs.pidx = pidx;
 2256         txqs.compl = 0;
 2257 
 2258         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2259             from->wrh_hi, from->wrh_lo);
 2260 }
 2261 
 2262 /**
 2263  *      ofld_xmit - send a packet through an offload queue
 2264  *      @adap: the adapter
 2265  *      @q: the Tx offload queue
 2266  *      @m: the packet
 2267  *
 2268  *      Send an offload packet through an SGE offload queue.
 2269  */
 2270 static int
 2271 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2272 {
 2273         int ret;
 2274         unsigned int ndesc;
 2275         unsigned int pidx, gen;
 2276         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2277         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2278 
 2279         ndesc = G_HDR_NDESC(oh->flags);
 2280 
 2281         TXQ_LOCK(qs);
 2282 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2283         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2284         if (__predict_false(ret)) {
 2285                 if (ret == 1) {
 2286                         TXQ_UNLOCK(qs);
 2287                         return (EINTR);
 2288                 }
 2289                 goto again;
 2290         }
 2291 
 2292         gen = q->gen;
 2293         q->in_use += ndesc;
 2294         pidx = q->pidx;
 2295         q->pidx += ndesc;
 2296         if (q->pidx >= q->size) {
 2297                 q->pidx -= q->size;
 2298                 q->gen ^= 1;
 2299         }
 2300 
 2301         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2302         check_ring_tx_db(adap, q, 1);
 2303         TXQ_UNLOCK(qs);
 2304 
 2305         return (0);
 2306 }
 2307 
 2308 /**
 2309  *      restart_offloadq - restart a suspended offload queue
 2310  *      @qs: the queue set cotaining the offload queue
 2311  *
 2312  *      Resumes transmission on a suspended Tx offload queue.
 2313  */
 2314 static void
 2315 restart_offloadq(void *data, int npending)
 2316 {
 2317         struct mbuf *m;
 2318         struct sge_qset *qs = data;
 2319         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2320         adapter_t *adap = qs->port->adapter;
 2321         int cleaned;
 2322                 
 2323         TXQ_LOCK(qs);
 2324 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2325 
 2326         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2327                 unsigned int gen, pidx;
 2328                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2329                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2330 
 2331                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2332                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2333                         if (should_restart_tx(q) &&
 2334                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2335                                 goto again;
 2336                         q->stops++;
 2337                         break;
 2338                 }
 2339 
 2340                 gen = q->gen;
 2341                 q->in_use += ndesc;
 2342                 pidx = q->pidx;
 2343                 q->pidx += ndesc;
 2344                 if (q->pidx >= q->size) {
 2345                         q->pidx -= q->size;
 2346                         q->gen ^= 1;
 2347                 }
 2348                 
 2349                 (void)mbufq_dequeue(&q->sendq);
 2350                 TXQ_UNLOCK(qs);
 2351                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2352                 TXQ_LOCK(qs);
 2353         }
 2354 #if USE_GTS
 2355         set_bit(TXQ_RUNNING, &q->flags);
 2356         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2357 #endif
 2358         TXQ_UNLOCK(qs);
 2359         wmb();
 2360         t3_write_reg(adap, A_SG_KDOORBELL,
 2361                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2362 }
 2363 
 2364 /**
 2365  *      t3_offload_tx - send an offload packet
 2366  *      @m: the packet
 2367  *
 2368  *      Sends an offload packet.  We use the packet priority to select the
 2369  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2370  *      should be sent as regular or control, bits 1-3 select the queue set.
 2371  */
 2372 int
 2373 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2374 {
 2375         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2376         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2377 
 2378         if (oh->flags & F_HDR_CTRL) {
 2379                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2380                 return (ctrl_xmit(sc, qs, m));
 2381         } else
 2382                 return (ofld_xmit(sc, qs, m));
 2383 }
 2384 #endif
 2385 
 2386 static void
 2387 restart_tx(struct sge_qset *qs)
 2388 {
 2389         struct adapter *sc = qs->port->adapter;
 2390 
 2391         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2392             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2393             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2394                 qs->txq[TXQ_OFLD].restarts++;
 2395                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2396         }
 2397 
 2398         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2399             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2400             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2401                 qs->txq[TXQ_CTRL].restarts++;
 2402                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2403         }
 2404 }
 2405 
 2406 /**
 2407  *      t3_sge_alloc_qset - initialize an SGE queue set
 2408  *      @sc: the controller softc
 2409  *      @id: the queue set id
 2410  *      @nports: how many Ethernet ports will be using this queue set
 2411  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2412  *      @p: configuration parameters for this queue set
 2413  *      @ntxq: number of Tx queues for the queue set
 2414  *      @pi: port info for queue set
 2415  *
 2416  *      Allocate resources and initialize an SGE queue set.  A queue set
 2417  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2418  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2419  *      queue, offload queue, and control queue.
 2420  */
 2421 int
 2422 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2423                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2424 {
 2425         struct sge_qset *q = &sc->sge.qs[id];
 2426         int i, ret = 0;
 2427 
 2428         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2429         q->port = pi;
 2430         q->adap = sc;
 2431 
 2432         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2433             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2434                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2435                 goto err;
 2436         }
 2437         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2438             M_NOWAIT | M_ZERO)) == NULL) {
 2439                 device_printf(sc->dev, "failed to allocate ifq\n");
 2440                 goto err;
 2441         }
 2442         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2443         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2444         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2445         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2446         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2447 
 2448         init_qset_cntxt(q, id);
 2449         q->idx = id;
 2450         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2451                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2452                     &q->fl[0].desc, &q->fl[0].sdesc,
 2453                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2454                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2455                 printf("error %d from alloc ring fl0\n", ret);
 2456                 goto err;
 2457         }
 2458 
 2459         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2460                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2461                     &q->fl[1].desc, &q->fl[1].sdesc,
 2462                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2463                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2464                 printf("error %d from alloc ring fl1\n", ret);
 2465                 goto err;
 2466         }
 2467 
 2468         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2469                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2470                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2471                     NULL, NULL)) != 0) {
 2472                 printf("error %d from alloc ring rspq\n", ret);
 2473                 goto err;
 2474         }
 2475 
 2476         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2477             device_get_unit(sc->dev), irq_vec_idx);
 2478         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2479 
 2480         for (i = 0; i < ntxq; ++i) {
 2481                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2482 
 2483                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2484                             sizeof(struct tx_desc), sz,
 2485                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2486                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2487                             &q->txq[i].desc_map,
 2488                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2489                         printf("error %d from alloc ring tx %i\n", ret, i);
 2490                         goto err;
 2491                 }
 2492                 mbufq_init(&q->txq[i].sendq);
 2493                 q->txq[i].gen = 1;
 2494                 q->txq[i].size = p->txq_size[i];
 2495         }
 2496 
 2497 #ifdef TCP_OFFLOAD
 2498         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2499 #endif
 2500         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2501         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2502         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2503 
 2504         q->fl[0].gen = q->fl[1].gen = 1;
 2505         q->fl[0].size = p->fl_size;
 2506         q->fl[1].size = p->jumbo_size;
 2507 
 2508         q->rspq.gen = 1;
 2509         q->rspq.cidx = 0;
 2510         q->rspq.size = p->rspq_size;
 2511 
 2512         q->txq[TXQ_ETH].stop_thres = nports *
 2513             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2514 
 2515         q->fl[0].buf_size = MCLBYTES;
 2516         q->fl[0].zone = zone_pack;
 2517         q->fl[0].type = EXT_PACKET;
 2518 
 2519         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2520                 q->fl[1].zone = zone_jumbo16;
 2521                 q->fl[1].type = EXT_JUMBO16;
 2522         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2523                 q->fl[1].zone = zone_jumbo9;
 2524                 q->fl[1].type = EXT_JUMBO9;             
 2525         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2526                 q->fl[1].zone = zone_jumbop;
 2527                 q->fl[1].type = EXT_JUMBOP;
 2528         } else {
 2529                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2530                 ret = EDOOFUS;
 2531                 goto err;
 2532         }
 2533         q->fl[1].buf_size = p->jumbo_buf_size;
 2534 
 2535         /* Allocate and setup the lro_ctrl structure */
 2536         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2537 #if defined(INET6) || defined(INET)
 2538         ret = tcp_lro_init(&q->lro.ctrl);
 2539         if (ret) {
 2540                 printf("error %d from tcp_lro_init\n", ret);
 2541                 goto err;
 2542         }
 2543 #endif
 2544         q->lro.ctrl.ifp = pi->ifp;
 2545 
 2546         mtx_lock_spin(&sc->sge.reg_lock);
 2547         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2548                                    q->rspq.phys_addr, q->rspq.size,
 2549                                    q->fl[0].buf_size, 1, 0);
 2550         if (ret) {
 2551                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2552                 goto err_unlock;
 2553         }
 2554 
 2555         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2556                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2557                                           q->fl[i].phys_addr, q->fl[i].size,
 2558                                           q->fl[i].buf_size, p->cong_thres, 1,
 2559                                           0);
 2560                 if (ret) {
 2561                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2562                         goto err_unlock;
 2563                 }
 2564         }
 2565 
 2566         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2567                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2568                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2569                                  1, 0);
 2570         if (ret) {
 2571                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2572                 goto err_unlock;
 2573         }
 2574 
 2575         if (ntxq > 1) {
 2576                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2577                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2578                                          q->txq[TXQ_OFLD].phys_addr,
 2579                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2580                 if (ret) {
 2581                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2582                         goto err_unlock;
 2583                 }
 2584         }
 2585 
 2586         if (ntxq > 2) {
 2587                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2588                                          SGE_CNTXT_CTRL, id,
 2589                                          q->txq[TXQ_CTRL].phys_addr,
 2590                                          q->txq[TXQ_CTRL].size,
 2591                                          q->txq[TXQ_CTRL].token, 1, 0);
 2592                 if (ret) {
 2593                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2594                         goto err_unlock;
 2595                 }
 2596         }
 2597 
 2598         mtx_unlock_spin(&sc->sge.reg_lock);
 2599         t3_update_qset_coalesce(q, p);
 2600 
 2601         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2602         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2603         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2604 
 2605         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2606                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2607 
 2608         return (0);
 2609 
 2610 err_unlock:
 2611         mtx_unlock_spin(&sc->sge.reg_lock);
 2612 err:    
 2613         TXQ_LOCK(q);
 2614         t3_free_qset(sc, q);
 2615 
 2616         return (ret);
 2617 }
 2618 
 2619 /*
 2620  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2621  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2622  * will also be taken into account here.
 2623  */
 2624 void
 2625 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2626 {
 2627         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2628         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2629         struct ifnet *ifp = pi->ifp;
 2630         
 2631         if (cpl->vlan_valid) {
 2632                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2633                 m->m_flags |= M_VLANTAG;
 2634         } 
 2635 
 2636         m->m_pkthdr.rcvif = ifp;
 2637         /*
 2638          * adjust after conversion to mbuf chain
 2639          */
 2640         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2641         m->m_len -= (sizeof(*cpl) + ethpad);
 2642         m->m_data += (sizeof(*cpl) + ethpad);
 2643 
 2644         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2645                 struct ether_header *eh = mtod(m, void *);
 2646                 uint16_t eh_type;
 2647 
 2648                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2649                         struct ether_vlan_header *evh = mtod(m, void *);
 2650 
 2651                         eh_type = evh->evl_proto;
 2652                 } else
 2653                         eh_type = eh->ether_type;
 2654 
 2655                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2656                     eh_type == htons(ETHERTYPE_IP)) {
 2657                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2658                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2659                         m->m_pkthdr.csum_data = 0xffff;
 2660                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2661                     eh_type == htons(ETHERTYPE_IPV6)) {
 2662                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2663                             CSUM_PSEUDO_HDR);
 2664                         m->m_pkthdr.csum_data = 0xffff;
 2665                 }
 2666         }
 2667 }
 2668 
 2669 /**
 2670  *      get_packet - return the next ingress packet buffer from a free list
 2671  *      @adap: the adapter that received the packet
 2672  *      @drop_thres: # of remaining buffers before we start dropping packets
 2673  *      @qs: the qset that the SGE free list holding the packet belongs to
 2674  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2675  *      @r: response descriptor 
 2676  *
 2677  *      Get the next packet from a free list and complete setup of the
 2678  *      sk_buff.  If the packet is small we make a copy and recycle the
 2679  *      original buffer, otherwise we use the original buffer itself.  If a
 2680  *      positive drop threshold is supplied packets are dropped and their
 2681  *      buffers recycled if (a) the number of remaining buffers is under the
 2682  *      threshold and the packet is too big to copy, or (b) the packet should
 2683  *      be copied but there is no memory for the copy.
 2684  */
 2685 static int
 2686 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2687     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2688 {
 2689 
 2690         unsigned int len_cq =  ntohl(r->len_cq);
 2691         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2692         int mask, cidx = fl->cidx;
 2693         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2694         uint32_t len = G_RSPD_LEN(len_cq);
 2695         uint32_t flags = M_EXT;
 2696         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2697         caddr_t cl;
 2698         struct mbuf *m;
 2699         int ret = 0;
 2700 
 2701         mask = fl->size - 1;
 2702         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2703         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2704         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2705         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2706 
 2707         fl->credits--;
 2708         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2709         
 2710         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2711             sopeop == RSPQ_SOP_EOP) {
 2712                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2713                         goto skip_recycle;
 2714                 cl = mtod(m, void *);
 2715                 memcpy(cl, sd->rxsd_cl, len);
 2716                 recycle_rx_buf(adap, fl, fl->cidx);
 2717                 m->m_pkthdr.len = m->m_len = len;
 2718                 m->m_flags = 0;
 2719                 mh->mh_head = mh->mh_tail = m;
 2720                 ret = 1;
 2721                 goto done;
 2722         } else {
 2723         skip_recycle:
 2724                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2725                 cl = sd->rxsd_cl;
 2726                 m = sd->m;
 2727 
 2728                 if ((sopeop == RSPQ_SOP_EOP) ||
 2729                     (sopeop == RSPQ_SOP))
 2730                         flags |= M_PKTHDR;
 2731                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2732                 if (fl->zone == zone_pack) {
 2733                         /*
 2734                          * restore clobbered data pointer
 2735                          */
 2736                         m->m_data = m->m_ext.ext_buf;
 2737                 } else {
 2738                         m_cljset(m, cl, fl->type);
 2739                 }
 2740                 m->m_len = len;
 2741         }               
 2742         switch(sopeop) {
 2743         case RSPQ_SOP_EOP:
 2744                 ret = 1;
 2745                 /* FALLTHROUGH */
 2746         case RSPQ_SOP:
 2747                 mh->mh_head = mh->mh_tail = m;
 2748                 m->m_pkthdr.len = len;
 2749                 break;
 2750         case RSPQ_EOP:
 2751                 ret = 1;
 2752                 /* FALLTHROUGH */
 2753         case RSPQ_NSOP_NEOP:
 2754                 if (mh->mh_tail == NULL) {
 2755                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2756                         m_freem(m);
 2757                         break;
 2758                 }
 2759                 mh->mh_tail->m_next = m;
 2760                 mh->mh_tail = m;
 2761                 mh->mh_head->m_pkthdr.len += len;
 2762                 break;
 2763         }
 2764         if (cxgb_debug)
 2765                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2766 done:
 2767         if (++fl->cidx == fl->size)
 2768                 fl->cidx = 0;
 2769 
 2770         return (ret);
 2771 }
 2772 
 2773 /**
 2774  *      handle_rsp_cntrl_info - handles control information in a response
 2775  *      @qs: the queue set corresponding to the response
 2776  *      @flags: the response control flags
 2777  *
 2778  *      Handles the control information of an SGE response, such as GTS
 2779  *      indications and completion credits for the queue set's Tx queues.
 2780  *      HW coalesces credits, we don't do any extra SW coalescing.
 2781  */
 2782 static __inline void
 2783 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2784 {
 2785         unsigned int credits;
 2786 
 2787 #if USE_GTS
 2788         if (flags & F_RSPD_TXQ0_GTS)
 2789                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2790 #endif
 2791         credits = G_RSPD_TXQ0_CR(flags);
 2792         if (credits) 
 2793                 qs->txq[TXQ_ETH].processed += credits;
 2794 
 2795         credits = G_RSPD_TXQ2_CR(flags);
 2796         if (credits)
 2797                 qs->txq[TXQ_CTRL].processed += credits;
 2798 
 2799 # if USE_GTS
 2800         if (flags & F_RSPD_TXQ1_GTS)
 2801                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2802 # endif
 2803         credits = G_RSPD_TXQ1_CR(flags);
 2804         if (credits)
 2805                 qs->txq[TXQ_OFLD].processed += credits;
 2806 
 2807 }
 2808 
 2809 static void
 2810 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2811     unsigned int sleeping)
 2812 {
 2813         ;
 2814 }
 2815 
 2816 /**
 2817  *      process_responses - process responses from an SGE response queue
 2818  *      @adap: the adapter
 2819  *      @qs: the queue set to which the response queue belongs
 2820  *      @budget: how many responses can be processed in this round
 2821  *
 2822  *      Process responses from an SGE response queue up to the supplied budget.
 2823  *      Responses include received packets as well as credits and other events
 2824  *      for the queues that belong to the response queue's queue set.
 2825  *      A negative budget is effectively unlimited.
 2826  *
 2827  *      Additionally choose the interrupt holdoff time for the next interrupt
 2828  *      on this queue.  If the system is under memory shortage use a fairly
 2829  *      long delay to help recovery.
       *
       *      Returns the amount of budget consumed (budget minus what is left).
       *      A response abandoned because no mbuf could be allocated is counted
       *      in that value even though the consumer index is not advanced past
       *      it.
 2830  */
 2831 static int
 2832 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2833 {
 2834         struct sge_rspq *rspq = &qs->rspq;
 2835         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2836         int budget_left = budget;
 2837         unsigned int sleeping = 0;
 2838 #if defined(INET6) || defined(INET)
 2839         int lro_enabled = qs->lro.enabled;
 2840         int skip_lro;
 2841         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2842 #endif
              /* Partially assembled packet chain carried across responses. */
 2843         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2844 #ifdef DEBUG    
 2845         static int last_holdoff = 0;
 2846         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2847                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2848                 last_holdoff = rspq->holdoff_tmr;
 2849         }
 2850 #endif
              /* Default holdoff; replaced by NOMEM_INTR_DELAY on allocation failure. */
 2851         rspq->next_holdoff = rspq->holdoff_tmr;
 2852 
 2853         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2854                 int eth, eop = 0, ethpad = 0;
 2855                 uint32_t flags = ntohl(r->flags);
 2856                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2857                 uint8_t opcode = r->rss_hdr.opcode;
 2858                 
                      /* eth is fixed here, before opcode may be overridden below. */
 2859                 eth = (opcode == CPL_RX_PKT);
 2860                 
 2861                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2862                         struct mbuf *m;
 2863 
 2864                         if (cxgb_debug)
 2865                                 printf("async notification\n");
 2866 
                              /*
                               * NOTE(review): when a chain is already in
                               * progress the new mbuf is not linked into mh,
                               * yet the handler below is passed mh->mh_head --
                               * confirm this case cannot occur or is intended.
                               */
 2867                         if (mh->mh_head == NULL) {
 2868                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2869                                 m = mh->mh_head;
 2870                         } else {
 2871                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2872                         }
 2873                         if (m == NULL)
 2874                                 goto no_mem;
 2875 
                              /* Copy the raw descriptor and tag it as CPL_ASYNC_NOTIF. */
 2876                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2877                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2878                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 2879                         opcode = CPL_ASYNC_NOTIF;
 2880                         eop = 1;
 2881                         rspq->async_notif++;
 2882                         goto skip;
 2883                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2884                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2885 
 2886                         if (m == NULL) {        
                      /* Shared out-of-memory exit, also reached by goto from above. */
 2887                 no_mem:
 2888                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2889                                 budget_left--;
 2890                                 break;
 2891                         }
 2892                         if (mh->mh_head == NULL)
 2893                                 mh->mh_head = m;
 2894                         else 
 2895                                 mh->mh_tail->m_next = m;
 2896                         mh->mh_tail = m;
 2897 
 2898                         get_imm_packet(adap, r, m);
 2899                         mh->mh_head->m_pkthdr.len += m->m_len;
 2900                         eop = 1;
 2901                         rspq->imm_data++;
 2902                 } else if (r->len_cq) {
                              /* Packet data lives in a free-list buffer. */
 2903                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2904                         
 2905                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2906                         if (eop) {
 2907                                 if (r->rss_hdr.hash_type && !adap->timestamp)
 2908                                         mh->mh_head->m_flags |= M_FLOWID;
 2909                                 mh->mh_head->m_pkthdr.flowid = rss_hash;
 2910                         }
 2911                         
 2912                         ethpad = 2;
 2913                 } else {
                              /* No payload: the response only carries control info. */
 2914                         rspq->pure_rsps++;
 2915                 }
 2916         skip:
 2917                 if (flags & RSPD_CTRL_MASK) {
 2918                         sleeping |= flags & RSPD_GTS_MASK;
 2919                         handle_rsp_cntrl_info(qs, flags);
 2920                 }
 2921 
                      /* Complete non-Ethernet packets go to the per-opcode CPL handler. */
 2922                 if (!eth && eop) {
 2923                         rspq->offload_pkts++;
 2924 #ifdef TCP_OFFLOAD
 2925                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2926 #else
 2927                         m_freem(mh->mh_head);
 2928 #endif
 2929                         mh->mh_head = NULL;
 2930                 } else if (eth && eop) {
 2931                         struct mbuf *m = mh->mh_head;
 2932 
 2933                         t3_rx_eth(adap, m, ethpad);
 2934 
 2935                         /*
 2936                          * The T304 sends incoming packets on any qset.  If LRO
 2937                          * is also enabled, we could end up sending packet up
 2938                          * lro_ctrl->ifp's input.  That is incorrect.
 2939                          *
 2940                          * The mbuf's rcvif was derived from the cpl header and
 2941                          * is accurate.  Skip LRO and just use that.
 2942                          */
 2943 #if defined(INET6) || defined(INET)
 2944                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2945 
 2946                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2947                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2948                             ) {
 2949                                 /* successfully queue'd for LRO */
 2950                         } else
 2951 #endif
 2952                         {
 2953                                 /*
 2954                                  * LRO not enabled, packet unsuitable for LRO,
 2955                                  * or unable to queue.  Pass it up right now in
 2956                                  * either case.
 2957                                  */
 2958                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2959                                 (*ifp->if_input)(ifp, m);
 2960                         }
 2961                         mh->mh_head = NULL;
 2962 
 2963                 }
 2964 
                      /* Advance to the next descriptor; gen flips when the ring wraps. */
 2965                 r++;
 2966                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2967                         rspq->cidx = 0;
 2968                         rspq->gen ^= 1;
 2969                         r = rspq->desc;
 2970                 }
 2971 
                      /* Return response-queue credits in batches of 64. */
 2972                 if (++rspq->credits >= 64) {
 2973                         refill_rspq(adap, rspq, rspq->credits);
 2974                         rspq->credits = 0;
 2975                 }
 2976                 __refill_fl_lt(adap, &qs->fl[0], 32);
 2977                 __refill_fl_lt(adap, &qs->fl[1], 32);
 2978                 --budget_left;
 2979         }
 2980 
 2981 #if defined(INET6) || defined(INET)
 2982         /* Flush LRO */
 2983         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 2984                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 2985                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 2986                 tcp_lro_flush(lro_ctrl, queued);
 2987         }
 2988 #endif
 2989 
 2990         if (sleeping)
 2991                 check_ring_db(adap, qs, sleeping);
 2992 
 2993         mb();  /* commit Tx queue processed updates */
              /*
               * txq_stopped is a bitmap; a value above 1 means some bit other
               * than bit 0 is set (presumably bit 0 is TXQ_ETH, which
               * restart_tx() does not handle -- confirm).
               */
 2994         if (__predict_false(qs->txq_stopped > 1))
 2995                 restart_tx(qs);
 2996 
 2997         __refill_fl_lt(adap, &qs->fl[0], 512);
 2998         __refill_fl_lt(adap, &qs->fl[1], 512);
 2999         budget -= budget_left;
 3000         return (budget);
 3001 }
 3002 
 3003 /*
 3004  * A helper function that processes responses and issues GTS.
 3005  */
 3006 static __inline int
 3007 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3008 {
 3009         int work;
 3010         static int last_holdoff = 0;
 3011         
 3012         work = process_responses(adap, rspq_to_qset(rq), -1);
 3013 
 3014         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3015                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3016                 last_holdoff = rq->next_holdoff;
 3017         }
 3018         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3019             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3020         
 3021         return (work);
 3022 }
 3023 
 3024 
 3025 /*
 3026  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3027  * Handles data events from SGE response queues as well as error and other
 3028  * async events as they all use the same interrupt pin.  We use one SGE
 3029  * response queue per port in this mode and protect all response queues with
 3030  * queue 0's lock.
 3031  */
 3032 void
 3033 t3b_intr(void *data)
 3034 {
 3035         uint32_t i, map;
 3036         adapter_t *adap = data;
 3037         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3038         
 3039         t3_write_reg(adap, A_PL_CLI, 0);
 3040         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3041 
 3042         if (!map) 
 3043                 return;
 3044 
 3045         if (__predict_false(map & F_ERRINTR)) {
 3046                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3047                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3048                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3049         }
 3050 
 3051         mtx_lock(&q0->lock);
 3052         for_each_port(adap, i)
 3053             if (map & (1 << i))
 3054                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3055         mtx_unlock(&q0->lock);
 3056 }
 3057 
 3058 /*
 3059  * The MSI interrupt handler.  This needs to handle data events from SGE
 3060  * response queues as well as error and other async events as they all use
 3061  * the same MSI vector.  We use one SGE response queue per port in this mode
 3062  * and protect all response queues with queue 0's lock.
 3063  */
 3064 void
 3065 t3_intr_msi(void *data)
 3066 {
 3067         adapter_t *adap = data;
 3068         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3069         int i, new_packets = 0;
 3070 
 3071         mtx_lock(&q0->lock);
 3072 
 3073         for_each_port(adap, i)
 3074             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3075                     new_packets = 1;
 3076         mtx_unlock(&q0->lock);
 3077         if (new_packets == 0) {
 3078                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3079                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3080                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3081         }
 3082 }
 3083 
 3084 void
 3085 t3_intr_msix(void *data)
 3086 {
 3087         struct sge_qset *qs = data;
 3088         adapter_t *adap = qs->port->adapter;
 3089         struct sge_rspq *rspq = &qs->rspq;
 3090 
 3091         if (process_responses_gts(adap, rspq) == 0)
 3092                 rspq->unhandled_irqs++;
 3093 }
 3094 
 3095 #define QDUMP_SBUF_SIZE         32 * 400
 3096 static int
 3097 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3098 {
 3099         struct sge_rspq *rspq;
 3100         struct sge_qset *qs;
 3101         int i, err, dump_end, idx;
 3102         struct sbuf *sb;
 3103         struct rsp_desc *rspd;
 3104         uint32_t data[4];
 3105         
 3106         rspq = arg1;
 3107         qs = rspq_to_qset(rspq);
 3108         if (rspq->rspq_dump_count == 0) 
 3109                 return (0);
 3110         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3111                 log(LOG_WARNING,
 3112                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3113                 rspq->rspq_dump_count = 0;
 3114                 return (EINVAL);
 3115         }
 3116         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3117                 log(LOG_WARNING,
 3118                     "dump start of %d is greater than queue size\n",
 3119                     rspq->rspq_dump_start);
 3120                 rspq->rspq_dump_start = 0;
 3121                 return (EINVAL);
 3122         }
 3123         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3124         if (err)
 3125                 return (err);
 3126         err = sysctl_wire_old_buffer(req, 0);
 3127         if (err)
 3128                 return (err);
 3129         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3130 
 3131         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3132             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3133             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3134         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3135             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3136         
 3137         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3138             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3139         
 3140         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3141         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3142                 idx = i & (RSPQ_Q_SIZE-1);
 3143                 
 3144                 rspd = &rspq->desc[idx];
 3145                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3146                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3147                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3148                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3149                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3150                     be32toh(rspd->len_cq), rspd->intr_gen);
 3151         }
 3152 
 3153         err = sbuf_finish(sb);
 3154         /* Output a trailing NUL. */
 3155         if (err == 0)
 3156                 err = SYSCTL_OUT(req, "", 1);
 3157         sbuf_delete(sb);
 3158         return (err);
 3159 }       
 3160 
 3161 static int
 3162 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3163 {
 3164         struct sge_txq *txq;
 3165         struct sge_qset *qs;
 3166         int i, j, err, dump_end;
 3167         struct sbuf *sb;
 3168         struct tx_desc *txd;
 3169         uint32_t *WR, wr_hi, wr_lo, gen;
 3170         uint32_t data[4];
 3171         
 3172         txq = arg1;
 3173         qs = txq_to_qset(txq, TXQ_ETH);
 3174         if (txq->txq_dump_count == 0) {
 3175                 return (0);
 3176         }
 3177         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3178                 log(LOG_WARNING,
 3179                     "dump count is too large %d\n", txq->txq_dump_count);
 3180                 txq->txq_dump_count = 1;
 3181                 return (EINVAL);
 3182         }
 3183         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3184                 log(LOG_WARNING,
 3185                     "dump start of %d is greater than queue size\n",
 3186                     txq->txq_dump_start);
 3187                 txq->txq_dump_start = 0;
 3188                 return (EINVAL);
 3189         }
 3190         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3191         if (err)
 3192                 return (err);
 3193         err = sysctl_wire_old_buffer(req, 0);
 3194         if (err)
 3195                 return (err);
 3196         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3197 
 3198         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3199             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3200             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3201         sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
 3202             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3203             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3204         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3205             txq->txq_dump_start,
 3206             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3207 
 3208         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3209         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3210                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3211                 WR = (uint32_t *)txd->flit;
 3212                 wr_hi = ntohl(WR[0]);
 3213                 wr_lo = ntohl(WR[1]);           
 3214                 gen = G_WR_GEN(wr_lo);
 3215                 
 3216                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3217                     wr_hi, wr_lo, gen);
 3218                 for (j = 2; j < 30; j += 4) 
 3219                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3220                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3221 
 3222         }
 3223         err = sbuf_finish(sb);
 3224         /* Output a trailing NUL. */
 3225         if (err == 0)
 3226                 err = SYSCTL_OUT(req, "", 1);
 3227         sbuf_delete(sb);
 3228         return (err);
 3229 }
 3230 
 3231 static int
 3232 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3233 {
 3234         struct sge_txq *txq;
 3235         struct sge_qset *qs;
 3236         int i, j, err, dump_end;
 3237         struct sbuf *sb;
 3238         struct tx_desc *txd;
 3239         uint32_t *WR, wr_hi, wr_lo, gen;
 3240         
 3241         txq = arg1;
 3242         qs = txq_to_qset(txq, TXQ_CTRL);
 3243         if (txq->txq_dump_count == 0) {
 3244                 return (0);
 3245         }
 3246         if (txq->txq_dump_count > 256) {
 3247                 log(LOG_WARNING,
 3248                     "dump count is too large %d\n", txq->txq_dump_count);
 3249                 txq->txq_dump_count = 1;
 3250                 return (EINVAL);
 3251         }
 3252         if (txq->txq_dump_start > 255) {
 3253                 log(LOG_WARNING,
 3254                     "dump start of %d is greater than queue size\n",
 3255                     txq->txq_dump_start);
 3256                 txq->txq_dump_start = 0;
 3257                 return (EINVAL);
 3258         }
 3259 
 3260         err = sysctl_wire_old_buffer(req, 0);
 3261         if (err != 0)
 3262                 return (err);
 3263         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3264         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3265             txq->txq_dump_start,
 3266             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3267 
 3268         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3269         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3270                 txd = &txq->desc[i & (255)];
 3271                 WR = (uint32_t *)txd->flit;
 3272                 wr_hi = ntohl(WR[0]);
 3273                 wr_lo = ntohl(WR[1]);           
 3274                 gen = G_WR_GEN(wr_lo);
 3275                 
 3276                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3277                     wr_hi, wr_lo, gen);
 3278                 for (j = 2; j < 30; j += 4) 
 3279                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3280                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3281 
 3282         }
 3283         err = sbuf_finish(sb);
 3284         /* Output a trailing NUL. */
 3285         if (err == 0)
 3286                 err = SYSCTL_OUT(req, "", 1);
 3287         sbuf_delete(sb);
 3288         return (err);
 3289 }
 3290 
 3291 static int
 3292 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3293 {
 3294         adapter_t *sc = arg1;
 3295         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3296         int coalesce_usecs;     
 3297         struct sge_qset *qs;
 3298         int i, j, err, nqsets = 0;
 3299         struct mtx *lock;
 3300 
 3301         if ((sc->flags & FULL_INIT_DONE) == 0)
 3302                 return (ENXIO);
 3303                 
 3304         coalesce_usecs = qsp->coalesce_usecs;
 3305         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3306 
 3307         if (err != 0) {
 3308                 return (err);
 3309         }
 3310         if (coalesce_usecs == qsp->coalesce_usecs)
 3311                 return (0);
 3312 
 3313         for (i = 0; i < sc->params.nports; i++) 
 3314                 for (j = 0; j < sc->port[i].nqsets; j++)
 3315                         nqsets++;
 3316 
 3317         coalesce_usecs = max(1, coalesce_usecs);
 3318 
 3319         for (i = 0; i < nqsets; i++) {
 3320                 qs = &sc->sge.qs[i];
 3321                 qsp = &sc->params.sge.qset[i];
 3322                 qsp->coalesce_usecs = coalesce_usecs;
 3323                 
 3324                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3325                             &sc->sge.qs[0].rspq.lock;
 3326 
 3327                 mtx_lock(lock);
 3328                 t3_update_qset_coalesce(qs, qsp);
 3329                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3330                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3331                 mtx_unlock(lock);
 3332         }
 3333 
 3334         return (0);
 3335 }
 3336 
 3337 static int
 3338 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3339 {
 3340         adapter_t *sc = arg1;
 3341         int rc, timestamp;
 3342 
 3343         if ((sc->flags & FULL_INIT_DONE) == 0)
 3344                 return (ENXIO);
 3345 
 3346         timestamp = sc->timestamp;
 3347         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3348 
 3349         if (rc != 0)
 3350                 return (rc);
 3351 
 3352         if (timestamp != sc->timestamp) {
 3353                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3354                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3355                 sc->timestamp = timestamp;
 3356         }
 3357 
 3358         return (0);
 3359 }
 3360 
 3361 void
 3362 t3_add_attach_sysctls(adapter_t *sc)
 3363 {
 3364         struct sysctl_ctx_list *ctx;
 3365         struct sysctl_oid_list *children;
 3366 
 3367         ctx = device_get_sysctl_ctx(sc->dev);
 3368         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3369 
 3370         /* random information */
 3371         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3372             "firmware_version",
 3373             CTLFLAG_RD, &sc->fw_version,
 3374             0, "firmware version");
 3375         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3376             "hw_revision",
 3377             CTLFLAG_RD, &sc->params.rev,
 3378             0, "chip model");
 3379         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3380             "port_types",
 3381             CTLFLAG_RD, &sc->port_types,
 3382             0, "type of ports");
 3383         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3384             "enable_debug",
 3385             CTLFLAG_RW, &cxgb_debug,
 3386             0, "enable verbose debugging output");
 3387         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3388             CTLFLAG_RD, &sc->tunq_coalesce,
 3389             "#tunneled packets freed");
 3390         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3391             "txq_overrun",
 3392             CTLFLAG_RD, &txq_fills,
 3393             0, "#times txq overrun");
 3394         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3395             "core_clock",
 3396             CTLFLAG_RD, &sc->params.vpd.cclk,
 3397             0, "core clock frequency (in KHz)");
 3398 }
 3399 
 3400 
 3401 static const char *rspq_name = "rspq";
 3402 static const char *txq_names[] =
 3403 {
 3404         "txq_eth",
 3405         "txq_ofld",
 3406         "txq_ctrl"      
 3407 };
 3408 
 3409 static int
 3410 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3411 {
 3412         struct port_info *p = arg1;
 3413         uint64_t *parg;
 3414 
 3415         if (!p)
 3416                 return (EINVAL);
 3417 
 3418         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3419         PORT_LOCK(p);
 3420         t3_mac_update_stats(&p->mac);
 3421         PORT_UNLOCK(p);
 3422 
 3423         return (sysctl_handle_64(oidp, parg, 0, req));
 3424 }
 3425 
 3426 void
 3427 t3_add_configured_sysctls(adapter_t *sc)
 3428 {
 3429         struct sysctl_ctx_list *ctx;
 3430         struct sysctl_oid_list *children;
 3431         int i, j;
 3432         
 3433         ctx = device_get_sysctl_ctx(sc->dev);
 3434         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3435 
 3436         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3437             "intr_coal",
 3438             CTLTYPE_INT|CTLFLAG_RW, sc,
 3439             0, t3_set_coalesce_usecs,
 3440             "I", "interrupt coalescing timer (us)");
 3441 
 3442         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3443             "pkt_timestamp",
 3444             CTLTYPE_INT | CTLFLAG_RW, sc,
 3445             0, t3_pkt_timestamp,
 3446             "I", "provide packet timestamp instead of connection hash");
 3447 
 3448         for (i = 0; i < sc->params.nports; i++) {
 3449                 struct port_info *pi = &sc->port[i];
 3450                 struct sysctl_oid *poid;
 3451                 struct sysctl_oid_list *poidlist;
 3452                 struct mac_stats *mstats = &pi->mac.stats;
 3453                 
 3454                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3455                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3456                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3457                 poidlist = SYSCTL_CHILDREN(poid);
 3458                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3459                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3460                     0, "#queue sets");
 3461 
 3462                 for (j = 0; j < pi->nqsets; j++) {
 3463                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3464                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3465                                           *ctrlqpoid, *lropoid;
 3466                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3467                                                *txqpoidlist, *ctrlqpoidlist,
 3468                                                *lropoidlist;
 3469                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3470                         
 3471                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3472                         
 3473                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3474                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3475                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3476 
 3477                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3478                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3479                                         "freelist #0 empty");
 3480                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3481                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3482                                         "freelist #1 empty");
 3483 
 3484                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3485                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3486                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3487 
 3488                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3489                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3490                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3491 
 3492                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3493                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3494                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3495 
 3496                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3497                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3498                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3499 
 3500                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3501                             CTLFLAG_RD, &qs->rspq.size,
 3502                             0, "#entries in response queue");
 3503                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3504                             CTLFLAG_RD, &qs->rspq.cidx,
 3505                             0, "consumer index");
 3506                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3507                             CTLFLAG_RD, &qs->rspq.credits,
 3508                             0, "#credits");
 3509                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3510                             CTLFLAG_RD, &qs->rspq.starved,
 3511                             0, "#times starved");
 3512                         SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3513                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3514                             "physical_address_of the queue");
 3515                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3516                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3517                             0, "start rspq dump entry");
 3518                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3519                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3520                             0, "#rspq entries to dump");
 3521                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3522                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3523                             0, t3_dump_rspq, "A", "dump of the response queue");
 3524 
 3525                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3526                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3527                             "#tunneled packets dropped");
 3528                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3529                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3530                             0, "#tunneled packets waiting to be sent");
 3531 #if 0                   
 3532                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3533                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3534                             0, "#tunneled packets queue producer index");
 3535                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3536                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3537                             0, "#tunneled packets queue consumer index");
 3538 #endif                  
 3539                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3540                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3541                             0, "#tunneled packets processed by the card");
 3542                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3543                             CTLFLAG_RD, &txq->cleaned,
 3544                             0, "#tunneled packets cleaned");
 3545                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3546                             CTLFLAG_RD, &txq->in_use,
 3547                             0, "#tunneled packet slots in use");
 3548                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3549                             CTLFLAG_RD, &txq->txq_frees,
 3550                             "#tunneled packets freed");
 3551                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3552                             CTLFLAG_RD, &txq->txq_skipped,
 3553                             0, "#tunneled packet descriptors skipped");
 3554                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3555                             CTLFLAG_RD, &txq->txq_coalesced,
 3556                             "#tunneled packets coalesced");
 3557                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3558                             CTLFLAG_RD, &txq->txq_enqueued,
 3559                             0, "#tunneled packets enqueued to hardware");
 3560                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3561                             CTLFLAG_RD, &qs->txq_stopped,
 3562                             0, "tx queues stopped");
 3563                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3564                             CTLFLAG_RD, &txq->phys_addr,
 3565                             "physical_address_of the queue");
 3566                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3567                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3568                             0, "txq generation");
 3569                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3570                             CTLFLAG_RD, &txq->cidx,
 3571                             0, "hardware queue cidx");                  
 3572                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3573                             CTLFLAG_RD, &txq->pidx,
 3574                             0, "hardware queue pidx");
 3575                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3576                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3577                             0, "txq start idx for dump");
 3578                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3579                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3580                             0, "txq #entries to dump");                 
 3581                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3582                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3583                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3584 
 3585                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3586                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3587                             0, "ctrlq start idx for dump");
 3588                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3589                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3590                             0, "ctrl #entries to dump");                        
 3591                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3592                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3593                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3594 
 3595                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3596                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3597                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3598                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3599                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3600                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3601                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3602                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3603                 }
 3604 
 3605                 /* Now add a node for mac stats. */
 3606                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3607                     CTLFLAG_RD, NULL, "MAC statistics");
 3608                 poidlist = SYSCTL_CHILDREN(poid);
 3609 
 3610                 /*
 3611                  * We (ab)use the length argument (arg2) to pass on the offset
 3612                  * of the data that we are interested in.  This is only required
 3613                  * for the quad counters that are updated from the hardware (we
 3614                  * make sure that we return the latest value).
 3615                  * sysctl_handle_macstat first updates *all* the counters from
 3616                  * the hardware, and then returns the latest value of the
 3617                  * requested counter.  Best would be to update only the
 3618                  * requested counter from hardware, but t3_mac_update_stats()
 3619                  * hides all the register details and we don't want to dive into
 3620                  * all that here.
 3621                  */
 3622 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3623     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3624     sysctl_handle_macstat, "QU", 0)
 3625                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3626                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3627                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3628                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3629                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3630                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3631                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3632                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3633                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3634                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3635                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3636                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3637                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3638                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3639                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3640                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3641                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3647                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3648                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3649                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3650                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3651                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3652                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3653                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3654                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3655                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3656                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3657                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3658                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3659                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3660                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3661                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3662                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3663                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3671 #undef CXGB_SYSCTL_ADD_QUAD
 3672 
 3673 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3674     CTLFLAG_RD, &mstats->a, 0)
 3675                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3676                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3677                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3678                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3679                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3680                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3681                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3682                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3683                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3684                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3685 #undef CXGB_SYSCTL_ADD_ULONG
 3686         }
 3687 }
 3688         
 3689 /**
 3690  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3691  *      @qs: the queue set
 3692  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3693  *      @idx: the descriptor index in the queue
 3694  *      @data: where to dump the descriptor contents
 3695  *
 3696  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3697  *      size of the descriptor.
 3698  */
 3699 int
 3700 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3701                 unsigned char *data)
 3702 {
 3703         if (qnum >= 6)
 3704                 return (EINVAL);
 3705 
 3706         if (qnum < 3) {
 3707                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3708                         return -EINVAL;
 3709                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3710                 return sizeof(struct tx_desc);
 3711         }
 3712 
 3713         if (qnum == 3) {
 3714                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3715                         return (EINVAL);
 3716                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3717                 return sizeof(struct rsp_desc);
 3718         }
 3719 
 3720         qnum -= 4;
 3721         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3722                 return (EINVAL);
 3723         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3724         return sizeof(struct rx_desc);
 3725 }

Cache object: 112c303ca50199d9a699e036b3cd458c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.