FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/10.3/sys/dev/cxgb/cxgb_sge.c 281955 2015-04-24 23:26:44Z hiren $");
   32 
   33 #include "opt_inet6.h"
   34 #include "opt_inet.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/kernel.h>
   39 #include <sys/module.h>
   40 #include <sys/bus.h>
   41 #include <sys/conf.h>
   42 #include <machine/bus.h>
   43 #include <machine/resource.h>
   44 #include <sys/bus_dma.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/bpf.h>    
   60 #include <net/ethernet.h>
   61 #include <net/if.h>
   62 #include <net/if_vlan_var.h>
   63 
   64 #include <netinet/in_systm.h>
   65 #include <netinet/in.h>
   66 #include <netinet/ip.h>
   67 #include <netinet/ip6.h>
   68 #include <netinet/tcp.h>
   69 
   70 #include <dev/pci/pcireg.h>
   71 #include <dev/pci/pcivar.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/pmap.h>
   75 
   76 #include <cxgb_include.h>
   77 #include <sys/mvec.h>
   78 
   79 int     txq_fills = 0;
   80 int     multiq_tx_enable = 1;
   81 
   82 #ifdef TCP_OFFLOAD
   83 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
   84 #endif
   85 
   86 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   87 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   88 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   90     "size of per-queue mbuf ring");
   91 
   92 static int cxgb_tx_coalesce_force = 0;
   93 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   94 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   95     &cxgb_tx_coalesce_force, 0,
   96     "coalesce small packets into a single work request regardless of ring state");
   97 
   98 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   99 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
  100 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
  101 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
  102 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
  103 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
  104 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
  105 
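
/*
 * A minimal standalone sketch (not part of the driver) showing how the
 * shift-based thresholds above map to fractions of the Ethernet Tx ring.
 * TX_ETH_Q_SIZE is defined in the cxgb headers; the value 1024 below is
 * only an assumption for the sake of the illustration.
 */
#include <stdio.h>

#define EXAMPLE_TX_ETH_Q_SIZE   1024    /* assumed ring size, illustration only */

int
main(void)
{
        printf("coalesce start default %d (1/2 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 1);
        printf("coalesce start max     %d (7/8 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE - (EXAMPLE_TX_ETH_Q_SIZE >> 3));
        printf("coalesce stop default  %d (1/4 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 2);
        printf("coalesce stop min      %d (1/32 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 5);
        printf("tx reclaim default     %d (1/32 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 5);
        printf("tx reclaim max         %d (1/4 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 2);
        printf("tx reclaim min         %d (1/64 of ring)\n",
            EXAMPLE_TX_ETH_Q_SIZE >> 6);
        return (0);
}
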
  106 
  107 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  108 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
  109     &cxgb_tx_coalesce_enable_start);
  110 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
  111     &cxgb_tx_coalesce_enable_start, 0,
  112     "coalesce enable threshold");
  113 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  114 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  116     &cxgb_tx_coalesce_enable_stop, 0,
  117     "coalesce disable threshold");
  118 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  119 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  120 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  121     &cxgb_tx_reclaim_threshold, 0,
  122     "tx cleaning minimum threshold");
  123 
  124 /*
  125  * XXX don't re-enable this until TOE stops assuming
  126  * we have an m_ext
  127  */
  128 static int recycle_enable = 0;
  129 
  130 extern int cxgb_use_16k_clusters;
  131 extern int nmbjumbop;
  132 extern int nmbjumbo9;
  133 extern int nmbjumbo16;
  134 
  135 #define USE_GTS 0
  136 
  137 #define SGE_RX_SM_BUF_SIZE      1536
  138 #define SGE_RX_DROP_THRES       16
  139 #define SGE_RX_COPY_THRES       128
  140 
  141 /*
  142  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  143  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  144  */
  145 #define TX_RECLAIM_PERIOD       (hz >> 1)
  146 
  147 /* 
  148  * Values for sge_txq.flags
  149  */
  150 enum {
  151         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  152         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  153 };
  154 
  155 struct tx_desc {
  156         uint64_t        flit[TX_DESC_FLITS];
  157 } __packed;
  158 
  159 struct rx_desc {
  160         uint32_t        addr_lo;
  161         uint32_t        len_gen;
  162         uint32_t        gen2;
  163         uint32_t        addr_hi;
  164 } __packed;
  165 
  166 struct rsp_desc {               /* response queue descriptor */
  167         struct rss_header       rss_hdr;
  168         uint32_t                flags;
  169         uint32_t                len_cq;
  170         uint8_t                 imm_data[47];
  171         uint8_t                 intr_gen;
  172 } __packed;
  173 
  174 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  175 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  176 #define RX_SW_DESC_INUSE        (1 << 3)
  177 #define TX_SW_DESC_MAPPED       (1 << 4)
  178 
  179 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  180 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  181 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  182 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  183 
  184 struct tx_sw_desc {                /* SW state per Tx descriptor */
  185         struct mbuf     *m;
  186         bus_dmamap_t    map;
  187         int             flags;
  188 };
  189 
  190 struct rx_sw_desc {                /* SW state per Rx descriptor */
  191         caddr_t         rxsd_cl;
  192         struct mbuf     *m;
  193         bus_dmamap_t    map;
  194         int             flags;
  195 };
  196 
  197 struct txq_state {
  198         unsigned int    compl;
  199         unsigned int    gen;
  200         unsigned int    pidx;
  201 };
  202 
  203 struct refill_fl_cb_arg {
  204         int               error;
  205         bus_dma_segment_t seg;
  206         int               nseg;
  207 };
  208 
  209 
  210 /*
  211  * Maps a number of flits to the number of Tx descriptors that can hold them.
  212  * The formula is
  213  *
  214  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  215  *
  216  * HW allows up to 4 descriptors to be combined into a WR.
  217  */
  218 static uint8_t flit_desc_map[] = {
  219         0,
  220 #if SGE_NUM_GENBITS == 1
  221         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  222         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  223         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  224         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  225 #elif SGE_NUM_GENBITS == 2
  226         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  227         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  228         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  229         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  230 #else
  231 # error "SGE_NUM_GENBITS must be 1 or 2"
  232 #endif
  233 };
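
/*
 * A minimal standalone sketch (not part of the driver) re-deriving the
 * flit_desc_map entries for flit counts of two and up from the formula in
 * the comment above, desc = 1 + (flits - 2) / (WR_FLITS - 1).  WR_FLITS is
 * defined in the cxgb headers; the value 15 used here is an assumption
 * chosen to match the SGE_NUM_GENBITS == 2 table and is for illustration
 * only.
 */
#include <stdio.h>

#define EXAMPLE_WR_FLITS        15      /* assumed, inferred from the table above */

int
main(void)
{
        unsigned int flits, desc;

        /* A work request may span at most 4 descriptors. */
        for (flits = 2; flits <= 1 + 4 * (EXAMPLE_WR_FLITS - 1); flits++) {
                desc = 1 + (flits - 2) / (EXAMPLE_WR_FLITS - 1);
                printf("%2u flits -> %u descriptor(s)\n", flits, desc);
        }
        return (0);
}
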
  234 
  235 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  236 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  237 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  238 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  239 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  240 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  241         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  242 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  243 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  244         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  245 #define TXQ_RING_DEQUEUE(qs) \
  246         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  247 
  248 int cxgb_debug = 0;
  249 
  250 static void sge_timer_cb(void *arg);
  251 static void sge_timer_reclaim(void *arg, int ncount);
  252 static void sge_txq_reclaim_handler(void *arg, int ncount);
  253 static void cxgb_start_locked(struct sge_qset *qs);
  254 
  255 /*
  256  * XXX need to cope with bursty scheduling by looking at a wider
  257  * window than we are now for determining the need for coalescing
  258  *
  259  */
  260 static __inline uint64_t
  261 check_pkt_coalesce(struct sge_qset *qs) 
  262 { 
  263         struct adapter *sc; 
  264         struct sge_txq *txq; 
  265         uint8_t *fill;
  266 
  267         if (__predict_false(cxgb_tx_coalesce_force))
  268                 return (1);
  269         txq = &qs->txq[TXQ_ETH]; 
  270         sc = qs->port->adapter; 
  271         fill = &sc->tunq_fill[qs->idx];
  272 
  273         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  274                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  275         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  276                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
  277         /*
  278          * If the hardware transmit queue fills past the coalesce-start
  279          * threshold we mark it as coalescing; we drop back out of coalescing
  280          * once it drains below the coalesce-stop threshold and there are no
  281          * packets enqueued.  This provides some degree of hysteresis.
  282          */
  283         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  284             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  285                 *fill = 0; 
  286         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  287                 *fill = 1; 
  288 
  289         return (sc->tunq_coalesce);
  290 } 
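
/*
 * A minimal standalone sketch (not part of the driver) of the hysteresis in
 * check_pkt_coalesce() above, slightly simplified: the per-queue fill flag is
 * raised once hardware ring occupancy reaches the start threshold and is only
 * cleared again once occupancy drops to the stop threshold with the software
 * ring empty.  The thresholds and the occupancy trace are made-up values.
 */
#include <stdio.h>

int
main(void)
{
        const int start_thresh = 512, stop_thresh = 256;        /* assumed */
        const int in_use[] =     { 100, 300, 600, 400, 300, 200, 100 };
        const int ring_empty[] = {   1,   1,   1,   0,   0,   1,   1 };
        int fill = 0, i;

        for (i = 0; i < 7; i++) {
                if (fill != 0 && in_use[i] <= stop_thresh && ring_empty[i])
                        fill = 0;       /* drop back out of coalescing */
                else if (fill == 0 && in_use[i] >= start_thresh)
                        fill = 1;       /* start coalescing */
                printf("in_use=%3d ring_empty=%d -> coalescing=%d\n",
                    in_use[i], ring_empty[i], fill);
        }
        return (0);
}
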
  291 
  292 #ifdef __LP64__
  293 static void
  294 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  295 {
  296         uint64_t wr_hilo;
  297 #if _BYTE_ORDER == _LITTLE_ENDIAN
  298         wr_hilo = wr_hi;
  299         wr_hilo |= (((uint64_t)wr_lo)<<32);
  300 #else
  301         wr_hilo = wr_lo;
  302         wr_hilo |= (((uint64_t)wr_hi)<<32);
  303 #endif  
  304         wrp->wrh_hilo = wr_hilo;
  305 }
  306 #else
  307 static void
  308 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  309 {
  310 
  311         wrp->wrh_hi = wr_hi;
  312         wmb();
  313         wrp->wrh_lo = wr_lo;
  314 }
  315 #endif
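
/*
 * A minimal standalone sketch (not part of the driver) of why the LP64
 * variant of set_wr_hdr() above may fold wr_hi and wr_lo into one 64-bit
 * store: with the halves swapped according to host byte order, the single
 * store leaves wr_hi in the first 32-bit word of the header and wr_lo in the
 * second, just as the two 32-bit stores of the 32-bit variant would, and
 * without needing the wmb() between them.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hdr32 {
        uint32_t hi;
        uint32_t lo;
};

int
main(void)
{
        const uint32_t wr_hi = 0x11223344, wr_lo = 0x55667788;
        const union { uint32_t u; uint8_t b[4]; } probe = { .u = 1 };
        const int little_endian = (probe.b[0] == 1);
        uint64_t hilo;
        struct hdr32 h;

        if (little_endian)
                hilo = (uint64_t)wr_hi | ((uint64_t)wr_lo << 32);
        else
                hilo = (uint64_t)wr_lo | ((uint64_t)wr_hi << 32);

        /* View the single 64-bit value as the two 32-bit header words. */
        memcpy(&h, &hilo, sizeof(h));
        printf("hi=0x%08x lo=0x%08x (expected hi=0x%08x lo=0x%08x)\n",
            h.hi, h.lo, wr_hi, wr_lo);
        return (0);
}
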
  316 
  317 struct coalesce_info {
  318         int count;
  319         int nbytes;
  320 };
  321 
  322 static int
  323 coalesce_check(struct mbuf *m, void *arg)
  324 {
  325         struct coalesce_info *ci = arg;
  326         int *count = &ci->count;
  327         int *nbytes = &ci->nbytes;
  328 
  329         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  330                 (*count < 7) && (m->m_next == NULL))) {
  331                 *count += 1;
  332                 *nbytes += m->m_len;
  333                 return (1);
  334         }
  335         return (0);
  336 }
  337 
  338 static struct mbuf *
  339 cxgb_dequeue(struct sge_qset *qs)
  340 {
  341         struct mbuf *m, *m_head, *m_tail;
  342         struct coalesce_info ci;
  343 
  344         
  345         if (check_pkt_coalesce(qs) == 0) 
  346                 return TXQ_RING_DEQUEUE(qs);
  347 
  348         m_head = m_tail = NULL;
  349         ci.count = ci.nbytes = 0;
  350         do {
  351                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  352                 if (m_head == NULL) {
  353                         m_tail = m_head = m;
  354                 } else if (m != NULL) {
  355                         m_tail->m_nextpkt = m;
  356                         m_tail = m;
  357                 }
  358         } while (m != NULL);
  359         if (ci.count > 7)
  360                 panic("trying to coalesce %d packets in to one WR", ci.count);
  361         return (m_head);
  362 }
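
/*
 * A minimal standalone sketch (not part of the driver) of the limits that
 * coalesce_check() places on a coalesced batch: at most 7 packets and at
 * most 10500 bytes per work request (and, in the driver, only single-mbuf
 * packets once the batch is non-empty).  The packet lengths are made up.
 */
#include <stdio.h>

int
main(void)
{
        const int pkt_len[] = { 60, 1500, 9000, 200, 60, 60, 60, 60, 60 };
        int i, count = 0, nbytes = 0;

        for (i = 0; i < 9; i++) {
                /* Same acceptance test as coalesce_check(). */
                if (nbytes == 0 || (nbytes + pkt_len[i] <= 10500 && count < 7)) {
                        count++;
                        nbytes += pkt_len[i];
                        printf("packet %d (%4d bytes) coalesced: %d pkts, %d bytes\n",
                            i, pkt_len[i], count, nbytes);
                } else {
                        printf("packet %d (%4d bytes) left for the next WR\n",
                            i, pkt_len[i]);
                        break;
                }
        }
        return (0);
}
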
  363         
  364 /**
  365  *      reclaim_completed_tx - reclaims completed Tx descriptors
  366  *      @adapter: the adapter
  367  *      @q: the Tx queue to reclaim completed descriptors from
  368  *
  369  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  370  *      and frees the associated buffers if possible.  Called with the Tx
  371  *      queue's lock held.
  372  */
  373 static __inline int
  374 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  375 {
  376         struct sge_txq *q = &qs->txq[queue];
  377         int reclaim = desc_reclaimable(q);
  378 
  379         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  380             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  381                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  382 
  383         if (reclaim < reclaim_min)
  384                 return (0);
  385 
  386         mtx_assert(&qs->lock, MA_OWNED);
  387         if (reclaim > 0) {
  388                 t3_free_tx_desc(qs, reclaim, queue);
  389                 q->cleaned += reclaim;
  390                 q->in_use -= reclaim;
  391         }
  392         if (isset(&qs->txq_stopped, TXQ_ETH))
  393                 clrbit(&qs->txq_stopped, TXQ_ETH);
  394 
  395         return (reclaim);
  396 }
  397 
  398 /**
  399  *      should_restart_tx - are there enough resources to restart a Tx queue?
  400  *      @q: the Tx queue
  401  *
  402  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  403  */
  404 static __inline int
  405 should_restart_tx(const struct sge_txq *q)
  406 {
  407         unsigned int r = q->processed - q->cleaned;
  408 
  409         return q->in_use - r < (q->size >> 1);
  410 }
  411 
  412 /**
  413  *      t3_sge_init - initialize SGE
  414  *      @adap: the adapter
  415  *      @p: the SGE parameters
  416  *
  417  *      Performs SGE initialization needed every time after a chip reset.
  418  *      We do not initialize any of the queue sets here; instead, the driver
  419  *      top level must request those individually.  We also do not enable DMA
  420  *      here; that should be done after the queues have been set up.
  421  */
  422 void
  423 t3_sge_init(adapter_t *adap, struct sge_params *p)
  424 {
  425         u_int ctrl, ups;
  426 
  427         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  428 
  429         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  430                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  431                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  432                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  433 #if SGE_NUM_GENBITS == 1
  434         ctrl |= F_EGRGENCTRL;
  435 #endif
  436         if (adap->params.rev > 0) {
  437                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  438                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  439         }
  440         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  441         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  442                      V_LORCQDRBTHRSH(512));
  443         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  444         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  445                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  446         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  447                      adap->params.rev < T3_REV_C ? 1000 : 500);
  448         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  449         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  450         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  451         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  452         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  453 }
  454 
  455 
  456 /**
  457  *      sgl_len - calculates the size of an SGL of the given capacity
  458  *      @n: the number of SGL entries
  459  *
  460  *      Calculates the number of flits needed for a scatter/gather list that
  461  *      can hold the given number of entries.
  462  */
  463 static __inline unsigned int
  464 sgl_len(unsigned int n)
  465 {
  466         return ((3 * n) / 2 + (n & 1));
  467 }
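
/*
 * A minimal standalone sketch (not part of the driver) of what sgl_len()
 * above computes: each SGL entry carries a 32-bit length and a 64-bit
 * address, and entries are packed two per 3-flit sg_ent, so n entries need
 * 3*n/2 flits, plus one extra flit when n is odd.
 */
#include <stdio.h>

static unsigned int
example_sgl_len(unsigned int n)
{
        return ((3 * n) / 2 + (n & 1));
}

int
main(void)
{
        unsigned int n;

        for (n = 1; n <= 8; n++)
                printf("%u SGL entries -> %u flits\n", n, example_sgl_len(n));
        return (0);
}
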
  468 
  469 /**
  470  *      get_imm_packet - return the next ingress packet buffer from a response
  471  *      @resp: the response descriptor containing the packet data
  472  *
  473  *      Return a packet containing the immediate data of the given response.
  474  */
  475 static int
  476 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  477 {
  478 
  479         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  480                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  481                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  482         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  483                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  484                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  485         } else
  486                 m->m_len = IMMED_PKT_SIZE;
  487         m->m_ext.ext_buf = NULL;
  488         m->m_ext.ext_type = 0;
  489         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  490         return (0);     
  491 }
  492 
  493 static __inline u_int
  494 flits_to_desc(u_int n)
  495 {
  496         return (flit_desc_map[n]);
  497 }
  498 
  499 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  500                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  501                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  502                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  503                     F_HIRCQPARITYERROR)
  504 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  505 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  506                       F_RSPQDISABLED)
  507 
  508 /**
  509  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  510  *      @adapter: the adapter
  511  *
  512  *      Interrupt handler for SGE asynchronous (non-data) events.
  513  */
  514 void
  515 t3_sge_err_intr_handler(adapter_t *adapter)
  516 {
  517         unsigned int v, status;
  518 
  519         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  520         if (status & SGE_PARERR)
  521                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  522                          status & SGE_PARERR);
  523         if (status & SGE_FRAMINGERR)
  524                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  525                          status & SGE_FRAMINGERR);
  526         if (status & F_RSPQCREDITOVERFOW)
  527                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  528 
  529         if (status & F_RSPQDISABLED) {
  530                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  531 
  532                 CH_ALERT(adapter,
  533                          "packet delivered to disabled response queue (0x%x)\n",
  534                          (v >> S_RSPQ0DISABLED) & 0xff);
  535         }
  536 
  537         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  538         if (status & SGE_FATALERR)
  539                 t3_fatal_err(adapter);
  540 }
  541 
  542 void
  543 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  544 {
  545         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  546 
  547         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  548         nqsets *= adap->params.nports;
  549 
  550         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  551 
  552         while (!powerof2(fl_q_size))
  553                 fl_q_size--;
  554 
  555         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  556             is_offload(adap);
  557 
  558 #if __FreeBSD_version >= 700111
  559         if (use_16k) {
  560                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  561                 jumbo_buf_size = MJUM16BYTES;
  562         } else {
  563                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  564                 jumbo_buf_size = MJUM9BYTES;
  565         }
  566 #else
  567         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  568         jumbo_buf_size = MJUMPAGESIZE;
  569 #endif
  570         while (!powerof2(jumbo_q_size))
  571                 jumbo_q_size--;
  572 
  573         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  574                 device_printf(adap->dev,
  575                     "Insufficient clusters and/or jumbo buffers.\n");
  576 
  577         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  578 
  579         for (i = 0; i < SGE_QSETS; ++i) {
  580                 struct qset_params *q = p->qset + i;
  581 
  582                 if (adap->params.nports > 2) {
  583                         q->coalesce_usecs = 50;
  584                 } else {
  585 #ifdef INVARIANTS                       
  586                         q->coalesce_usecs = 10;
  587 #else
  588                         q->coalesce_usecs = 5;
  589 #endif                  
  590                 }
  591                 q->polling = 0;
  592                 q->rspq_size = RSPQ_Q_SIZE;
  593                 q->fl_size = fl_q_size;
  594                 q->jumbo_size = jumbo_q_size;
  595                 q->jumbo_buf_size = jumbo_buf_size;
  596                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  597                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  598                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  599                 q->cong_thres = 0;
  600         }
  601 }
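
/*
 * A minimal standalone sketch (not part of the driver) of the
 * "while (!powerof2(x)) x--;" idiom used by t3_sge_prep() above to round the
 * free-list and jumbo queue sizes down to a power of two; powerof2() below
 * mirrors the <sys/param.h> macro.
 */
#include <stdio.h>

#define powerof2(x)     ((((x) - 1) & (x)) == 0)

int
main(void)
{
        const unsigned int sizes[] = { 4096, 3000, 1500, 1024, 700 };
        unsigned int i, q_size;

        for (i = 0; i < 5; i++) {
                q_size = sizes[i];
                while (!powerof2(q_size))
                        q_size--;
                printf("%u rounds down to %u\n", sizes[i], q_size);
        }
        return (0);
}
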
  602 
  603 int
  604 t3_sge_alloc(adapter_t *sc)
  605 {
  606 
  607         /* The parent tag. */
  608         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  609                                 1, 0,                   /* algnmnt, boundary */
  610                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  611                                 BUS_SPACE_MAXADDR,      /* highaddr */
  612                                 NULL, NULL,             /* filter, filterarg */
  613                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  614                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  615                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  616                                 0,                      /* flags */
  617                                 NULL, NULL,             /* lock, lockarg */
  618                                 &sc->parent_dmat)) {
  619                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  620                 return (ENOMEM);
  621         }
  622 
  623         /*
  624          * DMA tag for normal sized RX frames
  625          */
  626         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  627                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  628                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  629                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  630                 return (ENOMEM);
  631         }
  632 
  633         /* 
  634          * DMA tag for jumbo sized RX frames.
  635          */
  636         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  637                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  638                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  639                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  640                 return (ENOMEM);
  641         }
  642 
  643         /* 
  644          * DMA tag for TX frames.
  645          */
  646         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  647                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  648                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  649                 NULL, NULL, &sc->tx_dmat)) {
  650                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  651                 return (ENOMEM);
  652         }
  653 
  654         return (0);
  655 }
  656 
  657 int
  658 t3_sge_free(struct adapter * sc)
  659 {
  660 
  661         if (sc->tx_dmat != NULL)
  662                 bus_dma_tag_destroy(sc->tx_dmat);
  663 
  664         if (sc->rx_jumbo_dmat != NULL)
  665                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  666 
  667         if (sc->rx_dmat != NULL)
  668                 bus_dma_tag_destroy(sc->rx_dmat);
  669 
  670         if (sc->parent_dmat != NULL)
  671                 bus_dma_tag_destroy(sc->parent_dmat);
  672 
  673         return (0);
  674 }
  675 
  676 void
  677 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  678 {
  679 
  680         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  681         qs->rspq.polling = 0 /* p->polling */;
  682 }
  683 
  684 #if !defined(__i386__) && !defined(__amd64__)
  685 static void
  686 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  687 {
  688         struct refill_fl_cb_arg *cb_arg = arg;
  689         
  690         cb_arg->error = error;
  691         cb_arg->seg = segs[0];
  692         cb_arg->nseg = nseg;
  693 
  694 }
  695 #endif
  696 /**
  697  *      refill_fl - refill an SGE free-buffer list
  698  *      @sc: the controller softc
  699  *      @q: the free-list to refill
  700  *      @n: the number of new buffers to allocate
  701  *
  702  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  703  *      The caller must ensure that @n does not exceed the queue's capacity.
  704  */
  705 static void
  706 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  707 {
  708         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  709         struct rx_desc *d = &q->desc[q->pidx];
  710         struct refill_fl_cb_arg cb_arg;
  711         struct mbuf *m;
  712         caddr_t cl;
  713         int err;
  714         
  715         cb_arg.error = 0;
  716         while (n--) {
  717                 /*
  718                  * We allocate an uninitialized mbuf + cluster, mbuf is
  719                  * initialized after rx.
  720                  */
  721                 if (q->zone == zone_pack) {
  722                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  723                                 break;
  724                         cl = m->m_ext.ext_buf;                  
  725                 } else {
  726                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  727                                 break;
  728                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  729                                 uma_zfree(q->zone, cl);
  730                                 break;
  731                         }
  732                 }
  733                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  734                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  735                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  736                                 uma_zfree(q->zone, cl);
  737                                 goto done;
  738                         }
  739                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  740                 }
  741 #if !defined(__i386__) && !defined(__amd64__)
  742                 err = bus_dmamap_load(q->entry_tag, sd->map,
  743                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  744                 
  745                 if (err != 0 || cb_arg.error) {
  746                         if (q->zone == zone_pack)
  747                                 uma_zfree(q->zone, cl);
  748                         m_free(m);
  749                         goto done;
  750                 }
  751 #else
  752                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  753 #endif          
  754                 sd->flags |= RX_SW_DESC_INUSE;
  755                 sd->rxsd_cl = cl;
  756                 sd->m = m;
  757                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  758                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  759                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  760                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  761 
  762                 d++;
  763                 sd++;
  764 
  765                 if (++q->pidx == q->size) {
  766                         q->pidx = 0;
  767                         q->gen ^= 1;
  768                         sd = q->sdesc;
  769                         d = q->desc;
  770                 }
  771                 q->credits++;
  772                 q->db_pending++;
  773         }
  774 
  775 done:
  776         if (q->db_pending >= 32) {
  777                 q->db_pending = 0;
  778                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  779         }
  780 }
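
/*
 * A minimal standalone sketch (not part of the driver) of the producer-index
 * and generation-bit bookkeeping used by refill_fl() above: each time the
 * producer index wraps past the end of the ring the generation bit is
 * flipped, which lets the hardware distinguish freshly written descriptors
 * from stale ones left over from the previous pass.  The ring size and the
 * starting generation value are chosen only for illustration.
 */
#include <stdio.h>

int
main(void)
{
        const unsigned int ring_size = 8;       /* tiny ring, illustration only */
        unsigned int pidx = 0, gen = 1, i;

        for (i = 0; i < 20; i++) {
                printf("descriptor %2u written at pidx=%u gen=%u\n", i, pidx, gen);
                if (++pidx == ring_size) {
                        pidx = 0;
                        gen ^= 1;               /* wrapped: flip the generation */
                }
        }
        return (0);
}
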
  781 
  782 
  783 /**
  784  *      free_rx_bufs - free the Rx buffers on an SGE free list
  785  *      @sc: the controller softc
  786  *      @q: the SGE free list to clean up
  787  *
  788  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  789  *      this queue should be stopped before calling this function.
  790  */
  791 static void
  792 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  793 {
  794         u_int cidx = q->cidx;
  795 
  796         while (q->credits--) {
  797                 struct rx_sw_desc *d = &q->sdesc[cidx];
  798 
  799                 if (d->flags & RX_SW_DESC_INUSE) {
  800                         bus_dmamap_unload(q->entry_tag, d->map);
  801                         bus_dmamap_destroy(q->entry_tag, d->map);
  802                         if (q->zone == zone_pack) {
  803                                 m_init(d->m, zone_pack, MCLBYTES,
  804                                     M_NOWAIT, MT_DATA, M_EXT);
  805                                 uma_zfree(zone_pack, d->m);
  806                         } else {
  807                                 m_init(d->m, zone_mbuf, MLEN,
  808                                     M_NOWAIT, MT_DATA, 0);
  809                                 uma_zfree(zone_mbuf, d->m);
  810                                 uma_zfree(q->zone, d->rxsd_cl);
  811                         }                       
  812                 }
  813                 
  814                 d->rxsd_cl = NULL;
  815                 d->m = NULL;
  816                 if (++cidx == q->size)
  817                         cidx = 0;
  818         }
  819 }
  820 
  821 static __inline void
  822 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  823 {
  824         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  825 }
  826 
  827 static __inline void
  828 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  829 {
  830         uint32_t reclaimable = fl->size - fl->credits;
  831 
  832         if (reclaimable > 0)
  833                 refill_fl(adap, fl, min(max, reclaimable));
  834 }
  835 
  836 /**
  837  *      recycle_rx_buf - recycle a receive buffer
  838  *      @adapter: the adapter
  839  *      @q: the SGE free list
  840  *      @idx: index of buffer to recycle
  841  *
  842  *      Recycles the specified buffer on the given free list by adding it at
  843  *      the next available slot on the list.
  844  */
  845 static void
  846 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  847 {
  848         struct rx_desc *from = &q->desc[idx];
  849         struct rx_desc *to   = &q->desc[q->pidx];
  850 
  851         q->sdesc[q->pidx] = q->sdesc[idx];
  852         to->addr_lo = from->addr_lo;        // already big endian
  853         to->addr_hi = from->addr_hi;        // likewise
  854         wmb();  /* necessary ? */
  855         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  856         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  857         q->credits++;
  858 
  859         if (++q->pidx == q->size) {
  860                 q->pidx = 0;
  861                 q->gen ^= 1;
  862         }
  863         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  864 }
  865 
  866 static void
  867 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  868 {
  869         uint32_t *addr;
  870 
  871         addr = arg;
  872         *addr = segs[0].ds_addr;
  873 }
  874 
  875 static int
  876 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  877     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  878     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  879 {
  880         size_t len = nelem * elem_size;
  881         void *s = NULL;
  882         void *p = NULL;
  883         int err;
  884 
  885         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  886                                       BUS_SPACE_MAXADDR_32BIT,
  887                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  888                                       len, 0, NULL, NULL, tag)) != 0) {
  889                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  890                 return (ENOMEM);
  891         }
  892 
  893         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  894                                     map)) != 0) {
  895                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  896                 return (ENOMEM);
  897         }
  898 
  899         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  900         bzero(p, len);
  901         *(void **)desc = p;
  902 
  903         if (sw_size) {
  904                 len = nelem * sw_size;
  905                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  906                 *(void **)sdesc = s;
  907         }
  908         if (parent_entry_tag == NULL)
  909                 return (0);
  910             
  911         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  912                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  913                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  914                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  915                                       NULL, NULL, entry_tag)) != 0) {
  916                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  917                 return (ENOMEM);
  918         }
  919         return (0);
  920 }
  921 
  922 static void
  923 sge_slow_intr_handler(void *arg, int ncount)
  924 {
  925         adapter_t *sc = arg;
  926 
  927         t3_slow_intr_handler(sc);
  928         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  929         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  930 }
  931 
  932 /**
  933  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  934  *      @data: the SGE queue set to maintain
  935  *
  936  *      Runs periodically from a timer to perform maintenance of an SGE queue
  937  *      set.  It performs the following tasks:
  938  *
  939  *      a) Cleans up any completed Tx descriptors that may still be pending.
  940  *      Normal descriptor cleanup happens when new packets are added to a Tx
  941  *      queue so this timer is relatively infrequent and does any cleanup only
  942  *      if the Tx queue has not seen any new packets in a while.  We make a
  943  *      best effort attempt to reclaim descriptors, in that we don't wait
  944  *      around if we cannot get a queue's lock (which most likely is because
  945  *      someone else is queueing new packets and so will also handle the clean
  946  *      up).  Since control queues use immediate data exclusively we don't
  947  *      bother cleaning them up here.
  948  *
  949  *      b) Replenishes Rx queues that have run out due to memory shortage.
  950  *      Normally new Rx buffers are added when existing ones are consumed but
  951  *      when out of memory a queue can become empty.  We try to add only a few
  952  *      buffers here, the queue will be replenished fully as these new buffers
  953  *      are used up if memory shortage has subsided.
  954  *      
  955  *      c) Return coalesced response queue credits in case a response queue is
  956  *      starved.
  957  *
  958  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  959  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  960  */
  961 static void
  962 sge_timer_cb(void *arg)
  963 {
  964         adapter_t *sc = arg;
  965         if ((sc->flags & USING_MSIX) == 0) {
  966                 
  967                 struct port_info *pi;
  968                 struct sge_qset *qs;
  969                 struct sge_txq  *txq;
  970                 int i, j;
  971                 int reclaim_ofl, refill_rx;
  972 
  973                 if (sc->open_device_map == 0) 
  974                         return;
  975 
  976                 for (i = 0; i < sc->params.nports; i++) {
  977                         pi = &sc->port[i];
  978                         for (j = 0; j < pi->nqsets; j++) {
  979                                 qs = &sc->sge.qs[pi->first_qset + j];
  980                                 txq = &qs->txq[0];
  981                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  982                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  983                                     (qs->fl[1].credits < qs->fl[1].size));
  984                                 if (reclaim_ofl || refill_rx) {
  985                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  986                                         break;
  987                                 }
  988                         }
  989                 }
  990         }
  991         
  992         if (sc->params.nports > 2) {
  993                 int i;
  994 
  995                 for_each_port(sc, i) {
  996                         struct port_info *pi = &sc->port[i];
  997 
  998                         t3_write_reg(sc, A_SG_KDOORBELL, 
  999                                      F_SELEGRCNTX | 
 1000                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1001                 }
 1002         }       
 1003         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1004             sc->open_device_map != 0)
 1005                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1006 }
 1007 
 1008 /*
 1009  * This is meant to be a catch-all function to keep sge state private
 1010  * to sge.c
 1011  *
 1012  */
 1013 int
 1014 t3_sge_init_adapter(adapter_t *sc)
 1015 {
 1016         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
 1017         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1018         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1019         return (0);
 1020 }
 1021 
 1022 int
 1023 t3_sge_reset_adapter(adapter_t *sc)
 1024 {
 1025         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1026         return (0);
 1027 }
 1028 
 1029 int
 1030 t3_sge_init_port(struct port_info *pi)
 1031 {
 1032         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1033         return (0);
 1034 }
 1035 
 1036 /**
 1037  *      refill_rspq - replenish an SGE response queue
 1038  *      @adapter: the adapter
 1039  *      @q: the response queue to replenish
 1040  *      @credits: how many new responses to make available
 1041  *
 1042  *      Replenishes a response queue by making the supplied number of responses
 1043  *      available to HW.
 1044  */
 1045 static __inline void
 1046 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1047 {
 1048 
 1049         /* mbufs are allocated on demand when a rspq entry is processed. */
 1050         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1051                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1052 }
 1053 
 1054 static void
 1055 sge_txq_reclaim_handler(void *arg, int ncount)
 1056 {
 1057         struct sge_qset *qs = arg;
 1058         int i;
 1059 
 1060         for (i = 0; i < 3; i++)
 1061                 reclaim_completed_tx(qs, 16, i);
 1062 }
 1063 
 1064 static void
 1065 sge_timer_reclaim(void *arg, int ncount)
 1066 {
 1067         struct port_info *pi = arg;
 1068         int i, nqsets = pi->nqsets;
 1069         adapter_t *sc = pi->adapter;
 1070         struct sge_qset *qs;
 1071         struct mtx *lock;
 1072         
 1073         KASSERT((sc->flags & USING_MSIX) == 0,
 1074             ("can't call timer reclaim for msi-x"));
 1075 
 1076         for (i = 0; i < nqsets; i++) {
 1077                 qs = &sc->sge.qs[pi->first_qset + i];
 1078 
 1079                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1080                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1081                             &sc->sge.qs[0].rspq.lock;
 1082 
 1083                 if (mtx_trylock(lock)) {
 1084                         /* XXX currently assume that we are *NOT* polling */
 1085                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1086 
 1087                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1088                                 __refill_fl(sc, &qs->fl[0]);
 1089                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1090                                 __refill_fl(sc, &qs->fl[1]);
 1091                         
 1092                         if (status & (1 << qs->rspq.cntxt_id)) {
 1093                                 if (qs->rspq.credits) {
 1094                                         refill_rspq(sc, &qs->rspq, 1);
 1095                                         qs->rspq.credits--;
 1096                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1097                                             1 << qs->rspq.cntxt_id);
 1098                                 }
 1099                         }
 1100                         mtx_unlock(lock);
 1101                 }
 1102         }
 1103 }
 1104 
 1105 /**
 1106  *      init_qset_cntxt - initialize an SGE queue set context info
 1107  *      @qs: the queue set
 1108  *      @id: the queue set id
 1109  *
 1110  *      Initializes the TIDs and context ids for the queues of a queue set.
 1111  */
 1112 static void
 1113 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1114 {
 1115 
 1116         qs->rspq.cntxt_id = id;
 1117         qs->fl[0].cntxt_id = 2 * id;
 1118         qs->fl[1].cntxt_id = 2 * id + 1;
 1119         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1120         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1121         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1122         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1123         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1124 
 1125         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1126         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1127         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1128 }
 1129 
 1130 
 1131 static void
 1132 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1133 {
 1134         txq->in_use += ndesc;
 1135         /*
 1136          * XXX we don't handle stopping of queue
 1137          * presumably start handles this when we bump against the end
 1138          */
 1139         txqs->gen = txq->gen;
 1140         txq->unacked += ndesc;
 1141         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1142         txq->unacked &= 31;
 1143         txqs->pidx = txq->pidx;
 1144         txq->pidx += ndesc;
 1145 #ifdef INVARIANTS
 1146         if (((txqs->pidx > txq->cidx) &&
 1147                 (txq->pidx < txqs->pidx) &&
 1148                 (txq->pidx >= txq->cidx)) ||
 1149             ((txqs->pidx < txq->cidx) &&
 1150                 (txq->pidx >= txq-> cidx)) ||
 1151             ((txqs->pidx < txq->cidx) &&
 1152                 (txq->cidx < txqs->pidx)))
 1153                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1154                     txqs->pidx, txq->pidx, txq->cidx);
 1155 #endif
 1156         if (txq->pidx >= txq->size) {
 1157                 txq->pidx -= txq->size;
 1158                 txq->gen ^= 1;
 1159         }
 1160 
 1161 }
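
/*
 * A minimal standalone sketch (not part of the driver) of the
 * completion-request cadence in txq_prod() above: the unacked count is kept
 * modulo 32, and whenever it crosses 32 its bit 5 is shifted up into the work
 * request's completion flag, so the hardware is asked to report a completion
 * roughly once every 32 descriptors.  S_WR_COMPL comes from the T3 CPL
 * headers; the value below is only a stand-in for that shift.
 */
#include <stdio.h>

#define EXAMPLE_S_WR_COMPL      21      /* assumed bit position, illustration only */

int
main(void)
{
        const unsigned int ndesc[] = { 5, 10, 20, 3, 40, 1, 2, 30 };
        unsigned int unacked = 0, compl, i;

        for (i = 0; i < 8; i++) {
                unacked += ndesc[i];
                compl = (unacked & 32) << (EXAMPLE_S_WR_COMPL - 5);
                unacked &= 31;
                printf("ndesc=%2u -> %s completion (unacked now %u)\n",
                    ndesc[i], compl ? "request" : "no", unacked);
        }
        return (0);
}
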
 1162 
 1163 /**
 1164  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1165  *      @m: the packet mbufs
 1166  *      @nsegs: the number of segments 
 1167  *
 1168  *      Returns the number of Tx descriptors needed for the given Ethernet
 1169  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1170  */
 1171 static __inline unsigned int
 1172 calc_tx_descs(const struct mbuf *m, int nsegs)
 1173 {
 1174         unsigned int flits;
 1175 
 1176         if (m->m_pkthdr.len <= PIO_LEN)
 1177                 return 1;
 1178 
 1179         flits = sgl_len(nsegs) + 2;
 1180         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1181                 flits++;
 1182 
 1183         return flits_to_desc(flits);
 1184 }
 1185 
 1186 /**
 1187  *      make_sgl - populate a scatter/gather list for a packet
 1188  *      @sgp: the SGL to populate
 1189  *      @segs: the packet dma segments
 1190  *      @nsegs: the number of segments
 1191  *
 1192  *      Generates a scatter/gather list for the buffers that make up a packet.
 1193  *      The caller must size the SGL appropriately; sgl_len() gives the number
 1194  *      of 8-byte flits needed for a given segment count.
 1195  */
 1196 static __inline void
 1197 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1198 {
 1199         int i, idx;
 1200         
 1201         for (idx = 0, i = 0; i < nsegs; i++) {
 1202                 /*
 1203                  * firmware doesn't like empty segments
 1204                  */
 1205                 if (segs[i].ds_len == 0)
 1206                         continue;
 1207                 if (i && idx == 0) 
 1208                         ++sgp;
 1209                 
 1210                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1211                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1212                 idx ^= 1;
 1213         }
 1214         
 1215         if (idx) {
 1216                 sgp->len[idx] = 0;
 1217                 sgp->addr[idx] = 0;
 1218         }
 1219 }
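
/*
 * A minimal standalone sketch (not part of the driver) of the pairing scheme
 * in make_sgl() above: lengths and addresses are packed two per 3-flit entry,
 * zero-length segments are skipped, and the unused trailing slot, if any, is
 * zeroed.  The local sg_ent layout is inferred from how make_sgl() indexes
 * len[] and addr[] and is for illustration only; the driver stores the fields
 * big-endian via htobe32()/htobe64(), which is omitted here.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct example_sg_ent {
        uint32_t len[2];
        uint64_t addr[2];
};

struct example_seg {
        uint64_t ds_addr;
        uint32_t ds_len;
};

int
main(void)
{
        const struct example_seg segs[] = {
                { 0x1000, 64 }, { 0x2000, 0 }, { 0x3000, 1500 }, { 0x4000, 9000 },
        };
        struct example_sg_ent sgl[2], *sgp = sgl;
        int i, idx;

        memset(sgl, 0, sizeof(sgl));
        for (idx = 0, i = 0; i < 4; i++) {
                if (segs[i].ds_len == 0)        /* skip empty segments */
                        continue;
                if (i && idx == 0)              /* previous entry is full */
                        ++sgp;
                sgp->len[idx] = segs[i].ds_len;
                sgp->addr[idx] = segs[i].ds_addr;
                idx ^= 1;
        }
        if (idx) {                              /* zero the unused trailing slot */
                sgp->len[idx] = 0;
                sgp->addr[idx] = 0;
        }

        for (i = 0; i <= (int)(sgp - sgl); i++)
                printf("sg_ent %d: len={%u,%u} addr={0x%jx,0x%jx}\n", i,
                    sgl[i].len[0], sgl[i].len[1],
                    (uintmax_t)sgl[i].addr[0], (uintmax_t)sgl[i].addr[1]);
        return (0);
}
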
 1220         
 1221 /**
 1222  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1223  *      @adap: the adapter
 1224  *      @q: the Tx queue
 1225  *
 1226  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
 1227  *      where the HW may go to sleep just after we check; in that case the
 1228  *      interrupt handler will detect the outstanding TX packet and ring the
 1229  *      doorbell for us.
 1230  *
 1231  *      When GTS is disabled we unconditionally ring the doorbell.
 1232  */
 1233 static __inline void
 1234 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1235 {
 1236 #if USE_GTS
 1237         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1238         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1239                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1240 #ifdef T3_TRACE
 1241                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1242                           q->cntxt_id);
 1243 #endif
 1244                 t3_write_reg(adap, A_SG_KDOORBELL,
 1245                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1246         }
 1247 #else
 1248         if (mustring || ++q->db_pending >= 32) {
 1249                 wmb();            /* write descriptors before telling HW */
 1250                 t3_write_reg(adap, A_SG_KDOORBELL,
 1251                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1252                 q->db_pending = 0;
 1253         }
 1254 #endif
 1255 }
 1256 
 1257 static __inline void
 1258 wr_gen2(struct tx_desc *d, unsigned int gen)
 1259 {
 1260 #if SGE_NUM_GENBITS == 2
 1261         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1262 #endif
 1263 }
 1264 
 1265 /**
 1266  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1267  *      @ndesc: number of Tx descriptors spanned by the SGL
 1268  *      @txd: first Tx descriptor to be written
 1269  *      @txqs: txq state (generation and producer index)
 1270  *      @txq: the SGE Tx queue
 1271  *      @sgl: the SGL
 1272  *      @flits: number of flits to the start of the SGL in the first descriptor
 1273  *      @sgl_flits: the SGL size in flits
 1274  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1275  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1276  *
 1277  *      Write a work request header and an associated SGL.  If the SGL is
 1278  *      small enough to fit into one Tx descriptor it has already been written
 1279  *      and we just need to write the WR header.  Otherwise we distribute the
 1280  *      SGL across the number of descriptors it spans.
 1281  */
 1282 static void
 1283 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1284     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1285     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1286 {
 1287 
 1288         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1289         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1290         
 1291         if (__predict_true(ndesc == 1)) {
 1292                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1293                     V_WR_SGLSFLT(flits)) | wr_hi,
 1294                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1295                     wr_lo);
 1296 
 1297                 wr_gen2(txd, txqs->gen);
 1298                 
 1299         } else {
 1300                 unsigned int ogen = txqs->gen;
 1301                 const uint64_t *fp = (const uint64_t *)sgl;
 1302                 struct work_request_hdr *wp = wrp;
 1303                 
 1304                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1305                     V_WR_SGLSFLT(flits)) | wr_hi;
 1306                 
 1307                 while (sgl_flits) {
 1308                         unsigned int avail = WR_FLITS - flits;
 1309 
 1310                         if (avail > sgl_flits)
 1311                                 avail = sgl_flits;
 1312                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1313                         sgl_flits -= avail;
 1314                         ndesc--;
 1315                         if (!sgl_flits)
 1316                                 break;
 1317                         
 1318                         fp += avail;
 1319                         txd++;
 1320                         txsd++;
 1321                         if (++txqs->pidx == txq->size) {
 1322                                 txqs->pidx = 0;
 1323                                 txqs->gen ^= 1;
 1324                                 txd = txq->desc;
 1325                                 txsd = txq->sdesc;
 1326                         }
 1327 
 1328                         /*
 1329                          * when the head of the mbuf chain
 1330                          * is freed all clusters will be freed
 1331                          * with it
 1332                          */
 1333                         wrp = (struct work_request_hdr *)txd;
 1334                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1335                             V_WR_SGLSFLT(1)) | wr_hi;
 1336                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1337                                     sgl_flits + 1)) |
 1338                             V_WR_GEN(txqs->gen)) | wr_lo;
 1339                         wr_gen2(txd, txqs->gen);
 1340                         flits = 1;
 1341                 }
 1342                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1343                 wmb();
 1344                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1345                 wr_gen2((struct tx_desc *)wp, ogen);
 1346         }
 1347 }
 1348 
 1349 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1350 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1351 
 1352 #define GET_VTAG(cntrl, m) \
 1353 do { \
 1354         if ((m)->m_flags & M_VLANTAG)                                               \
 1355                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1356 } while (0)
 1357 
 1358 static int
 1359 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1360 {
 1361         adapter_t *sc;
 1362         struct mbuf *m0;
 1363         struct sge_txq *txq;
 1364         struct txq_state txqs;
 1365         struct port_info *pi;
 1366         unsigned int ndesc, flits, cntrl, mlen;
 1367         int err, nsegs, tso_info = 0;
 1368 
 1369         struct work_request_hdr *wrp;
 1370         struct tx_sw_desc *txsd;
 1371         struct sg_ent *sgp, *sgl;
 1372         uint32_t wr_hi, wr_lo, sgl_flits; 
 1373         bus_dma_segment_t segs[TX_MAX_SEGS];
 1374 
 1375         struct tx_desc *txd;
 1376                 
 1377         pi = qs->port;
 1378         sc = pi->adapter;
 1379         txq = &qs->txq[TXQ_ETH];
 1380         txd = &txq->desc[txq->pidx];
 1381         txsd = &txq->sdesc[txq->pidx];
 1382         sgl = txq->txq_sgl;
 1383 
 1384         prefetch(txd);
 1385         m0 = *m;
 1386 
 1387         mtx_assert(&qs->lock, MA_OWNED);
 1388         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1389         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1390         
 1391         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1392             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1393                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1394 
 1395         if (m0->m_nextpkt != NULL) {
 1396                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1397                 ndesc = 1;
 1398                 mlen = 0;
 1399         } else {
 1400                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1401                     &m0, segs, &nsegs))) {
 1402                         if (cxgb_debug)
 1403                                 printf("failed ... err=%d\n", err);
 1404                         return (err);
 1405                 }
 1406                 mlen = m0->m_pkthdr.len;
 1407                 ndesc = calc_tx_descs(m0, nsegs);
 1408         }
 1409         txq_prod(txq, ndesc, &txqs);
 1410 
 1411         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1412         txsd->m = m0;
 1413 
 1414         if (m0->m_nextpkt != NULL) {
 1415                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1416                 int i, fidx;
 1417 
 1418                 if (nsegs > 7)
 1419                         panic("trying to coalesce %d packets into one WR", nsegs);
 1420                 txq->txq_coalesced += nsegs;
 1421                 wrp = (struct work_request_hdr *)txd;
 1422                 flits = nsegs*2 + 1;
 1423 
 1424                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1425                         struct cpl_tx_pkt_batch_entry *cbe;
 1426                         uint64_t flit;
 1427                         uint32_t *hflit = (uint32_t *)&flit;
 1428                         int cflags = m0->m_pkthdr.csum_flags;
 1429 
 1430                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1431                         GET_VTAG(cntrl, m0);
 1432                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1433                         if (__predict_false(!(cflags & CSUM_IP)))
 1434                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1435                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1436                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1437                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1438 
 1439                         hflit[0] = htonl(cntrl);
 1440                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1441                         flit |= htobe64(1 << 24);
 1442                         cbe = &cpl_batch->pkt_entry[i];
 1443                         cbe->cntrl = hflit[0];
 1444                         cbe->len = hflit[1];
 1445                         cbe->addr = htobe64(segs[i].ds_addr);
 1446                 }
 1447 
 1448                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1449                     V_WR_SGLSFLT(flits)) |
 1450                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1451                 wr_lo = htonl(V_WR_LEN(flits) |
 1452                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1453                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1454                 wmb();
 1455                 ETHER_BPF_MTAP(pi->ifp, m0);
 1456                 wr_gen2(txd, txqs.gen);
 1457                 check_ring_tx_db(sc, txq, 0);
 1458                 return (0);             
 1459         } else if (tso_info) {
 1460                 uint16_t eth_type;
 1461                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1462                 struct ether_header *eh;
 1463                 void *l3hdr;
 1464                 struct tcphdr *tcp;
 1465 
 1466                 txd->flit[2] = 0;
 1467                 GET_VTAG(cntrl, m0);
 1468                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1469                 hdr->cntrl = htonl(cntrl);
 1470                 hdr->len = htonl(mlen | 0x80000000);
 1471 
 1472                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1473                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
 1474                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1475                             (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
 1476                         panic("tx tso packet too small");
 1477                 }
 1478 
 1479                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1480                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1481                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1482                         if (__predict_false(m0 == NULL)) {
 1483                                 /* XXX panic probably an overreaction */
 1484                                 panic("couldn't fit header into mbuf");
 1485                         }
 1486                 }
 1487 
 1488                 eh = mtod(m0, struct ether_header *);
 1489                 eth_type = eh->ether_type;
 1490                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1491                         struct ether_vlan_header *evh = (void *)eh;
 1492 
 1493                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1494                         l3hdr = evh + 1;
 1495                         eth_type = evh->evl_proto;
 1496                 } else {
 1497                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1498                         l3hdr = eh + 1;
 1499                 }
 1500 
 1501                 if (eth_type == htons(ETHERTYPE_IP)) {
 1502                         struct ip *ip = l3hdr;
 1503 
 1504                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1505                         tcp = (struct tcphdr *)(ip + 1);
 1506                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1507                         struct ip6_hdr *ip6 = l3hdr;
 1508 
 1509                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1510                             ("%s: CSUM_TSO with ip6_nxt %d",
 1511                             __func__, ip6->ip6_nxt));
 1512 
 1513                         tso_info |= F_LSO_IPV6;
 1514                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1515                         tcp = (struct tcphdr *)(ip6 + 1);
 1516                 } else
 1517                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1518 
 1519                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1520                 hdr->lso_info = htonl(tso_info);
 1521 
 1522                 if (__predict_false(mlen <= PIO_LEN)) {
 1523                         /*
 1524                          * Packet is not undersized, yet it fits in PIO_LEN;
 1525                          * this indicates a TSO bug at the higher levels.
 1526                          */
 1527                         txsd->m = NULL;
 1528                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1529                         flits = (mlen + 7) / 8 + 3;
 1530                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1531                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1532                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1533                         wr_lo = htonl(V_WR_LEN(flits) |
 1534                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1535                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1536                         wmb();
 1537                         ETHER_BPF_MTAP(pi->ifp, m0);
 1538                         wr_gen2(txd, txqs.gen);
 1539                         check_ring_tx_db(sc, txq, 0);
 1540                         m_freem(m0);
 1541                         return (0);
 1542                 }
 1543                 flits = 3;      
 1544         } else {
 1545                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1546                 
 1547                 GET_VTAG(cntrl, m0);
 1548                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1549                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1550                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1551                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1552                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1553                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1554                 cpl->cntrl = htonl(cntrl);
 1555                 cpl->len = htonl(mlen | 0x80000000);
 1556 
 1557                 if (mlen <= PIO_LEN) {
 1558                         txsd->m = NULL;
 1559                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1560                         flits = (mlen + 7) / 8 + 2;
 1561                         
 1562                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1563                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1564                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1565                         wr_lo = htonl(V_WR_LEN(flits) |
 1566                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1567                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1568                         wmb();
 1569                         ETHER_BPF_MTAP(pi->ifp, m0);
 1570                         wr_gen2(txd, txqs.gen);
 1571                         check_ring_tx_db(sc, txq, 0);
 1572                         m_freem(m0);
 1573                         return (0);
 1574                 }
 1575                 flits = 2;
 1576         }
 1577         wrp = (struct work_request_hdr *)txd;
 1578         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1579         make_sgl(sgp, segs, nsegs);
 1580 
 1581         sgl_flits = sgl_len(nsegs);
 1582 
 1583         ETHER_BPF_MTAP(pi->ifp, m0);
 1584 
 1585         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1586         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1587         wr_lo = htonl(V_WR_TID(txq->token));
 1588         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1589             sgl_flits, wr_hi, wr_lo);
 1590         check_ring_tx_db(sc, txq, 0);
 1591 
 1592         return (0);
 1593 }
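
/*
 * Illustrative sketch (standalone): for packets small enough to be written
 * as immediate data (mlen <= PIO_LEN), t3_encap() above computes the work
 * request length in flits by rounding the byte count up to 8-byte units and
 * adding the CPL header flits (2 for a plain packet, 3 for LSO).  The helper
 * below is an invented name that merely restates that arithmetic.
 */
#if 0
static unsigned int
sketch_pio_flits(unsigned int mlen, unsigned int cpl_hdr_flits)
{
        return ((mlen + 7) / 8 + cpl_hdr_flits);  /* bytes rounded up to flits */
}
#endif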
 1594 
 1595 void
 1596 cxgb_tx_watchdog(void *arg)
 1597 {
 1598         struct sge_qset *qs = arg;
 1599         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1600 
 1601         if (qs->coalescing != 0 &&
 1602             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1603             TXQ_RING_EMPTY(qs))
 1604                 qs->coalescing = 0; 
 1605         else if (qs->coalescing == 0 &&
 1606             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1607                 qs->coalescing = 1;
 1608         if (TXQ_TRYLOCK(qs)) {
 1609                 qs->qs_flags |= QS_FLUSHING;
 1610                 cxgb_start_locked(qs);
 1611                 qs->qs_flags &= ~QS_FLUSHING;
 1612                 TXQ_UNLOCK(qs);
 1613         }
 1614         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1615                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1616                     qs, txq->txq_watchdog.c_cpu);
 1617 }
 1618 
 1619 static void
 1620 cxgb_tx_timeout(void *arg)
 1621 {
 1622         struct sge_qset *qs = arg;
 1623         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1624 
 1625         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1626                 qs->coalescing = 1;     
 1627         if (TXQ_TRYLOCK(qs)) {
 1628                 qs->qs_flags |= QS_TIMEOUT;
 1629                 cxgb_start_locked(qs);
 1630                 qs->qs_flags &= ~QS_TIMEOUT;
 1631                 TXQ_UNLOCK(qs);
 1632         }
 1633 }
 1634 
 1635 static void
 1636 cxgb_start_locked(struct sge_qset *qs)
 1637 {
 1638         struct mbuf *m_head = NULL;
 1639         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1640         struct port_info *pi = qs->port;
 1641         struct ifnet *ifp = pi->ifp;
 1642 
 1643         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1644                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1645 
 1646         if (!pi->link_config.link_ok) {
 1647                 TXQ_RING_FLUSH(qs);
 1648                 return;
 1649         }
 1650         TXQ_LOCK_ASSERT(qs);
 1651         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1652             pi->link_config.link_ok) {
 1653                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1654 
 1655                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1656                         break;
 1657 
 1658                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1659                         break;
 1660                 /*
 1661                  *  Encapsulation can modify our pointer, and/or make it
 1662                  *  NULL on failure.  In that event, we can't requeue.
 1663                  */
 1664                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1665                         break;
 1666 
 1667                 m_head = NULL;
 1668         }
 1669 
 1670         if (txq->db_pending)
 1671                 check_ring_tx_db(pi->adapter, txq, 1);
 1672 
 1673         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1674             pi->link_config.link_ok)
 1675                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1676                     qs, txq->txq_timer.c_cpu);
 1677         if (m_head != NULL)
 1678                 m_freem(m_head);
 1679 }
 1680 
 1681 static int
 1682 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1683 {
 1684         struct port_info *pi = qs->port;
 1685         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1686         struct buf_ring *br = txq->txq_mr;
 1687         int error, avail;
 1688 
 1689         avail = txq->size - txq->in_use;
 1690         TXQ_LOCK_ASSERT(qs);
 1691 
 1692         /*
 1693          * We can only do a direct transmit if the following are true:
 1694          * - we aren't coalescing (ring < 3/4 full)
 1695          * - the link is up -- checked in caller
 1696          * - there are no packets enqueued already
 1697          * - there is space in the hardware transmit queue
 1698          */
 1699         if (check_pkt_coalesce(qs) == 0 &&
 1700             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1701                 if (t3_encap(qs, &m)) {
 1702                         if (m != NULL &&
 1703                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1704                                 return (error);
 1705                 } else {
 1706                         if (txq->db_pending)
 1707                                 check_ring_tx_db(pi->adapter, txq, 1);
 1708 
 1709                         /*
 1710                          * We've bypassed the buf ring so we need to update
 1711                          * the stats directly
 1712                          */
 1713                         txq->txq_direct_packets++;
 1714                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1715                 }
 1716         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1717                 return (error);
 1718 
 1719         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1720         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1721             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1722                 cxgb_start_locked(qs);
 1723         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1724                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1725                     qs, txq->txq_timer.c_cpu);
 1726         return (0);
 1727 }
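
/*
 * Illustrative sketch (standalone): cxgb_transmit_locked() above bypasses
 * the buf_ring only when the queue set is not coalescing, nothing is already
 * queued (so packet order is preserved), and the hardware ring has room for
 * a worst-case packet; otherwise the mbuf is enqueued and left for the
 * start/timer paths.  The predicate below is an invented restatement of that
 * test, not a driver function.
 */
#if 0
static int
sketch_can_tx_direct(int coalescing, int ring_has_backlog,
    unsigned int hw_avail, unsigned int max_desc_per_pkt)
{
        return (!coalescing && !ring_has_backlog &&
            hw_avail > max_desc_per_pkt);
}
#endif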
 1728 
 1729 int
 1730 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1731 {
 1732         struct sge_qset *qs;
 1733         struct port_info *pi = ifp->if_softc;
 1734         int error, qidx = pi->first_qset;
 1735 
 1736         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1737             ||(!pi->link_config.link_ok)) {
 1738                 m_freem(m);
 1739                 return (0);
 1740         }
 1741 
 1742         /* check if flowid is set */
 1743         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)       
 1744                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1745 
 1746         qs = &pi->adapter->sge.qs[qidx];
 1747         
 1748         if (TXQ_TRYLOCK(qs)) {
 1749                 /* XXX running */
 1750                 error = cxgb_transmit_locked(ifp, qs, m);
 1751                 TXQ_UNLOCK(qs);
 1752         } else
 1753                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1754         return (error);
 1755 }
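
/*
 * Illustrative sketch (standalone): cxgb_transmit() above spreads flows
 * across the port's queue sets by reducing the mbuf's flowid modulo the
 * number of queue sets and offsetting by the port's first queue set index.
 * The helper below models only that arithmetic; its name and parameters are
 * invented for the example.
 */
#if 0
#include <stdint.h>

static unsigned int
sketch_pick_qset(uint32_t flowid, int has_flowid, unsigned int nqsets,
    unsigned int first_qset)
{
        if (!has_flowid)
                return (first_qset);                    /* default queue set */
        return (first_qset + (flowid % nqsets));        /* spread flows */
}
#endif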
 1756 
 1757 void
 1758 cxgb_qflush(struct ifnet *ifp)
 1759 {
 1760         /*
 1761          * Flush any enqueued mbufs in the buf_rings
 1762          * and in the transmit queues.
 1763          * This is a no-op for now.
 1764          */
 1765         return;
 1766 }
 1767 
 1768 /**
 1769  *      write_imm - write a packet into a Tx descriptor as immediate data
 1770  *      @d: the Tx descriptor to write
 1771  *      @m: the packet
 1772  *      @len: the length of packet data to write as immediate data
 1773  *      @gen: the generation bit value to write
 1774  *
 1775  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1776  *      contains a work request at its beginning.  We must write the packet
 1777  *      carefully so the SGE doesn't read accidentally before it's written in
 1778  *      its entirety.
 1779  */
 1780 static __inline void
 1781 write_imm(struct tx_desc *d, caddr_t src,
 1782           unsigned int len, unsigned int gen)
 1783 {
 1784         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1785         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1786         uint32_t wr_hi, wr_lo;
 1787 
 1788         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1789             ("%s: invalid len %d", __func__, len));
 1790         
 1791         memcpy(&to[1], &from[1], len - sizeof(*from));
 1792         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1793             V_WR_BCNTLFLT(len & 7));
 1794         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1795         set_wr_hdr(to, wr_hi, wr_lo);
 1796         wmb();
 1797         wr_gen2(d, gen);
 1798 }
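
/*
 * Illustrative sketch (standalone): write_imm() above copies the payload
 * first and only then publishes the WR header carrying the generation bit,
 * with a write barrier in between, so the SGE can never observe a valid
 * header whose body has not been written yet.  A minimal userspace model of
 * that ordering using C11 atomics; sketch_slot and its fields are invented,
 * and the release store stands in for the driver's wmb()/wr_gen2() pair.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

struct sketch_slot {
        uint64_t body[15];              /* immediate data */
        _Atomic uint64_t header;        /* written last; carries the gen bit */
};

/* len must not exceed sizeof(s->body) in this model. */
static void
sketch_publish(struct sketch_slot *s, const void *src, size_t len,
    uint64_t hdr_with_gen)
{
        memcpy(s->body, src, len);      /* body first */
        /* release: the body is visible before the header that validates it */
        atomic_store_explicit(&s->header, hdr_with_gen, memory_order_release);
}
#endif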
 1799 
 1800 /**
 1801  *      check_desc_avail - check descriptor availability on a send queue
 1802  *      @adap: the adapter
 1803  *      @q: the TX queue
 1804  *      @m: the packet needing the descriptors
 1805  *      @ndesc: the number of Tx descriptors needed
 1806  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1807  *
 1808  *      Checks if the requested number of Tx descriptors is available on an
 1809  *      SGE send queue.  If the queue is already suspended or not enough
 1810  *      descriptors are available the packet is queued for later transmission.
 1811  *      Must be called with the Tx queue locked.
 1812  *
 1813  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1814  *      enough descriptors and the packet has been queued, and 2 if the caller
 1815  *      needs to retry because there weren't enough descriptors at the
 1816  *      beginning of the call but some freed up in the meantime.
 1817  */
 1818 static __inline int
 1819 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1820                  struct mbuf *m, unsigned int ndesc,
 1821                  unsigned int qid)
 1822 {
 1823         /* 
 1824          * XXX We currently only use this for checking the control queue;
 1825          * the control queue is only used for binding qsets, which happens
 1826          * at init time, so we are guaranteed enough descriptors.
 1827          */
 1828         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1829 addq_exit:      mbufq_tail(&q->sendq, m);
 1830                 return 1;
 1831         }
 1832         if (__predict_false(q->size - q->in_use < ndesc)) {
 1833 
 1834                 struct sge_qset *qs = txq_to_qset(q, qid);
 1835 
 1836                 setbit(&qs->txq_stopped, qid);
 1837                 if (should_restart_tx(q) &&
 1838                     test_and_clear_bit(qid, &qs->txq_stopped))
 1839                         return 2;
 1840 
 1841                 q->stops++;
 1842                 goto addq_exit;
 1843         }
 1844         return 0;
 1845 }
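
/*
 * Illustrative sketch (standalone): check_desc_avail() above implements a
 * three-way contract: 0 means descriptors are available, 1 means the packet
 * was parked on the software send queue, and 2 means the caller should
 * reclaim and retry because completions arrived while the queue was being
 * stopped.  The model below restates that contract over an invented ring;
 * none of the names are driver symbols.
 */
#if 0
enum sketch_avail { SKETCH_OK = 0, SKETCH_QUEUED = 1, SKETCH_RETRY = 2 };

struct sketch_txq {
        unsigned int size, in_use;
        unsigned int processed, cleaned; /* completion counters, cleaned <= processed */
        unsigned int backlog;            /* packets parked in a software queue */
        int stopped;
};

static enum sketch_avail
sketch_check_avail(struct sketch_txq *q, unsigned int ndesc)
{
        if (q->backlog) {                /* keep ordering behind the backlog */
                q->backlog++;
                return (SKETCH_QUEUED);
        }
        if (q->size - q->in_use < ndesc) {
                q->stopped = 1;
                /* enough completed-but-unreclaimed work to make room? */
                if (q->processed - q->cleaned >= ndesc) {
                        q->stopped = 0;
                        return (SKETCH_RETRY);  /* caller reclaims and retries */
                }
                q->backlog++;
                return (SKETCH_QUEUED);
        }
        return (SKETCH_OK);
}
#endif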
 1846 
 1847 
 1848 /**
 1849  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1850  *      @q: the SGE control Tx queue
 1851  *
 1852  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1853  *      that send only immediate data (presently just the control queues) and
 1854  *      thus do not have any mbufs
 1855  */
 1856 static __inline void
 1857 reclaim_completed_tx_imm(struct sge_txq *q)
 1858 {
 1859         unsigned int reclaim = q->processed - q->cleaned;
 1860 
 1861         q->in_use -= reclaim;
 1862         q->cleaned += reclaim;
 1863 }
 1864 
 1865 /**
 1866  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1867  *      @adap: the adapter
 1868  *      @q: the control queue
 1869  *      @m: the packet
 1870  *
 1871  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1872  *      a control queue must fit entirely as immediate data in a single Tx
 1873  *      descriptor and have no page fragments.
 1874  */
 1875 static int
 1876 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1877 {
 1878         int ret;
 1879         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1880         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1881         
 1882         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1883 
 1884         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1885         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1886 
 1887         TXQ_LOCK(qs);
 1888 again:  reclaim_completed_tx_imm(q);
 1889 
 1890         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1891         if (__predict_false(ret)) {
 1892                 if (ret == 1) {
 1893                         TXQ_UNLOCK(qs);
 1894                         return (ENOSPC);
 1895                 }
 1896                 goto again;
 1897         }
 1898         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1899         
 1900         q->in_use++;
 1901         if (++q->pidx >= q->size) {
 1902                 q->pidx = 0;
 1903                 q->gen ^= 1;
 1904         }
 1905         TXQ_UNLOCK(qs);
 1906         wmb();
 1907         t3_write_reg(adap, A_SG_KDOORBELL,
 1908             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1909 
 1910         m_free(m);
 1911         return (0);
 1912 }
 1913 
 1914 
 1915 /**
 1916  *      restart_ctrlq - restart a suspended control queue
 1917  *      @qs: the queue set containing the control queue
 1918  *
 1919  *      Resumes transmission on a suspended Tx control queue.
 1920  */
 1921 static void
 1922 restart_ctrlq(void *data, int npending)
 1923 {
 1924         struct mbuf *m;
 1925         struct sge_qset *qs = (struct sge_qset *)data;
 1926         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1927         adapter_t *adap = qs->port->adapter;
 1928 
 1929         TXQ_LOCK(qs);
 1930 again:  reclaim_completed_tx_imm(q);
 1931 
 1932         while (q->in_use < q->size &&
 1933                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1934 
 1935                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1936                 m_free(m);
 1937 
 1938                 if (++q->pidx >= q->size) {
 1939                         q->pidx = 0;
 1940                         q->gen ^= 1;
 1941                 }
 1942                 q->in_use++;
 1943         }
 1944         if (!mbufq_empty(&q->sendq)) {
 1945                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1946 
 1947                 if (should_restart_tx(q) &&
 1948                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1949                         goto again;
 1950                 q->stops++;
 1951         }
 1952         TXQ_UNLOCK(qs);
 1953         t3_write_reg(adap, A_SG_KDOORBELL,
 1954                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1955 }
 1956 
 1957 
 1958 /*
 1959  * Send a management message through control queue 0
 1960  */
 1961 int
 1962 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1963 {
 1964         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1965 }
 1966 
 1967 /**
 1968  *      free_qset - free the resources of an SGE queue set
 1969  *      @sc: the controller owning the queue set
 1970  *      @q: the queue set
 1971  *
 1972  *      Release the HW and SW resources associated with an SGE queue set, such
 1973  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1974  *      queue set must be quiesced prior to calling this.
 1975  */
 1976 static void
 1977 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1978 {
 1979         int i;
 1980         
 1981         reclaim_completed_tx(q, 0, TXQ_ETH);
 1982         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 1983                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 1984         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 1985                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 1986                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 1987         }
 1988 
 1989         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 1990                 if (q->fl[i].desc) {
 1991                         mtx_lock_spin(&sc->sge.reg_lock);
 1992                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 1993                         mtx_unlock_spin(&sc->sge.reg_lock);
 1994                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 1995                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 1996                                         q->fl[i].desc_map);
 1997                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 1998                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 1999                 }
 2000                 if (q->fl[i].sdesc) {
 2001                         free_rx_bufs(sc, &q->fl[i]);
 2002                         free(q->fl[i].sdesc, M_DEVBUF);
 2003                 }
 2004         }
 2005 
 2006         mtx_unlock(&q->lock);
 2007         MTX_DESTROY(&q->lock);
 2008         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2009                 if (q->txq[i].desc) {
 2010                         mtx_lock_spin(&sc->sge.reg_lock);
 2011                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2012                         mtx_unlock_spin(&sc->sge.reg_lock);
 2013                         bus_dmamap_unload(q->txq[i].desc_tag,
 2014                                         q->txq[i].desc_map);
 2015                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2016                                         q->txq[i].desc_map);
 2017                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2018                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2019                 }
 2020                 if (q->txq[i].sdesc) {
 2021                         free(q->txq[i].sdesc, M_DEVBUF);
 2022                 }
 2023         }
 2024 
 2025         if (q->rspq.desc) {
 2026                 mtx_lock_spin(&sc->sge.reg_lock);
 2027                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2028                 mtx_unlock_spin(&sc->sge.reg_lock);
 2029                 
 2030                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2031                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2032                                 q->rspq.desc_map);
 2033                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2034                 MTX_DESTROY(&q->rspq.lock);
 2035         }
 2036 
 2037 #if defined(INET6) || defined(INET)
 2038         tcp_lro_free(&q->lro.ctrl);
 2039 #endif
 2040 
 2041         bzero(q, sizeof(*q));
 2042 }
 2043 
 2044 /**
 2045  *      t3_free_sge_resources - free SGE resources
 2046  *      @sc: the adapter softc
 2047  *
 2048  *      Frees resources used by the SGE queue sets.
 2049  */
 2050 void
 2051 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2052 {
 2053         int i;
 2054 
 2055         for (i = 0; i < nqsets; ++i) {
 2056                 TXQ_LOCK(&sc->sge.qs[i]);
 2057                 t3_free_qset(sc, &sc->sge.qs[i]);
 2058         }
 2059 }
 2060 
 2061 /**
 2062  *      t3_sge_start - enable SGE
 2063  *      @sc: the controller softc
 2064  *
 2065  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2066  *      transfers.
 2067  */
 2068 void
 2069 t3_sge_start(adapter_t *sc)
 2070 {
 2071         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2072 }
 2073 
 2074 /**
 2075  *      t3_sge_stop - disable SGE operation
 2076  *      @sc: the adapter
 2077  *
 2078  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2079  *      from error interrupts) or from normal process context.  In the latter
 2080  *      case it also disables any pending queue restart tasklets.  Note that
 2081  *      if it is called in interrupt context it cannot disable the restart
 2082  *      tasklets as it cannot wait; however, the tasklets will have no effect
 2083  *      since the doorbells are disabled and the driver will call this again
 2084  *      later from process context, at which time the tasklets will be stopped
 2085  *      if they are still running.
 2086  */
 2087 void
 2088 t3_sge_stop(adapter_t *sc)
 2089 {
 2090         int i, nqsets;
 2091         
 2092         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2093 
 2094         if (sc->tq == NULL)
 2095                 return;
 2096         
 2097         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2098                 nqsets += sc->port[i].nqsets;
 2099 #ifdef notyet
 2100         /*
 2101          * 
 2102          * XXX
 2103          */
 2104         for (i = 0; i < nqsets; ++i) {
 2105                 struct sge_qset *qs = &sc->sge.qs[i];
 2106                 
 2107                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2108                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2109         }
 2110 #endif
 2111 }
 2112 
 2113 /**
 2114  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2115  *      @qs: the queue set that owns the Tx queue
 2116  *      @reclaimable: the number of descriptors to reclaim
 2117  *      @queue: the index of the Tx queue within the queue set
 2118  *              (TXQ_ETH, TXQ_OFLD, or TXQ_CTRL)
 2119  *
 2120  *      Reclaims Tx descriptors from an SGE Tx queue and frees the
 2121  *      associated Tx buffers.  Called with the Tx queue lock held.
 2122  *
 2123  *      Advances the queue's consumer index past the reclaimed
 2124  *      descriptors.
 2125  */
 2126 void
 2127 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2128 {
 2129         struct tx_sw_desc *txsd;
 2130         unsigned int cidx, mask;
 2131         struct sge_txq *q = &qs->txq[queue];
 2132 
 2133 #ifdef T3_TRACE
 2134         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2135                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2136 #endif
 2137         cidx = q->cidx;
 2138         mask = q->size - 1;
 2139         txsd = &q->sdesc[cidx];
 2140 
 2141         mtx_assert(&qs->lock, MA_OWNED);
 2142         while (reclaimable--) {
 2143                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2144                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2145 
 2146                 if (txsd->m != NULL) {
 2147                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2148                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2149                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2150                         }
 2151                         m_freem_list(txsd->m);
 2152                         txsd->m = NULL;
 2153                 } else
 2154                         q->txq_skipped++;
 2155                 
 2156                 ++txsd;
 2157                 if (++cidx == q->size) {
 2158                         cidx = 0;
 2159                         txsd = q->sdesc;
 2160                 }
 2161         }
 2162         q->cidx = cidx;
 2163 
 2164 }
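
/*
 * Illustrative sketch (standalone): the reclaim loop above prefetches ahead
 * with "(cidx + n) & mask", which is valid only because the queue size is a
 * power of two, while the consumer index itself wraps with an explicit
 * compare.  The helper below is an invented restatement of that masking.
 */
#if 0
static unsigned int
sketch_ring_next(unsigned int cidx, unsigned int size_pow2)
{
        return ((cidx + 1) & (size_pow2 - 1));  /* size must be a power of two */
}
#endif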
 2165 
 2166 /**
 2167  *      is_new_response - check if a response is newly written
 2168  *      @r: the response descriptor
 2169  *      @q: the response queue
 2170  *
 2171  *      Returns true if a response descriptor contains a yet unprocessed
 2172  *      response.
 2173  */
 2174 static __inline int
 2175 is_new_response(const struct rsp_desc *r,
 2176     const struct sge_rspq *q)
 2177 {
 2178         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2179 }
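
/*
 * Illustrative sketch (standalone): the generation-bit scheme above lets the
 * driver detect new responses without reading an index register: the
 * hardware writes the current generation into each descriptor, and the
 * driver flips the value it expects every time its consumer index wraps, so
 * leftovers from the previous lap never match.  The names below are invented.
 */
#if 0
struct sketch_rspq { unsigned int cidx, size, gen; };

static int
sketch_is_new(unsigned int desc_gen_bit, const struct sketch_rspq *q)
{
        return (desc_gen_bit == q->gen);
}

static void
sketch_advance(struct sketch_rspq *q)
{
        if (++q->cidx == q->size) {
                q->cidx = 0;
                q->gen ^= 1;            /* expect the opposite bit on the next lap */
        }
}
#endif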
 2180 
 2181 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2182 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2183                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2184                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2185                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2186 
 2187 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2188 #define NOMEM_INTR_DELAY 2500
 2189 
 2190 #ifdef TCP_OFFLOAD
 2191 /**
 2192  *      write_ofld_wr - write an offload work request
 2193  *      @adap: the adapter
 2194  *      @m: the packet to send
 2195  *      @q: the Tx queue
 2196  *      @pidx: index of the first Tx descriptor to write
 2197  *      @gen: the generation value to use
 2198  *      @ndesc: number of descriptors the packet will occupy
 2199  *
 2200  *      Write an offload work request to send the supplied packet.  The packet
 2201  *      data already carry the work request with most fields populated.
 2202  */
 2203 static void
 2204 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2205     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2206 {
 2207         unsigned int sgl_flits, flits;
 2208         int i, idx, nsegs, wrlen;
 2209         struct work_request_hdr *from;
 2210         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2211         struct tx_desc *d = &q->desc[pidx];
 2212         struct txq_state txqs;
 2213         struct sglist_seg *segs;
 2214         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2215         struct sglist *sgl;
 2216 
 2217         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2218         wrlen = m->m_len - sizeof(*oh);
 2219 
 2220         if (!(oh->flags & F_HDR_SGL)) {
 2221                 write_imm(d, (caddr_t)from, wrlen, gen);
 2222 
 2223                 /*
 2224                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2225                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2226                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2227                  */
 2228                 if (!(oh->flags & F_HDR_DF))
 2229                         m_free(m);
 2230                 return;
 2231         }
 2232 
 2233         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2234 
 2235         sgl = oh->sgl;
 2236         flits = wrlen / 8;
 2237         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2238 
 2239         nsegs = sgl->sg_nseg;
 2240         segs = sgl->sg_segs;
 2241         for (idx = 0, i = 0; i < nsegs; i++) {
 2242                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2243                 if (i && idx == 0) 
 2244                         ++sgp;
 2245                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2246                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2247                 idx ^= 1;
 2248         }
 2249         if (idx) {
 2250                 sgp->len[idx] = 0;
 2251                 sgp->addr[idx] = 0;
 2252         }
 2253 
 2254         sgl_flits = sgl_len(nsegs);
 2255         txqs.gen = gen;
 2256         txqs.pidx = pidx;
 2257         txqs.compl = 0;
 2258 
 2259         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2260             from->wrh_hi, from->wrh_lo);
 2261 }
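
/*
 * Illustrative sketch (standalone): the loop above packs scatter/gather
 * segments two per sg_ent, toggling idx between the entry's two len/addr
 * slots and advancing to the next entry each time idx returns to 0; an odd
 * segment count leaves the last slot zeroed as a terminator.  The structures
 * below are invented stand-ins, and the big-endian byte swapping done by the
 * driver is omitted.
 */
#if 0
#include <stdint.h>

struct sketch_seg { uint64_t paddr; uint32_t len; };
struct sketch_sg_ent { uint32_t len[2]; uint64_t addr[2]; };

static void
sketch_pack_sgl(struct sketch_sg_ent *sgp, const struct sketch_seg *segs,
    int nsegs)
{
        int i, idx;

        for (idx = 0, i = 0; i < nsegs; i++) {
                if (i && idx == 0)
                        ++sgp;                  /* previous entry is full */
                sgp->len[idx] = segs[i].len;
                sgp->addr[idx] = segs[i].paddr;
                idx ^= 1;                       /* alternate between the two slots */
        }
        if (idx) {                              /* odd count: zero the unused slot */
                sgp->len[idx] = 0;
                sgp->addr[idx] = 0;
        }
}
#endif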
 2262 
 2263 /**
 2264  *      ofld_xmit - send a packet through an offload queue
 2265  *      @adap: the adapter
 2266  *      @q: the Tx offload queue
 2267  *      @m: the packet
 2268  *
 2269  *      Send an offload packet through an SGE offload queue.
 2270  */
 2271 static int
 2272 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2273 {
 2274         int ret;
 2275         unsigned int ndesc;
 2276         unsigned int pidx, gen;
 2277         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2278         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2279 
 2280         ndesc = G_HDR_NDESC(oh->flags);
 2281 
 2282         TXQ_LOCK(qs);
 2283 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2284         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2285         if (__predict_false(ret)) {
 2286                 if (ret == 1) {
 2287                         TXQ_UNLOCK(qs);
 2288                         return (EINTR);
 2289                 }
 2290                 goto again;
 2291         }
 2292 
 2293         gen = q->gen;
 2294         q->in_use += ndesc;
 2295         pidx = q->pidx;
 2296         q->pidx += ndesc;
 2297         if (q->pidx >= q->size) {
 2298                 q->pidx -= q->size;
 2299                 q->gen ^= 1;
 2300         }
 2301 
 2302         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2303         check_ring_tx_db(adap, q, 1);
 2304         TXQ_UNLOCK(qs);
 2305 
 2306         return (0);
 2307 }
 2308 
 2309 /**
 2310  *      restart_offloadq - restart a suspended offload queue
 2311  *      @qs: the queue set containing the offload queue
 2312  *
 2313  *      Resumes transmission on a suspended Tx offload queue.
 2314  */
 2315 static void
 2316 restart_offloadq(void *data, int npending)
 2317 {
 2318         struct mbuf *m;
 2319         struct sge_qset *qs = data;
 2320         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2321         adapter_t *adap = qs->port->adapter;
 2322         int cleaned;
 2323                 
 2324         TXQ_LOCK(qs);
 2325 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2326 
 2327         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2328                 unsigned int gen, pidx;
 2329                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2330                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2331 
 2332                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2333                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2334                         if (should_restart_tx(q) &&
 2335                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2336                                 goto again;
 2337                         q->stops++;
 2338                         break;
 2339                 }
 2340 
 2341                 gen = q->gen;
 2342                 q->in_use += ndesc;
 2343                 pidx = q->pidx;
 2344                 q->pidx += ndesc;
 2345                 if (q->pidx >= q->size) {
 2346                         q->pidx -= q->size;
 2347                         q->gen ^= 1;
 2348                 }
 2349                 
 2350                 (void)mbufq_dequeue(&q->sendq);
 2351                 TXQ_UNLOCK(qs);
 2352                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2353                 TXQ_LOCK(qs);
 2354         }
 2355 #if USE_GTS
 2356         set_bit(TXQ_RUNNING, &q->flags);
 2357         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2358 #endif
 2359         TXQ_UNLOCK(qs);
 2360         wmb();
 2361         t3_write_reg(adap, A_SG_KDOORBELL,
 2362                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2363 }
 2364 
 2365 /**
 2366  *      t3_offload_tx - send an offload packet
 2367  *      @m: the packet
 2368  *
 2369  *      Sends an offload packet.  The flags in the leading ofld_hdr select
 2370  *      the target: F_HDR_CTRL chooses between the control and offload Tx
 2371  *      queues, and the header's qset field selects the queue set.
 2372  */
 2373 int
 2374 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2375 {
 2376         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2377         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2378 
 2379         if (oh->flags & F_HDR_CTRL) {
 2380                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2381                 return (ctrl_xmit(sc, qs, m));
 2382         } else
 2383                 return (ofld_xmit(sc, qs, m));
 2384 }
 2385 #endif
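
/*
 * Illustrative sketch (standalone): t3_offload_tx() above dispatches on
 * fields packed into the ofld_hdr flags word: one bit selects the control
 * queue versus the offload queue, and a small field selects the queue set.
 * The bit layout below is invented purely to show the extraction pattern;
 * it is not the real F_HDR_CTRL/G_HDR_QSET encoding.
 */
#if 0
#include <stdint.h>

#define SKETCH_F_CTRL           (1u << 0)               /* invented control-queue bit */
#define SKETCH_QSET(flags)      (((flags) >> 1) & 0x7)  /* invented 3-bit qset field */

static void
sketch_dispatch(uint32_t flags, int *is_ctrl, unsigned int *qset)
{
        *is_ctrl = (flags & SKETCH_F_CTRL) != 0;
        *qset = SKETCH_QSET(flags);
}
#endif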
 2386 
 2387 static void
 2388 restart_tx(struct sge_qset *qs)
 2389 {
 2390         struct adapter *sc = qs->port->adapter;
 2391 
 2392         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2393             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2394             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2395                 qs->txq[TXQ_OFLD].restarts++;
 2396                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2397         }
 2398 
 2399         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2400             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2401             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2402                 qs->txq[TXQ_CTRL].restarts++;
 2403                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2404         }
 2405 }
 2406 
 2407 /**
 2408  *      t3_sge_alloc_qset - initialize an SGE queue set
 2409  *      @sc: the controller softc
 2410  *      @id: the queue set id
 2411  *      @nports: how many Ethernet ports will be using this queue set
 2412  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2413  *      @p: configuration parameters for this queue set
 2414  *      @ntxq: number of Tx queues for the queue set
 2415  *      @pi: port info for queue set
 2416  *
 2417  *      Allocate resources and initialize an SGE queue set.  A queue set
 2418  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2419  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2420  *      queue, offload queue, and control queue.
 2421  */
 2422 int
 2423 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2424                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2425 {
 2426         struct sge_qset *q = &sc->sge.qs[id];
 2427         int i, ret = 0;
 2428 
 2429         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2430         q->port = pi;
 2431         q->adap = sc;
 2432 
 2433         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2434             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2435                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2436                 goto err;
 2437         }
 2438         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2439             M_NOWAIT | M_ZERO)) == NULL) {
 2440                 device_printf(sc->dev, "failed to allocate ifq\n");
 2441                 goto err;
 2442         }
 2443         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2444         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2445         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2446         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2447         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2448 
 2449         init_qset_cntxt(q, id);
 2450         q->idx = id;
 2451         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2452                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2453                     &q->fl[0].desc, &q->fl[0].sdesc,
 2454                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2455                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2456                 printf("error %d from alloc ring fl0\n", ret);
 2457                 goto err;
 2458         }
 2459 
 2460         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2461                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2462                     &q->fl[1].desc, &q->fl[1].sdesc,
 2463                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2464                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2465                 printf("error %d from alloc ring fl1\n", ret);
 2466                 goto err;
 2467         }
 2468 
 2469         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2470                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2471                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2472                     NULL, NULL)) != 0) {
 2473                 printf("error %d from alloc ring rspq\n", ret);
 2474                 goto err;
 2475         }
 2476 
 2477         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2478             device_get_unit(sc->dev), irq_vec_idx);
 2479         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2480 
 2481         for (i = 0; i < ntxq; ++i) {
 2482                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2483 
 2484                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2485                             sizeof(struct tx_desc), sz,
 2486                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2487                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2488                             &q->txq[i].desc_map,
 2489                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2490                         printf("error %d from alloc ring tx %i\n", ret, i);
 2491                         goto err;
 2492                 }
 2493                 mbufq_init(&q->txq[i].sendq);
 2494                 q->txq[i].gen = 1;
 2495                 q->txq[i].size = p->txq_size[i];
 2496         }
 2497 
 2498 #ifdef TCP_OFFLOAD
 2499         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2500 #endif
 2501         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2502         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2503         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2504 
 2505         q->fl[0].gen = q->fl[1].gen = 1;
 2506         q->fl[0].size = p->fl_size;
 2507         q->fl[1].size = p->jumbo_size;
 2508 
 2509         q->rspq.gen = 1;
 2510         q->rspq.cidx = 0;
 2511         q->rspq.size = p->rspq_size;
 2512 
 2513         q->txq[TXQ_ETH].stop_thres = nports *
 2514             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2515 
 2516         q->fl[0].buf_size = MCLBYTES;
 2517         q->fl[0].zone = zone_pack;
 2518         q->fl[0].type = EXT_PACKET;
 2519 
 2520         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2521                 q->fl[1].zone = zone_jumbo16;
 2522                 q->fl[1].type = EXT_JUMBO16;
 2523         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2524                 q->fl[1].zone = zone_jumbo9;
 2525                 q->fl[1].type = EXT_JUMBO9;             
 2526         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2527                 q->fl[1].zone = zone_jumbop;
 2528                 q->fl[1].type = EXT_JUMBOP;
 2529         } else {
 2530                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2531                 ret = EDOOFUS;
 2532                 goto err;
 2533         }
 2534         q->fl[1].buf_size = p->jumbo_buf_size;
 2535 
 2536         /* Allocate and setup the lro_ctrl structure */
 2537         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2538 #if defined(INET6) || defined(INET)
 2539         ret = tcp_lro_init(&q->lro.ctrl);
 2540         if (ret) {
 2541                 printf("error %d from tcp_lro_init\n", ret);
 2542                 goto err;
 2543         }
 2544 #endif
 2545         q->lro.ctrl.ifp = pi->ifp;
 2546 
 2547         mtx_lock_spin(&sc->sge.reg_lock);
 2548         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2549                                    q->rspq.phys_addr, q->rspq.size,
 2550                                    q->fl[0].buf_size, 1, 0);
 2551         if (ret) {
 2552                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2553                 goto err_unlock;
 2554         }
 2555 
 2556         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2557                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2558                                           q->fl[i].phys_addr, q->fl[i].size,
 2559                                           q->fl[i].buf_size, p->cong_thres, 1,
 2560                                           0);
 2561                 if (ret) {
 2562                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2563                         goto err_unlock;
 2564                 }
 2565         }
 2566 
 2567         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2568                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2569                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2570                                  1, 0);
 2571         if (ret) {
 2572                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2573                 goto err_unlock;
 2574         }
 2575 
 2576         if (ntxq > 1) {
 2577                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2578                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2579                                          q->txq[TXQ_OFLD].phys_addr,
 2580                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2581                 if (ret) {
 2582                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2583                         goto err_unlock;
 2584                 }
 2585         }
 2586 
 2587         if (ntxq > 2) {
 2588                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2589                                          SGE_CNTXT_CTRL, id,
 2590                                          q->txq[TXQ_CTRL].phys_addr,
 2591                                          q->txq[TXQ_CTRL].size,
 2592                                          q->txq[TXQ_CTRL].token, 1, 0);
 2593                 if (ret) {
 2594                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2595                         goto err_unlock;
 2596                 }
 2597         }
 2598 
 2599         mtx_unlock_spin(&sc->sge.reg_lock);
 2600         t3_update_qset_coalesce(q, p);
 2601 
 2602         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2603         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2604         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2605 
 2606         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2607                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2608 
 2609         return (0);
 2610 
 2611 err_unlock:
 2612         mtx_unlock_spin(&sc->sge.reg_lock);
 2613 err:    
 2614         TXQ_LOCK(q);
 2615         t3_free_qset(sc, q);
 2616 
 2617         return (ret);
 2618 }
 2619 
 2620 /*
 2621  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2622  * Ethernet data.  Hardware assistance with various checksums and any VLAN tag
 2623  * will also be taken into account here.
 2624  */
 2625 void
 2626 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2627 {
 2628         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2629         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2630         struct ifnet *ifp = pi->ifp;
 2631         
 2632         if (cpl->vlan_valid) {
 2633                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2634                 m->m_flags |= M_VLANTAG;
 2635         } 
 2636 
 2637         m->m_pkthdr.rcvif = ifp;
 2638         /*
 2639          * adjust after conversion to mbuf chain
 2640          */
 2641         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2642         m->m_len -= (sizeof(*cpl) + ethpad);
 2643         m->m_data += (sizeof(*cpl) + ethpad);
 2644 
 2645         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2646                 struct ether_header *eh = mtod(m, void *);
 2647                 uint16_t eh_type;
 2648 
 2649                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2650                         struct ether_vlan_header *evh = mtod(m, void *);
 2651 
 2652                         eh_type = evh->evl_proto;
 2653                 } else
 2654                         eh_type = eh->ether_type;
 2655 
 2656                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2657                     eh_type == htons(ETHERTYPE_IP)) {
 2658                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2659                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2660                         m->m_pkthdr.csum_data = 0xffff;
 2661                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2662                     eh_type == htons(ETHERTYPE_IPV6)) {
 2663                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2664                             CSUM_PSEUDO_HDR);
 2665                         m->m_pkthdr.csum_data = 0xffff;
 2666                 }
 2667         }
 2668 }
 2669 
 2670 /**
 2671  *      get_packet - return the next ingress packet buffer from a free list
 2672  *      @adap: the adapter that received the packet
 2673  *      @drop_thres: # of remaining buffers before we start dropping packets
 2674  *      @qs: the qset that the SGE free list holding the packet belongs to
 2675  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2676  *      @r: response descriptor 
 2677  *
 2678  *      Get the next packet from a free list and complete setup of the
 2679  *      sk_buff.  If the packet is small we make a copy and recycle the
 2680  *      mbuf.  If the packet is small we make a copy and recycle the
 2681  *      positive drop threshold is supplied packets are dropped and their
 2682  *      buffers recycled if (a) the number of remaining buffers is under the
 2683  *      threshold and the packet is too big to copy, or (b) the packet should
 2684  *      be copied but there is no memory for the copy.
 2685  */
 2686 static int
 2687 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2688     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2689 {
 2690 
 2691         unsigned int len_cq =  ntohl(r->len_cq);
 2692         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2693         int mask, cidx = fl->cidx;
 2694         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2695         uint32_t len = G_RSPD_LEN(len_cq);
 2696         uint32_t flags = M_EXT;
 2697         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2698         caddr_t cl;
 2699         struct mbuf *m;
 2700         int ret = 0;
 2701 
 2702         mask = fl->size - 1;
 2703         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2704         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2705         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2706         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2707 
 2708         fl->credits--;
 2709         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2710         
 2711         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2712             sopeop == RSPQ_SOP_EOP) {
 2713                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2714                         goto skip_recycle;
 2715                 cl = mtod(m, void *);
 2716                 memcpy(cl, sd->rxsd_cl, len);
 2717                 recycle_rx_buf(adap, fl, fl->cidx);
 2718                 m->m_pkthdr.len = m->m_len = len;
 2719                 m->m_flags = 0;
 2720                 mh->mh_head = mh->mh_tail = m;
 2721                 ret = 1;
 2722                 goto done;
 2723         } else {
 2724         skip_recycle:
 2725                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2726                 cl = sd->rxsd_cl;
 2727                 m = sd->m;
 2728 
 2729                 if ((sopeop == RSPQ_SOP_EOP) ||
 2730                     (sopeop == RSPQ_SOP))
 2731                         flags |= M_PKTHDR;
 2732                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2733                 if (fl->zone == zone_pack) {
 2734                         /*
 2735                          * restore clobbered data pointer
 2736                          */
 2737                         m->m_data = m->m_ext.ext_buf;
 2738                 } else {
 2739                         m_cljset(m, cl, fl->type);
 2740                 }
 2741                 m->m_len = len;
 2742         }               
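               /*
                * Stitch the buffer into the per-queue mbuf chain: SOP starts a
                * new chain in *mh, EOP completes one (ret = 1 tells the caller
                * a full packet is ready), and intermediate fragments are
                * appended at mh_tail while the head's pkthdr length grows.
                */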
 2743         switch(sopeop) {
 2744         case RSPQ_SOP_EOP:
 2745                 ret = 1;
 2746                 /* FALLTHROUGH */
 2747         case RSPQ_SOP:
 2748                 mh->mh_head = mh->mh_tail = m;
 2749                 m->m_pkthdr.len = len;
 2750                 break;
 2751         case RSPQ_EOP:
 2752                 ret = 1;
 2753                 /* FALLTHROUGH */
 2754         case RSPQ_NSOP_NEOP:
 2755                 if (mh->mh_tail == NULL) {
 2756                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2757                         m_freem(m);
 2758                         break;
 2759                 }
 2760                 mh->mh_tail->m_next = m;
 2761                 mh->mh_tail = m;
 2762                 mh->mh_head->m_pkthdr.len += len;
 2763                 break;
 2764         }
 2765         if (cxgb_debug)
 2766                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2767 done:
 2768         if (++fl->cidx == fl->size)
 2769                 fl->cidx = 0;
 2770 
 2771         return (ret);
 2772 }
 2773 
 2774 /**
 2775  *      handle_rsp_cntrl_info - handles control information in a response
 2776  *      @qs: the queue set corresponding to the response
 2777  *      @flags: the response control flags
 2778  *
 2779  *      Handles the control information of an SGE response, such as GTS
 2780  *      indications and completion credits for the queue set's Tx queues.
  2781  *      HW coalesces credits; we don't do any extra SW coalescing.
 2782  */
 2783 static __inline void
 2784 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2785 {
 2786         unsigned int credits;
 2787 
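               /*
                * The hardware's TXQ0/TXQ1/TXQ2 credit fields correspond to the
                * Ethernet, offload, and control Tx queues respectively.
                */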
 2788 #if USE_GTS
 2789         if (flags & F_RSPD_TXQ0_GTS)
 2790                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2791 #endif
 2792         credits = G_RSPD_TXQ0_CR(flags);
 2793         if (credits) 
 2794                 qs->txq[TXQ_ETH].processed += credits;
 2795 
 2796         credits = G_RSPD_TXQ2_CR(flags);
 2797         if (credits)
 2798                 qs->txq[TXQ_CTRL].processed += credits;
 2799 
  2800 #if USE_GTS
  2801         if (flags & F_RSPD_TXQ1_GTS)
  2802                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
  2803 #endif
 2804         credits = G_RSPD_TXQ1_CR(flags);
 2805         if (credits)
 2806                 qs->txq[TXQ_OFLD].processed += credits;
 2807 
 2808 }
 2809 
 2810 static void
 2811 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2812     unsigned int sleeping)
 2813 {
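               /*
                * Intentionally a no-op in this driver: the GTS "sleeping"
                * indications collected by the caller are not acted on here.
                */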
 2814         ;
 2815 }
 2816 
 2817 /**
 2818  *      process_responses - process responses from an SGE response queue
 2819  *      @adap: the adapter
 2820  *      @qs: the queue set to which the response queue belongs
 2821  *      @budget: how many responses can be processed in this round
 2822  *
 2823  *      Process responses from an SGE response queue up to the supplied budget.
 2824  *      Responses include received packets as well as credits and other events
 2825  *      for the queues that belong to the response queue's queue set.
 2826  *      A negative budget is effectively unlimited.
 2827  *
 2828  *      Additionally choose the interrupt holdoff time for the next interrupt
  2829  *      on this queue.  If the system is under memory pressure, use a fairly
 2830  *      long delay to help recovery.
 2831  */
 2832 static int
 2833 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2834 {
 2835         struct sge_rspq *rspq = &qs->rspq;
 2836         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2837         int budget_left = budget;
 2838         unsigned int sleeping = 0;
 2839 #if defined(INET6) || defined(INET)
 2840         int lro_enabled = qs->lro.enabled;
 2841         int skip_lro;
 2842         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2843 #endif
 2844         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2845 #ifdef DEBUG    
 2846         static int last_holdoff = 0;
 2847         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2848                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2849                 last_holdoff = rspq->holdoff_tmr;
 2850         }
 2851 #endif
 2852         rspq->next_holdoff = rspq->holdoff_tmr;
 2853 
 2854         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2855                 int eth, eop = 0, ethpad = 0;
 2856                 uint32_t flags = ntohl(r->flags);
 2857                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2858                 uint8_t opcode = r->rss_hdr.opcode;
 2859                 
 2860                 eth = (opcode == CPL_RX_PKT);
 2861                 
 2862                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2863                         struct mbuf *m;
 2864 
 2865                         if (cxgb_debug)
 2866                                 printf("async notification\n");
 2867 
 2868                         if (mh->mh_head == NULL) {
 2869                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2870                                 m = mh->mh_head;
 2871                         } else {
 2872                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2873                         }
 2874                         if (m == NULL)
 2875                                 goto no_mem;
 2876 
 2877                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2878                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2879                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 2880                         opcode = CPL_ASYNC_NOTIF;
 2881                         eop = 1;
 2882                         rspq->async_notif++;
 2883                         goto skip;
 2884                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2885                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2886 
 2887                         if (m == NULL) {        
 2888                 no_mem:
 2889                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2890                                 budget_left--;
 2891                                 break;
 2892                         }
 2893                         if (mh->mh_head == NULL)
 2894                                 mh->mh_head = m;
 2895                         else 
 2896                                 mh->mh_tail->m_next = m;
 2897                         mh->mh_tail = m;
 2898 
 2899                         get_imm_packet(adap, r, m);
 2900                         mh->mh_head->m_pkthdr.len += m->m_len;
 2901                         eop = 1;
 2902                         rspq->imm_data++;
 2903                 } else if (r->len_cq) {
 2904                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2905                         
 2906                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2907                         if (eop) {
 2908                                 if (r->rss_hdr.hash_type && !adap->timestamp) {
 2909                                         M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE);
 2910                                         mh->mh_head->m_pkthdr.flowid = rss_hash;
 2911                                 }
 2912                         }
 2913                         
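                               /*
                                * Received frames carry a 2-byte pad ahead of
                                * the Ethernet header (keeping the IP header
                                * 32-bit aligned); the pad size is handed to
                                * t3_rx_eth() below so it can be stripped.
                                */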
 2914                         ethpad = 2;
 2915                 } else {
 2916                         rspq->pure_rsps++;
 2917                 }
 2918         skip:
 2919                 if (flags & RSPD_CTRL_MASK) {
 2920                         sleeping |= flags & RSPD_GTS_MASK;
 2921                         handle_rsp_cntrl_info(qs, flags);
 2922                 }
 2923 
 2924                 if (!eth && eop) {
 2925                         rspq->offload_pkts++;
 2926 #ifdef TCP_OFFLOAD
 2927                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2928 #else
 2929                         m_freem(mh->mh_head);
 2930 #endif
 2931                         mh->mh_head = NULL;
 2932                 } else if (eth && eop) {
 2933                         struct mbuf *m = mh->mh_head;
 2934 
 2935                         t3_rx_eth(adap, m, ethpad);
 2936 
 2937                         /*
 2938                          * The T304 sends incoming packets on any qset.  If LRO
 2939                          * is also enabled, we could end up sending packet up
  2940                          * is also enabled, we could end up sending the packet up
 2941                          *
 2942                          * The mbuf's rcvif was derived from the cpl header and
 2943                          * is accurate.  Skip LRO and just use that.
 2944                          */
 2945 #if defined(INET6) || defined(INET)
 2946                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2947 
 2948                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2949                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2950                             ) {
  2951                                 /* successfully queued for LRO */
 2952                         } else
 2953 #endif
 2954                         {
 2955                                 /*
 2956                                  * LRO not enabled, packet unsuitable for LRO,
 2957                                  * or unable to queue.  Pass it up right now in
 2958                                  * either case.
 2959                                  */
 2960                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2961                                 (*ifp->if_input)(ifp, m);
 2962                         }
 2963                         mh->mh_head = NULL;
 2964 
 2965                 }
 2966 
 2967                 r++;
 2968                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2969                         rspq->cidx = 0;
 2970                         rspq->gen ^= 1;
 2971                         r = rspq->desc;
 2972                 }
 2973 
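                       /*
                        * Return response-queue credits to the hardware in
                        * batches of 64 rather than per descriptor, and top up
                        * both free lists a little on every iteration.
                        */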
 2974                 if (++rspq->credits >= 64) {
 2975                         refill_rspq(adap, rspq, rspq->credits);
 2976                         rspq->credits = 0;
 2977                 }
 2978                 __refill_fl_lt(adap, &qs->fl[0], 32);
 2979                 __refill_fl_lt(adap, &qs->fl[1], 32);
 2980                 --budget_left;
 2981         }
 2982 
 2983 #if defined(INET6) || defined(INET)
 2984         /* Flush LRO */
 2985         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 2986                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 2987                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 2988                 tcp_lro_flush(lro_ctrl, queued);
 2989         }
 2990 #endif
 2991 
 2992         if (sleeping)
 2993                 check_ring_db(adap, qs, sleeping);
 2994 
 2995         mb();  /* commit Tx queue processed updates */
 2996         if (__predict_false(qs->txq_stopped > 1))
 2997                 restart_tx(qs);
 2998 
 2999         __refill_fl_lt(adap, &qs->fl[0], 512);
 3000         __refill_fl_lt(adap, &qs->fl[1], 512);
 3001         budget -= budget_left;
 3002         return (budget);
 3003 }
 3004 
 3005 /*
 3006  * A helper function that processes responses and issues GTS.
 3007  */
 3008 static __inline int
 3009 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3010 {
 3011         int work;
 3012         static int last_holdoff = 0;
 3013         
 3014         work = process_responses(adap, rspq_to_qset(rq), -1);
 3015 
 3016         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3017                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3018                 last_holdoff = rq->next_holdoff;
 3019         }
 3020         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3021             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3022         
 3023         return (work);
 3024 }
 3025 
 3026 
 3027 /*
 3028  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3029  * Handles data events from SGE response queues as well as error and other
 3030  * async events as they all use the same interrupt pin.  We use one SGE
 3031  * response queue per port in this mode and protect all response queues with
 3032  * queue 0's lock.
 3033  */
 3034 void
 3035 t3b_intr(void *data)
 3036 {
 3037         uint32_t i, map;
 3038         adapter_t *adap = data;
 3039         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3040         
 3041         t3_write_reg(adap, A_PL_CLI, 0);
 3042         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3043 
 3044         if (!map) 
 3045                 return;
 3046 
 3047         if (__predict_false(map & F_ERRINTR)) {
 3048                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3049                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3050                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3051         }
 3052 
 3053         mtx_lock(&q0->lock);
 3054         for_each_port(adap, i)
  3055                 if (map & (1 << i))
  3056                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3057         mtx_unlock(&q0->lock);
 3058 }
 3059 
 3060 /*
 3061  * The MSI interrupt handler.  This needs to handle data events from SGE
 3062  * response queues as well as error and other async events as they all use
 3063  * the same MSI vector.  We use one SGE response queue per port in this mode
 3064  * and protect all response queues with queue 0's lock.
 3065  */
 3066 void
 3067 t3_intr_msi(void *data)
 3068 {
 3069         adapter_t *adap = data;
 3070         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3071         int i, new_packets = 0;
 3072 
 3073         mtx_lock(&q0->lock);
 3074 
 3075         for_each_port(adap, i)
  3076                 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
  3077                         new_packets = 1;
 3078         mtx_unlock(&q0->lock);
 3079         if (new_packets == 0) {
 3080                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3081                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3082                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3083         }
 3084 }
 3085 
 3086 void
 3087 t3_intr_msix(void *data)
 3088 {
 3089         struct sge_qset *qs = data;
 3090         adapter_t *adap = qs->port->adapter;
 3091         struct sge_rspq *rspq = &qs->rspq;
 3092 
 3093         if (process_responses_gts(adap, rspq) == 0)
 3094                 rspq->unhandled_irqs++;
 3095 }
 3096 
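       /*
        * Scratch sbuf capacity shared by the queue-dump sysctl handlers below.
        */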
  3097 #define QDUMP_SBUF_SIZE         (32 * 400)
 3098 static int
 3099 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3100 {
 3101         struct sge_rspq *rspq;
 3102         struct sge_qset *qs;
 3103         int i, err, dump_end, idx;
 3104         struct sbuf *sb;
 3105         struct rsp_desc *rspd;
 3106         uint32_t data[4];
 3107         
 3108         rspq = arg1;
 3109         qs = rspq_to_qset(rspq);
 3110         if (rspq->rspq_dump_count == 0) 
 3111                 return (0);
 3112         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3113                 log(LOG_WARNING,
 3114                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3115                 rspq->rspq_dump_count = 0;
 3116                 return (EINVAL);
 3117         }
 3118         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3119                 log(LOG_WARNING,
 3120                     "dump start of %d is greater than queue size\n",
 3121                     rspq->rspq_dump_start);
 3122                 rspq->rspq_dump_start = 0;
 3123                 return (EINVAL);
 3124         }
 3125         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3126         if (err)
 3127                 return (err);
 3128         err = sysctl_wire_old_buffer(req, 0);
 3129         if (err)
 3130                 return (err);
 3131         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3132 
 3133         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3134             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3135             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3136         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3137             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3138         
 3139         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3140             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3141         
 3142         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3143         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3144                 idx = i & (RSPQ_Q_SIZE-1);
 3145                 
 3146                 rspd = &rspq->desc[idx];
 3147                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3148                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3149                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3150                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3151                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3152                     be32toh(rspd->len_cq), rspd->intr_gen);
 3153         }
 3154 
 3155         err = sbuf_finish(sb);
 3156         /* Output a trailing NUL. */
 3157         if (err == 0)
 3158                 err = SYSCTL_OUT(req, "", 1);
 3159         sbuf_delete(sb);
 3160         return (err);
 3161 }       
 3162 
 3163 static int
 3164 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3165 {
 3166         struct sge_txq *txq;
 3167         struct sge_qset *qs;
 3168         int i, j, err, dump_end;
 3169         struct sbuf *sb;
 3170         struct tx_desc *txd;
 3171         uint32_t *WR, wr_hi, wr_lo, gen;
 3172         uint32_t data[4];
 3173         
 3174         txq = arg1;
 3175         qs = txq_to_qset(txq, TXQ_ETH);
 3176         if (txq->txq_dump_count == 0) {
 3177                 return (0);
 3178         }
 3179         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3180                 log(LOG_WARNING,
 3181                     "dump count is too large %d\n", txq->txq_dump_count);
 3182                 txq->txq_dump_count = 1;
 3183                 return (EINVAL);
 3184         }
 3185         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3186                 log(LOG_WARNING,
 3187                     "dump start of %d is greater than queue size\n",
 3188                     txq->txq_dump_start);
 3189                 txq->txq_dump_start = 0;
 3190                 return (EINVAL);
 3191         }
 3192         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3193         if (err)
 3194                 return (err);
 3195         err = sysctl_wire_old_buffer(req, 0);
 3196         if (err)
 3197                 return (err);
 3198         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3199 
 3200         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3201             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3202             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
  3203         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3204             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3205             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3206         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3207             txq->txq_dump_start,
 3208             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3209 
 3210         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3211         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3212                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3213                 WR = (uint32_t *)txd->flit;
 3214                 wr_hi = ntohl(WR[0]);
 3215                 wr_lo = ntohl(WR[1]);           
 3216                 gen = G_WR_GEN(wr_lo);
 3217                 
 3218                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3219                     wr_hi, wr_lo, gen);
 3220                 for (j = 2; j < 30; j += 4) 
 3221                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3222                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3223 
 3224         }
 3225         err = sbuf_finish(sb);
 3226         /* Output a trailing NUL. */
 3227         if (err == 0)
 3228                 err = SYSCTL_OUT(req, "", 1);
 3229         sbuf_delete(sb);
 3230         return (err);
 3231 }
 3232 
 3233 static int
 3234 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3235 {
 3236         struct sge_txq *txq;
 3237         struct sge_qset *qs;
 3238         int i, j, err, dump_end;
 3239         struct sbuf *sb;
 3240         struct tx_desc *txd;
 3241         uint32_t *WR, wr_hi, wr_lo, gen;
 3242         
 3243         txq = arg1;
 3244         qs = txq_to_qset(txq, TXQ_CTRL);
 3245         if (txq->txq_dump_count == 0) {
 3246                 return (0);
 3247         }
 3248         if (txq->txq_dump_count > 256) {
 3249                 log(LOG_WARNING,
 3250                     "dump count is too large %d\n", txq->txq_dump_count);
 3251                 txq->txq_dump_count = 1;
 3252                 return (EINVAL);
 3253         }
 3254         if (txq->txq_dump_start > 255) {
 3255                 log(LOG_WARNING,
 3256                     "dump start of %d is greater than queue size\n",
 3257                     txq->txq_dump_start);
 3258                 txq->txq_dump_start = 0;
 3259                 return (EINVAL);
 3260         }
 3261 
 3262         err = sysctl_wire_old_buffer(req, 0);
 3263         if (err != 0)
 3264                 return (err);
 3265         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3266         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3267             txq->txq_dump_start,
 3268             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3269 
 3270         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3271         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3272                 txd = &txq->desc[i & (255)];
 3273                 WR = (uint32_t *)txd->flit;
 3274                 wr_hi = ntohl(WR[0]);
 3275                 wr_lo = ntohl(WR[1]);           
 3276                 gen = G_WR_GEN(wr_lo);
 3277                 
 3278                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3279                     wr_hi, wr_lo, gen);
 3280                 for (j = 2; j < 30; j += 4) 
 3281                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3282                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3283 
 3284         }
 3285         err = sbuf_finish(sb);
 3286         /* Output a trailing NUL. */
 3287         if (err == 0)
 3288                 err = SYSCTL_OUT(req, "", 1);
 3289         sbuf_delete(sb);
 3290         return (err);
 3291 }
 3292 
 3293 static int
 3294 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3295 {
 3296         adapter_t *sc = arg1;
 3297         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3298         int coalesce_usecs;     
 3299         struct sge_qset *qs;
 3300         int i, j, err, nqsets = 0;
 3301         struct mtx *lock;
 3302 
 3303         if ((sc->flags & FULL_INIT_DONE) == 0)
 3304                 return (ENXIO);
 3305                 
 3306         coalesce_usecs = qsp->coalesce_usecs;
 3307         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3308 
 3309         if (err != 0) {
 3310                 return (err);
 3311         }
 3312         if (coalesce_usecs == qsp->coalesce_usecs)
 3313                 return (0);
 3314 
 3315         for (i = 0; i < sc->params.nports; i++) 
 3316                 for (j = 0; j < sc->port[i].nqsets; j++)
 3317                         nqsets++;
 3318 
 3319         coalesce_usecs = max(1, coalesce_usecs);
 3320 
 3321         for (i = 0; i < nqsets; i++) {
 3322                 qs = &sc->sge.qs[i];
 3323                 qsp = &sc->params.sge.qset[i];
 3324                 qsp->coalesce_usecs = coalesce_usecs;
 3325                 
 3326                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3327                             &sc->sge.qs[0].rspq.lock;
 3328 
 3329                 mtx_lock(lock);
 3330                 t3_update_qset_coalesce(qs, qsp);
 3331                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3332                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3333                 mtx_unlock(lock);
 3334         }
 3335 
 3336         return (0);
 3337 }
 3338 
 3339 static int
 3340 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3341 {
 3342         adapter_t *sc = arg1;
 3343         int rc, timestamp;
 3344 
 3345         if ((sc->flags & FULL_INIT_DONE) == 0)
 3346                 return (ENXIO);
 3347 
 3348         timestamp = sc->timestamp;
 3349         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3350 
 3351         if (rc != 0)
 3352                 return (rc);
 3353 
 3354         if (timestamp != sc->timestamp) {
 3355                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3356                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3357                 sc->timestamp = timestamp;
 3358         }
 3359 
 3360         return (0);
 3361 }
 3362 
 3363 void
 3364 t3_add_attach_sysctls(adapter_t *sc)
 3365 {
 3366         struct sysctl_ctx_list *ctx;
 3367         struct sysctl_oid_list *children;
 3368 
 3369         ctx = device_get_sysctl_ctx(sc->dev);
 3370         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3371 
 3372         /* random information */
 3373         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3374             "firmware_version",
 3375             CTLFLAG_RD, sc->fw_version,
 3376             0, "firmware version");
 3377         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3378             "hw_revision",
 3379             CTLFLAG_RD, &sc->params.rev,
 3380             0, "chip model");
 3381         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3382             "port_types",
 3383             CTLFLAG_RD, sc->port_types,
 3384             0, "type of ports");
 3385         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3386             "enable_debug",
 3387             CTLFLAG_RW, &cxgb_debug,
 3388             0, "enable verbose debugging output");
 3389         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3390             CTLFLAG_RD, &sc->tunq_coalesce,
 3391             "#tunneled packets freed");
 3392         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3393             "txq_overrun",
 3394             CTLFLAG_RD, &txq_fills,
 3395             0, "#times txq overrun");
 3396         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3397             "core_clock",
 3398             CTLFLAG_RD, &sc->params.vpd.cclk,
  3399             0, "core clock frequency (in kHz)");
 3400 }
 3401 
 3402 
 3403 static const char *rspq_name = "rspq";
 3404 static const char *txq_names[] =
 3405 {
 3406         "txq_eth",
 3407         "txq_ofld",
 3408         "txq_ctrl"      
 3409 };
 3410 
 3411 static int
 3412 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3413 {
 3414         struct port_info *p = arg1;
 3415         uint64_t *parg;
 3416 
 3417         if (!p)
 3418                 return (EINVAL);
 3419 
 3420         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3421         PORT_LOCK(p);
 3422         t3_mac_update_stats(&p->mac);
 3423         PORT_UNLOCK(p);
 3424 
 3425         return (sysctl_handle_64(oidp, parg, 0, req));
 3426 }
 3427 
 3428 void
 3429 t3_add_configured_sysctls(adapter_t *sc)
 3430 {
 3431         struct sysctl_ctx_list *ctx;
 3432         struct sysctl_oid_list *children;
 3433         int i, j;
 3434         
 3435         ctx = device_get_sysctl_ctx(sc->dev);
 3436         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3437 
 3438         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3439             "intr_coal",
 3440             CTLTYPE_INT|CTLFLAG_RW, sc,
 3441             0, t3_set_coalesce_usecs,
 3442             "I", "interrupt coalescing timer (us)");
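               /*
                * The knob registered above can be tuned at run time, e.g.
                * (assuming the controller attaches as cxgbc0):
                * sysctl dev.cxgbc.0.intr_coal=50
                */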
 3443 
 3444         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3445             "pkt_timestamp",
 3446             CTLTYPE_INT | CTLFLAG_RW, sc,
 3447             0, t3_pkt_timestamp,
 3448             "I", "provide packet timestamp instead of connection hash");
 3449 
 3450         for (i = 0; i < sc->params.nports; i++) {
 3451                 struct port_info *pi = &sc->port[i];
 3452                 struct sysctl_oid *poid;
 3453                 struct sysctl_oid_list *poidlist;
 3454                 struct mac_stats *mstats = &pi->mac.stats;
 3455                 
 3456                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3457                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3458                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3459                 poidlist = SYSCTL_CHILDREN(poid);
 3460                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3461                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3462                     0, "#queue sets");
 3463 
 3464                 for (j = 0; j < pi->nqsets; j++) {
 3465                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3466                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3467                                           *ctrlqpoid, *lropoid;
 3468                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3469                                                *txqpoidlist, *ctrlqpoidlist,
 3470                                                *lropoidlist;
 3471                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3472                         
 3473                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3474                         
 3475                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3476                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3477                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3478 
 3479                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3480                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3481                                         "freelist #0 empty");
 3482                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3483                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3484                                         "freelist #1 empty");
 3485 
 3486                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3487                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3488                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3489 
 3490                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3491                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3492                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3493 
 3494                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3495                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3496                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3497 
 3498                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3499                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3500                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3501 
 3502                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3503                             CTLFLAG_RD, &qs->rspq.size,
 3504                             0, "#entries in response queue");
 3505                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3506                             CTLFLAG_RD, &qs->rspq.cidx,
 3507                             0, "consumer index");
 3508                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3509                             CTLFLAG_RD, &qs->rspq.credits,
 3510                             0, "#credits");
 3511                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3512                             CTLFLAG_RD, &qs->rspq.starved,
 3513                             0, "#times starved");
 3514                         SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3515                             CTLFLAG_RD, &qs->rspq.phys_addr,
  3516                             "physical address of the queue");
 3517                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3518                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3519                             0, "start rspq dump entry");
 3520                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3521                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3522                             0, "#rspq entries to dump");
 3523                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3524                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3525                             0, t3_dump_rspq, "A", "dump of the response queue");
 3526 
 3527                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3528                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3529                             "#tunneled packets dropped");
 3530                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3531                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3532                             0, "#tunneled packets waiting to be sent");
 3533 #if 0                   
 3534                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3535                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3536                             0, "#tunneled packets queue producer index");
 3537                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3538                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3539                             0, "#tunneled packets queue consumer index");
 3540 #endif                  
 3541                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3542                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3543                             0, "#tunneled packets processed by the card");
 3544                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3545                             CTLFLAG_RD, &txq->cleaned,
 3546                             0, "#tunneled packets cleaned");
 3547                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3548                             CTLFLAG_RD, &txq->in_use,
 3549                             0, "#tunneled packet slots in use");
 3550                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
 3551                             CTLFLAG_RD, &txq->txq_frees,
 3552                             "#tunneled packets freed");
 3553                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3554                             CTLFLAG_RD, &txq->txq_skipped,
 3555                             0, "#tunneled packet descriptors skipped");
 3556                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3557                             CTLFLAG_RD, &txq->txq_coalesced,
 3558                             "#tunneled packets coalesced");
 3559                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3560                             CTLFLAG_RD, &txq->txq_enqueued,
 3561                             0, "#tunneled packets enqueued to hardware");
 3562                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3563                             CTLFLAG_RD, &qs->txq_stopped,
 3564                             0, "tx queues stopped");
 3565                         SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3566                             CTLFLAG_RD, &txq->phys_addr,
  3567                             "physical address of the queue");
 3568                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3569                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3570                             0, "txq generation");
 3571                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3572                             CTLFLAG_RD, &txq->cidx,
 3573                             0, "hardware queue cidx");                  
 3574                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3575                             CTLFLAG_RD, &txq->pidx,
 3576                             0, "hardware queue pidx");
 3577                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3578                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3579                             0, "txq start idx for dump");
 3580                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3581                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3582                             0, "txq #entries to dump");                 
 3583                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3584                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3585                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3586 
 3587                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3588                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3589                             0, "ctrlq start idx for dump");
 3590                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3591                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3592                             0, "ctrl #entries to dump");                        
 3593                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3594                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3595                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3596 
 3597                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3598                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3599                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3600                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3601                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3602                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3603                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3604                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3605                 }
 3606 
 3607                 /* Now add a node for mac stats. */
 3608                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3609                     CTLFLAG_RD, NULL, "MAC statistics");
 3610                 poidlist = SYSCTL_CHILDREN(poid);
 3611 
 3612                 /*
 3613                  * We (ab)use the length argument (arg2) to pass on the offset
 3614                  * of the data that we are interested in.  This is only required
 3615                  * for the quad counters that are updated from the hardware (we
 3616                  * make sure that we return the latest value).
 3617                  * sysctl_handle_macstat first updates *all* the counters from
 3618                  * the hardware, and then returns the latest value of the
 3619                  * requested counter.  Best would be to update only the
 3620                  * requested counter from hardware, but t3_mac_update_stats()
 3621                  * hides all the register details and we don't want to dive into
 3622                  * all that here.
 3623                  */
 3624 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3625     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3626     sysctl_handle_macstat, "QU", 0)
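                       /*
                        * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) creates
                        * a read-only 64-bit node "tx_octets"; its handler gets
                        * offsetof(struct mac_stats, tx_octets) in arg2,
                        * refreshes the MAC counters, and then returns
                        * pi->mac.stats.tx_octets.
                        */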
 3627                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3628                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3629                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3630                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3631                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3632                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3633                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3634                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3635                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3636                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3637                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3638                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3639                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3640                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3641                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3647                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3648                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3649                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3650                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3651                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3652                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3653                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3654                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3655                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3656                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3657                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3658                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3659                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3660                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3661                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3662                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3663                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3671                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3672                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3673 #undef CXGB_SYSCTL_ADD_QUAD
 3674 
 3675 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3676     CTLFLAG_RD, &mstats->a, 0)
 3677                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3678                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3679                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3680                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3681                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3682                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3683                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3684                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3685                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3686                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3687 #undef CXGB_SYSCTL_ADD_ULONG
 3688         }
 3689 }
 3690         
 3691 /**
 3692  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3693  *      @qs: the queue set
 3694  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3695  *      @idx: the descriptor index in the queue
 3696  *      @data: where to dump the descriptor contents
 3697  *
 3698  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3699  *      size of the descriptor.
 3700  */
 3701 int
 3702 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3703                 unsigned char *data)
 3704 {
 3705         if (qnum >= 6)
 3706                 return (EINVAL);
 3707 
 3708         if (qnum < 3) {
 3709                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
  3710                         return (EINVAL);
 3711                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3712                 return sizeof(struct tx_desc);
 3713         }
 3714 
 3715         if (qnum == 3) {
 3716                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3717                         return (EINVAL);
 3718                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3719                 return sizeof(struct rsp_desc);
 3720         }
 3721 
 3722         qnum -= 4;
 3723         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3724                 return (EINVAL);
 3725         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3726         return sizeof(struct rx_desc);
 3727 }
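
       /*
        * Minimal usage sketch (illustrative only, not part of the driver): a
        * hypothetical debug helper that uses t3_get_desc() to copy out one
        * Ethernet Tx descriptor and log how many bytes were returned.  The
        * function name and the LOG_DEBUG message are inventions for the
        * example.
        */
       #if 0
       static void
       example_dump_tx_desc(const struct sge_qset *qs, unsigned int idx)
       {
               unsigned char buf[sizeof(struct tx_desc)];
               int len;

               /* qnum 0..2 are the Tx queues; 0 is the Ethernet Tx queue. */
               len = t3_get_desc(qs, 0, idx, buf);
               if (len == (int)sizeof(struct tx_desc))
                       log(LOG_DEBUG, "txq[0] desc %u: %d bytes copied\n",
                           idx, len);
       }
       #endif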
