FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3 
    4 Copyright (c) 2007-2009, Chelsio Inc.
    5 All rights reserved.
    6 
    7 Redistribution and use in source and binary forms, with or without
    8 modification, are permitted provided that the following conditions are met:
    9 
   10  1. Redistributions of source code must retain the above copyright notice,
   11     this list of conditions and the following disclaimer.
   12 
   13  2. Neither the name of the Chelsio Corporation nor the names of its
   14     contributors may be used to endorse or promote products derived from
   15     this software without specific prior written permission.
   16  
   17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   27 POSSIBILITY OF SUCH DAMAGE.
   28 
   29 ***************************************************************************/
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_inet6.h"
   35 #include "opt_inet.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/module.h>
   41 #include <sys/bus.h>
   42 #include <sys/conf.h>
   43 #include <machine/bus.h>
   44 #include <machine/resource.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/if.h>
   60 #include <net/if_var.h>
   61 #include <net/bpf.h>    
   62 #include <net/ethernet.h>
   63 #include <net/if_vlan_var.h>
   64 
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in.h>
   67 #include <netinet/ip.h>
   68 #include <netinet/ip6.h>
   69 #include <netinet/tcp.h>
   70 
   71 #include <dev/pci/pcireg.h>
   72 #include <dev/pci/pcivar.h>
   73 
   74 #include <vm/vm.h>
   75 #include <vm/pmap.h>
   76 
   77 #include <cxgb_include.h>
   78 #include <sys/mvec.h>
   79 
   80 int     txq_fills = 0;
   81 int     multiq_tx_enable = 1;
   82 
   83 #ifdef TCP_OFFLOAD
   84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
   85 #endif
   86 
   87 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   90     "size of per-queue mbuf ring");
   91 
   92 static int cxgb_tx_coalesce_force = 0;
   93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
   94     &cxgb_tx_coalesce_force, 0,
   95     "coalesce small packets into a single work request regardless of ring state");
   96 
   97 #define COALESCE_START_DEFAULT          (TX_ETH_Q_SIZE>>1)
   98 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   99 #define COALESCE_STOP_DEFAULT           (TX_ETH_Q_SIZE>>2)
  100 #define COALESCE_STOP_MIN               (TX_ETH_Q_SIZE>>5)
  101 #define TX_RECLAIM_DEFAULT              (TX_ETH_Q_SIZE>>5)
  102 #define TX_RECLAIM_MAX                  (TX_ETH_Q_SIZE>>2)
  103 #define TX_RECLAIM_MIN                  (TX_ETH_Q_SIZE>>6)
  104 
  105 
  106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
  108     &cxgb_tx_coalesce_enable_start, 0,
  109     "coalesce enable threshold");
  110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
  112     &cxgb_tx_coalesce_enable_stop, 0,
  113     "coalesce disable threshold");
  114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
  116     &cxgb_tx_reclaim_threshold, 0,
  117     "tx cleaning minimum threshold");
  118 
  119 /*
  120  * XXX don't re-enable this until TOE stops assuming
  121  * we have an m_ext
  122  */
  123 static int recycle_enable = 0;
  124 
  125 extern int cxgb_use_16k_clusters;
  126 extern int nmbjumbop;
  127 extern int nmbjumbo9;
  128 extern int nmbjumbo16;
  129 
  130 #define USE_GTS 0
  131 
  132 #define SGE_RX_SM_BUF_SIZE      1536
  133 #define SGE_RX_DROP_THRES       16
  134 #define SGE_RX_COPY_THRES       128
  135 
  136 /*
  137  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  138  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  139  */
  140 #define TX_RECLAIM_PERIOD       (hz >> 1)
  141 
  142 /* 
  143  * Values for sge_txq.flags
  144  */
  145 enum {
  146         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  147         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  148 };
  149 
  150 struct tx_desc {
  151         uint64_t        flit[TX_DESC_FLITS];
  152 } __packed;
  153 
  154 struct rx_desc {
  155         uint32_t        addr_lo;
  156         uint32_t        len_gen;
  157         uint32_t        gen2;
  158         uint32_t        addr_hi;
  159 } __packed;
  160 
  161 struct rsp_desc {               /* response queue descriptor */
  162         struct rss_header       rss_hdr;
  163         uint32_t                flags;
  164         uint32_t                len_cq;
  165         uint8_t                 imm_data[47];
  166         uint8_t                 intr_gen;
  167 } __packed;
  168 
  169 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  170 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  171 #define RX_SW_DESC_INUSE        (1 << 3)
  172 #define TX_SW_DESC_MAPPED       (1 << 4)
  173 
  174 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  175 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  176 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  177 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  178 
  179 struct tx_sw_desc {                /* SW state per Tx descriptor */
  180         struct mbuf     *m;
  181         bus_dmamap_t    map;
  182         int             flags;
  183 };
  184 
  185 struct rx_sw_desc {                /* SW state per Rx descriptor */
  186         caddr_t         rxsd_cl;
  187         struct mbuf     *m;
  188         bus_dmamap_t    map;
  189         int             flags;
  190 };
  191 
  192 struct txq_state {
  193         unsigned int    compl;
  194         unsigned int    gen;
  195         unsigned int    pidx;
  196 };
  197 
  198 struct refill_fl_cb_arg {
  199         int               error;
  200         bus_dma_segment_t seg;
  201         int               nseg;
  202 };
  203 
  204 
  205 /*
  206  * Maps a number of flits to the number of Tx descriptors that can hold them.
  207  * The formula is
  208  *
  209  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  210  *
  211  * HW allows up to 4 descriptors to be combined into a WR.
  212  */
  213 static uint8_t flit_desc_map[] = {
  214         0,
  215 #if SGE_NUM_GENBITS == 1
  216         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  217         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  218         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  219         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  220 #elif SGE_NUM_GENBITS == 2
  221         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  222         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  223         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  224         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  225 #else
  226 # error "SGE_NUM_GENBITS must be 1 or 2"
  227 #endif
  228 };
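      /*
       * Worked example (a sketch, using WR_FLITS == 15 as implied by the
       * SGE_NUM_GENBITS == 2 table above): a request of 20 flits needs
       * desc = 1 + (20 - 2) / (15 - 1) = 2 descriptors, matching
       * flit_desc_map[20] == 2.
       */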
  229 
  230 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  231 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  232 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  233 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  234 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  236         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  237 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  239         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  240 #define TXQ_RING_DEQUEUE(qs) \
  241         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  242 
  243 int cxgb_debug = 0;
  244 
  245 static void sge_timer_cb(void *arg);
  246 static void sge_timer_reclaim(void *arg, int ncount);
  247 static void sge_txq_reclaim_handler(void *arg, int ncount);
  248 static void cxgb_start_locked(struct sge_qset *qs);
  249 
  250 /*
  251  * XXX need to cope with bursty scheduling by looking at a wider
  252  * window than we are now for determining the need for coalescing
  253  *
  254  */
  255 static __inline uint64_t
  256 check_pkt_coalesce(struct sge_qset *qs) 
  257 { 
  258         struct adapter *sc; 
  259         struct sge_txq *txq; 
  260         uint8_t *fill;
  261 
  262         if (__predict_false(cxgb_tx_coalesce_force))
  263                 return (1);
  264         txq = &qs->txq[TXQ_ETH]; 
  265         sc = qs->port->adapter; 
  266         fill = &sc->tunq_fill[qs->idx];
  267 
  268         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  269                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  270         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  271                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
  272         /*
  273          * once the hardware transmit queue has cxgb_tx_coalesce_enable_start
  274          * or more descriptors in use we mark it as coalescing - we drop back
  275          * from coalescing when in_use falls to cxgb_tx_coalesce_enable_stop
  276          * or below and the ring is empty; this provides some hysteresis
  277          */
  278         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  279             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  280                 *fill = 0; 
  281         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  282                 *fill = 1; 
  283 
  284         return (sc->tunq_coalesce);
  285 } 
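      /*
       * Note: *fill is per-queue hysteresis state - it is set once the ETH txq
       * has at least cxgb_tx_coalesce_enable_start descriptors in use and is
       * cleared only after in_use drops to cxgb_tx_coalesce_enable_stop or
       * below with an empty ring.  The caller (cxgb_dequeue) treats a non-zero
       * return value as "coalesce now".
       */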
  286 
  287 #ifdef __LP64__
  288 static void
  289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  290 {
  291         uint64_t wr_hilo;
  292 #if _BYTE_ORDER == _LITTLE_ENDIAN
  293         wr_hilo = wr_hi;
  294         wr_hilo |= (((uint64_t)wr_lo)<<32);
  295 #else
  296         wr_hilo = wr_lo;
  297         wr_hilo |= (((uint64_t)wr_hi)<<32);
  298 #endif  
  299         wrp->wrh_hilo = wr_hilo;
  300 }
  301 #else
  302 static void
  303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  304 {
  305 
  306         wrp->wrh_hi = wr_hi;
  307         wmb();
  308         wrp->wrh_lo = wr_lo;
  309 }
  310 #endif
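      /*
       * Ordering note: the generation bit lives in the low word of the WR
       * header, so it must become visible to the SGE last.  On LP64 both
       * halves are published with a single 64-bit store; on 32-bit hosts the
       * high word is written first and wmb() keeps the low word behind it.
       */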
  311 
  312 struct coalesce_info {
  313         int count;
  314         int nbytes;
  315         int noncoal;
  316 };
  317 
  318 static int
  319 coalesce_check(struct mbuf *m, void *arg)
  320 {
  321         struct coalesce_info *ci = arg;
  322 
  323         if ((m->m_next != NULL) ||
  324             ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
  325                 ci->noncoal = 1;
  326 
  327         if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
  328             (ci->nbytes + m->m_len <= 10500))) {
  329                 ci->count++;
  330                 ci->nbytes += m->m_len;
  331                 return (1);
  332         }
  333         return (0);
  334 }
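      /*
       * Dequeue predicate used by cxgb_dequeue(): the first packet is always
       * taken; further packets are batched only while the batch remains at or
       * under 7 packets and 10500 bytes and no packet seen so far was
       * multi-segment or crossed a page boundary.
       */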
  335 
  336 static struct mbuf *
  337 cxgb_dequeue(struct sge_qset *qs)
  338 {
  339         struct mbuf *m, *m_head, *m_tail;
  340         struct coalesce_info ci;
  341 
  342         
  343         if (check_pkt_coalesce(qs) == 0) 
  344                 return TXQ_RING_DEQUEUE(qs);
  345 
  346         m_head = m_tail = NULL;
  347         ci.count = ci.nbytes = ci.noncoal = 0;
  348         do {
  349                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  350                 if (m_head == NULL) {
  351                         m_tail = m_head = m;
  352                 } else if (m != NULL) {
  353                         m_tail->m_nextpkt = m;
  354                         m_tail = m;
  355                 }
  356         } while (m != NULL);
  357         if (ci.count > 7)
  358                 panic("trying to coalesce %d packets in to one WR", ci.count);
  359         return (m_head);
  360 }
  361         
  362 /**
  363  *      reclaim_completed_tx - reclaims completed Tx descriptors
  364  *      @qs: the queue set owning the Tx queue
  365  *      @queue: index of the Tx queue to reclaim completed descriptors from
  366  *
  367  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  368  *      and frees the associated buffers if possible.  Called with the Tx
  369  *      queue's lock held.
  370  */
  371 static __inline int
  372 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  373 {
  374         struct sge_txq *q = &qs->txq[queue];
  375         int reclaim = desc_reclaimable(q);
  376 
  377         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  378             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  379                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  380 
  381         if (reclaim < reclaim_min)
  382                 return (0);
  383 
  384         mtx_assert(&qs->lock, MA_OWNED);
  385         if (reclaim > 0) {
  386                 t3_free_tx_desc(qs, reclaim, queue);
  387                 q->cleaned += reclaim;
  388                 q->in_use -= reclaim;
  389         }
  390         if (isset(&qs->txq_stopped, TXQ_ETH))
  391                 clrbit(&qs->txq_stopped, TXQ_ETH);
  392 
  393         return (reclaim);
  394 }
  395 
  396 #ifdef NETDUMP
  397 int
  398 cxgb_netdump_poll_tx(struct sge_qset *qs)
  399 {
  400 
  401         return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
  402 }
  403 #endif
  404 
  405 /**
  406  *      should_restart_tx - are there enough resources to restart a Tx queue?
  407  *      @q: the Tx queue
  408  *
  409  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  410  */
  411 static __inline int
  412 should_restart_tx(const struct sge_txq *q)
  413 {
  414         unsigned int r = q->processed - q->cleaned;
  415 
  416         return q->in_use - r < (q->size >> 1);
  417 }
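      /*
       * Example: with a 1024-entry queue, in_use == 600 and
       * (processed - cleaned) == 200 reclaimable descriptors, the effective
       * backlog is 400 < 512, so a suspended queue may be restarted.
       */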
  418 
  419 /**
  420  *      t3_sge_init - initialize SGE
  421  *      @adap: the adapter
  422  *      @p: the SGE parameters
  423  *
  424  *      Performs SGE initialization needed every time after a chip reset.
  425  *      We do not initialize any of the queue sets here, instead the driver
  426  *      top-level must request those individually.  We also do not enable DMA
  427  *      here, that should be done after the queues have been set up.
  428  */
  429 void
  430 t3_sge_init(adapter_t *adap, struct sge_params *p)
  431 {
  432         u_int ctrl, ups;
  433 
  434         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  435 
  436         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  437                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  438                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  439                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  440 #if SGE_NUM_GENBITS == 1
  441         ctrl |= F_EGRGENCTRL;
  442 #endif
  443         if (adap->params.rev > 0) {
  444                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  445                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  446         }
  447         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  448         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  449                      V_LORCQDRBTHRSH(512));
  450         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  451         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  452                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  453         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  454                      adap->params.rev < T3_REV_C ? 1000 : 500);
  455         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  456         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  457         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  458         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  459         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  460 }
  461 
  462 
  463 /**
  464  *      sgl_len - calculates the size of an SGL of the given capacity
  465  *      @n: the number of SGL entries
  466  *
  467  *      Calculates the number of flits needed for a scatter/gather list that
  468  *      can hold the given number of entries.
  469  */
  470 static __inline unsigned int
  471 sgl_len(unsigned int n)
  472 {
  473         return ((3 * n) / 2 + (n & 1));
  474 }
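      /*
       * Each SGL entry carries an 8-byte address and a 4-byte length; two
       * entries pack into three 8-byte flits.  E.g. n == 3 gives
       * (3 * 3) / 2 + (3 & 1) == 5 flits.
       */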
  475 
  476 /**
  477  *      get_imm_packet - return the next ingress packet buffer from a response
  478  *      @resp: the response descriptor containing the packet data
  479  *
  480  *      Return a packet containing the immediate data of the given response.
  481  */
  482 static int
  483 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  484 {
  485 
  486         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  487                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  488                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  489         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  490                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  491                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  492         } else
  493                 m->m_len = IMMED_PKT_SIZE;
  494         m->m_ext.ext_buf = NULL;
  495         m->m_ext.ext_type = 0;
  496         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  497         return (0);     
  498 }
  499 
  500 static __inline u_int
  501 flits_to_desc(u_int n)
  502 {
  503         return (flit_desc_map[n]);
  504 }
  505 
  506 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  507                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  508                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  509                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  510                     F_HIRCQPARITYERROR)
  511 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  512 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  513                       F_RSPQDISABLED)
  514 
  515 /**
  516  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  517  *      @adapter: the adapter
  518  *
  519  *      Interrupt handler for SGE asynchronous (non-data) events.
  520  */
  521 void
  522 t3_sge_err_intr_handler(adapter_t *adapter)
  523 {
  524         unsigned int v, status;
  525 
  526         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  527         if (status & SGE_PARERR)
  528                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  529                          status & SGE_PARERR);
  530         if (status & SGE_FRAMINGERR)
  531                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  532                          status & SGE_FRAMINGERR);
  533         if (status & F_RSPQCREDITOVERFOW)
  534                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  535 
  536         if (status & F_RSPQDISABLED) {
  537                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  538 
  539                 CH_ALERT(adapter,
  540                          "packet delivered to disabled response queue (0x%x)\n",
  541                          (v >> S_RSPQ0DISABLED) & 0xff);
  542         }
  543 
  544         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  545         if (status & SGE_FATALERR)
  546                 t3_fatal_err(adapter);
  547 }
  548 
  549 void
  550 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  551 {
  552         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  553 
  554         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  555         nqsets *= adap->params.nports;
  556 
  557         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  558 
  559         while (!powerof2(fl_q_size))
  560                 fl_q_size--;
  561 
  562         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  563             is_offload(adap);
  564 
  565 #if __FreeBSD_version >= 700111
  566         if (use_16k) {
  567                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  568                 jumbo_buf_size = MJUM16BYTES;
  569         } else {
  570                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  571                 jumbo_buf_size = MJUM9BYTES;
  572         }
  573 #else
  574         jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
  575         jumbo_buf_size = MJUMPAGESIZE;
  576 #endif
  577         while (!powerof2(jumbo_q_size))
  578                 jumbo_q_size--;
  579 
  580         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  581                 device_printf(adap->dev,
  582                     "Insufficient clusters and/or jumbo buffers.\n");
  583 
  584         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  585 
  586         for (i = 0; i < SGE_QSETS; ++i) {
  587                 struct qset_params *q = p->qset + i;
  588 
  589                 if (adap->params.nports > 2) {
  590                         q->coalesce_usecs = 50;
  591                 } else {
  592 #ifdef INVARIANTS                       
  593                         q->coalesce_usecs = 10;
  594 #else
  595                         q->coalesce_usecs = 5;
  596 #endif                  
  597                 }
  598                 q->polling = 0;
  599                 q->rspq_size = RSPQ_Q_SIZE;
  600                 q->fl_size = fl_q_size;
  601                 q->jumbo_size = jumbo_q_size;
  602                 q->jumbo_buf_size = jumbo_buf_size;
  603                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  604                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  605                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  606                 q->cong_thres = 0;
  607         }
  608 }
  609 
  610 int
  611 t3_sge_alloc(adapter_t *sc)
  612 {
  613 
  614         /* The parent tag. */
  615         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  616                                 1, 0,                   /* algnmnt, boundary */
  617                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  618                                 BUS_SPACE_MAXADDR,      /* highaddr */
  619                                 NULL, NULL,             /* filter, filterarg */
  620                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  621                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  622                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  623                                 0,                      /* flags */
  624                                 NULL, NULL,             /* lock, lockarg */
  625                                 &sc->parent_dmat)) {
  626                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  627                 return (ENOMEM);
  628         }
  629 
  630         /*
  631          * DMA tag for normal sized RX frames
  632          */
  633         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  634                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  635                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  636                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  637                 return (ENOMEM);
  638         }
  639 
  640         /* 
  641          * DMA tag for jumbo sized RX frames.
  642          */
  643         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  644                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  645                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  646                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  647                 return (ENOMEM);
  648         }
  649 
  650         /* 
  651          * DMA tag for TX frames.
  652          */
  653         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  654                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  655                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  656                 NULL, NULL, &sc->tx_dmat)) {
  657                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  658                 return (ENOMEM);
  659         }
  660 
  661         return (0);
  662 }
  663 
  664 int
  665 t3_sge_free(struct adapter * sc)
  666 {
  667 
  668         if (sc->tx_dmat != NULL)
  669                 bus_dma_tag_destroy(sc->tx_dmat);
  670 
  671         if (sc->rx_jumbo_dmat != NULL)
  672                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  673 
  674         if (sc->rx_dmat != NULL)
  675                 bus_dma_tag_destroy(sc->rx_dmat);
  676 
  677         if (sc->parent_dmat != NULL)
  678                 bus_dma_tag_destroy(sc->parent_dmat);
  679 
  680         return (0);
  681 }
  682 
  683 void
  684 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  685 {
  686 
  687         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  688         qs->rspq.polling = 0 /* p->polling */;
  689 }
  690 
  691 #if !defined(__i386__) && !defined(__amd64__)
  692 static void
  693 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  694 {
  695         struct refill_fl_cb_arg *cb_arg = arg;
  696         
  697         cb_arg->error = error;
  698         cb_arg->seg = segs[0];
  699         cb_arg->nseg = nseg;
  700 
  701 }
  702 #endif
  703 /**
  704  *      refill_fl - refill an SGE free-buffer list
  705  *      @sc: the controller softc
  706  *      @q: the free-list to refill
  707  *      @n: the number of new buffers to allocate
  708  *
  709  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  710  *      The caller must assure that @n does not exceed the queue's capacity.
  711  */
  712 static void
  713 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  714 {
  715         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  716         struct rx_desc *d = &q->desc[q->pidx];
  717         struct refill_fl_cb_arg cb_arg;
  718         struct mbuf *m;
  719         caddr_t cl;
  720         int err;
  721         
  722         cb_arg.error = 0;
  723         while (n--) {
  724                 /*
  725                  * We allocate an uninitialized mbuf + cluster, mbuf is
  726                  * initialized after rx.
  727                  */
  728                 if (q->zone == zone_pack) {
  729                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  730                                 break;
  731                         cl = m->m_ext.ext_buf;                  
  732                 } else {
  733                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  734                                 break;
  735                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  736                                 uma_zfree(q->zone, cl);
  737                                 break;
  738                         }
  739                 }
  740                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  741                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  742                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  743                                 uma_zfree(q->zone, cl);
  744                                 goto done;
  745                         }
  746                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  747                 }
  748 #if !defined(__i386__) && !defined(__amd64__)
  749                 err = bus_dmamap_load(q->entry_tag, sd->map,
  750                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  751                 
  752                 if (err != 0 || cb_arg.error) {
  753                         if (q->zone != zone_pack)
  754                                 uma_zfree(q->zone, cl);
  755                         m_free(m);
  756                         goto done;
  757                 }
  758 #else
  759                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  760 #endif          
  761                 sd->flags |= RX_SW_DESC_INUSE;
  762                 sd->rxsd_cl = cl;
  763                 sd->m = m;
  764                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  765                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  766                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  767                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  768 
  769                 d++;
  770                 sd++;
  771 
  772                 if (++q->pidx == q->size) {
  773                         q->pidx = 0;
  774                         q->gen ^= 1;
  775                         sd = q->sdesc;
  776                         d = q->desc;
  777                 }
  778                 q->credits++;
  779                 q->db_pending++;
  780         }
  781 
  782 done:
  783         if (q->db_pending >= 32) {
  784                 q->db_pending = 0;
  785                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  786         }
  787 }
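      /*
       * Note: newly posted buffers are batched; the free-list doorbell is only
       * rung once at least 32 of them (q->db_pending) have accumulated.
       */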
  788 
  789 
  790 /**
  791  *      free_rx_bufs - free the Rx buffers on an SGE free list
  792  *      @sc: the controller softc
  793  *      @q: the SGE free list to clean up
  794  *
  795  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  796  *      this queue should be stopped before calling this function.
  797  */
  798 static void
  799 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  800 {
  801         u_int cidx = q->cidx;
  802 
  803         while (q->credits--) {
  804                 struct rx_sw_desc *d = &q->sdesc[cidx];
  805 
  806                 if (d->flags & RX_SW_DESC_INUSE) {
  807                         bus_dmamap_unload(q->entry_tag, d->map);
  808                         bus_dmamap_destroy(q->entry_tag, d->map);
  809                         if (q->zone == zone_pack) {
  810                                 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
  811                                 uma_zfree(zone_pack, d->m);
  812                         } else {
  813                                 m_init(d->m, M_NOWAIT, MT_DATA, 0);
  814                                 uma_zfree(zone_mbuf, d->m);
  815                                 uma_zfree(q->zone, d->rxsd_cl);
  816                         }                       
  817                 }
  818                 
  819                 d->rxsd_cl = NULL;
  820                 d->m = NULL;
  821                 if (++cidx == q->size)
  822                         cidx = 0;
  823         }
  824 }
  825 
  826 static __inline void
  827 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  828 {
  829         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  830 }
  831 
  832 static __inline void
  833 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  834 {
  835         uint32_t reclaimable = fl->size - fl->credits;
  836 
  837         if (reclaimable > 0)
  838                 refill_fl(adap, fl, min(max, reclaimable));
  839 }
  840 
  841 /**
  842  *      recycle_rx_buf - recycle a receive buffer
  843  *      @adapter: the adapter
  844  *      @q: the SGE free list
  845  *      @idx: index of buffer to recycle
  846  *
  847  *      Recycles the specified buffer on the given free list by adding it at
  848  *      the next available slot on the list.
  849  */
  850 static void
  851 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  852 {
  853         struct rx_desc *from = &q->desc[idx];
  854         struct rx_desc *to   = &q->desc[q->pidx];
  855 
  856         q->sdesc[q->pidx] = q->sdesc[idx];
  857         to->addr_lo = from->addr_lo;        // already big endian
  858         to->addr_hi = from->addr_hi;        // likewise
  859         wmb();  /* necessary ? */
  860         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  861         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  862         q->credits++;
  863 
  864         if (++q->pidx == q->size) {
  865                 q->pidx = 0;
  866                 q->gen ^= 1;
  867         }
  868         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  869 }
  870 
  871 static void
  872 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  873 {
  874         uint32_t *addr;
  875 
  876         addr = arg;
  877         *addr = segs[0].ds_addr;
  878 }
  879 
  880 static int
  881 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  882     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  883     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  884 {
  885         size_t len = nelem * elem_size;
  886         void *s = NULL;
  887         void *p = NULL;
  888         int err;
  889 
  890         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  891                                       BUS_SPACE_MAXADDR_32BIT,
  892                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  893                                       len, 0, NULL, NULL, tag)) != 0) {
  894                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  895                 return (ENOMEM);
  896         }
  897 
  898         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  899                                     map)) != 0) {
  900                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  901                 return (ENOMEM);
  902         }
  903 
  904         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  905         bzero(p, len);
  906         *(void **)desc = p;
  907 
  908         if (sw_size) {
  909                 len = nelem * sw_size;
  910                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  911                 *(void **)sdesc = s;
  912         }
  913         if (parent_entry_tag == NULL)
  914                 return (0);
  915             
  916         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  917                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  918                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  919                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  920                                       NULL, NULL, entry_tag)) != 0) {
  921                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  922                 return (ENOMEM);
  923         }
  924         return (0);
  925 }
  926 
  927 static void
  928 sge_slow_intr_handler(void *arg, int ncount)
  929 {
  930         adapter_t *sc = arg;
  931 
  932         t3_slow_intr_handler(sc);
  933         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  934         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  935 }
  936 
  937 /**
  938  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  939  *      @arg: the adapter whose SGE queue sets are maintained
  940  *
  941  *      Runs periodically from a timer to perform maintenance of an SGE queue
  942  *      set.  It performs two tasks:
  943  *
  944  *      a) Cleans up any completed Tx descriptors that may still be pending.
  945  *      Normal descriptor cleanup happens when new packets are added to a Tx
  946  *      queue so this timer is relatively infrequent and does any cleanup only
  947  *      if the Tx queue has not seen any new packets in a while.  We make a
  948  *      best effort attempt to reclaim descriptors, in that we don't wait
  949  *      around if we cannot get a queue's lock (which most likely is because
  950  *      someone else is queueing new packets and so will also handle the clean
  951  *      up).  Since control queues use immediate data exclusively we don't
  952  *      bother cleaning them up here.
  953  *
  954  *      b) Replenishes Rx queues that have run out due to memory shortage.
  955  *      Normally new Rx buffers are added when existing ones are consumed but
  956  *      when out of memory a queue can become empty.  We try to add only a few
  957  *      buffers here, the queue will be replenished fully as these new buffers
  958  *      are used up if memory shortage has subsided.
  959  *      
  960  *      c) Returns coalesced response queue credits in case a response queue is
  961  *      starved.
  962  *
  963  *      d) Rings doorbells for T304 tunnel queues since we have seen doorbell
  964  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  965  */
  966 static void
  967 sge_timer_cb(void *arg)
  968 {
  969         adapter_t *sc = arg;
  970         if ((sc->flags & USING_MSIX) == 0) {
  971                 
  972                 struct port_info *pi;
  973                 struct sge_qset *qs;
  974                 struct sge_txq  *txq;
  975                 int i, j;
  976                 int reclaim_ofl, refill_rx;
  977 
  978                 if (sc->open_device_map == 0) 
  979                         return;
  980 
  981                 for (i = 0; i < sc->params.nports; i++) {
  982                         pi = &sc->port[i];
  983                         for (j = 0; j < pi->nqsets; j++) {
  984                                 qs = &sc->sge.qs[pi->first_qset + j];
  985                                 txq = &qs->txq[0];
  986                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  987                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  988                                     (qs->fl[1].credits < qs->fl[1].size));
  989                                 if (reclaim_ofl || refill_rx) {
  990                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  991                                         break;
  992                                 }
  993                         }
  994                 }
  995         }
  996         
  997         if (sc->params.nports > 2) {
  998                 int i;
  999 
 1000                 for_each_port(sc, i) {
 1001                         struct port_info *pi = &sc->port[i];
 1002 
 1003                         t3_write_reg(sc, A_SG_KDOORBELL, 
 1004                                      F_SELEGRCNTX | 
 1005                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1006                 }
 1007         }       
 1008         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1009             sc->open_device_map != 0)
 1010                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1011 }
 1012 
 1013 /*
 1014  * This is meant to be a catch-all function to keep sge state private
 1015  * to sge.c
 1016  *
 1017  */
 1018 int
 1019 t3_sge_init_adapter(adapter_t *sc)
 1020 {
 1021         callout_init(&sc->sge_timer_ch, 1);
 1022         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1023         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1024         return (0);
 1025 }
 1026 
 1027 int
 1028 t3_sge_reset_adapter(adapter_t *sc)
 1029 {
 1030         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1031         return (0);
 1032 }
 1033 
 1034 int
 1035 t3_sge_init_port(struct port_info *pi)
 1036 {
 1037         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1038         return (0);
 1039 }
 1040 
 1041 /**
 1042  *      refill_rspq - replenish an SGE response queue
 1043  *      @adapter: the adapter
 1044  *      @q: the response queue to replenish
 1045  *      @credits: how many new responses to make available
 1046  *
 1047  *      Replenishes a response queue by making the supplied number of responses
 1048  *      available to HW.
 1049  */
 1050 static __inline void
 1051 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1052 {
 1053 
 1054         /* mbufs are allocated on demand when a rspq entry is processed. */
 1055         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1056                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1057 }
 1058 
 1059 static void
 1060 sge_txq_reclaim_handler(void *arg, int ncount)
 1061 {
 1062         struct sge_qset *qs = arg;
 1063         int i;
 1064 
 1065         for (i = 0; i < 3; i++)
 1066                 reclaim_completed_tx(qs, 16, i);
 1067 }
 1068 
 1069 static void
 1070 sge_timer_reclaim(void *arg, int ncount)
 1071 {
 1072         struct port_info *pi = arg;
 1073         int i, nqsets = pi->nqsets;
 1074         adapter_t *sc = pi->adapter;
 1075         struct sge_qset *qs;
 1076         struct mtx *lock;
 1077         
 1078         KASSERT((sc->flags & USING_MSIX) == 0,
 1079             ("can't call timer reclaim for msi-x"));
 1080 
 1081         for (i = 0; i < nqsets; i++) {
 1082                 qs = &sc->sge.qs[pi->first_qset + i];
 1083 
 1084                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1085                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1086                             &sc->sge.qs[0].rspq.lock;
 1087 
 1088                 if (mtx_trylock(lock)) {
 1089                         /* XXX currently assume that we are *NOT* polling */
 1090                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1091 
 1092                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1093                                 __refill_fl(sc, &qs->fl[0]);
 1094                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1095                                 __refill_fl(sc, &qs->fl[1]);
 1096                         
 1097                         if (status & (1 << qs->rspq.cntxt_id)) {
 1098                                 if (qs->rspq.credits) {
 1099                                         refill_rspq(sc, &qs->rspq, 1);
 1100                                         qs->rspq.credits--;
 1101                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1102                                             1 << qs->rspq.cntxt_id);
 1103                                 }
 1104                         }
 1105                         mtx_unlock(lock);
 1106                 }
 1107         }
 1108 }
 1109 
 1110 /**
 1111  *      init_qset_cntxt - initialize an SGE queue set context info
 1112  *      @qs: the queue set
 1113  *      @id: the queue set id
 1114  *
 1115  *      Initializes the TIDs and context ids for the queues of a queue set.
 1116  */
 1117 static void
 1118 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1119 {
 1120 
 1121         qs->rspq.cntxt_id = id;
 1122         qs->fl[0].cntxt_id = 2 * id;
 1123         qs->fl[1].cntxt_id = 2 * id + 1;
 1124         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1125         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1126         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1127         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1128         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1129 
 1130         /* XXX: a sane limit is needed instead of INT_MAX */
 1131         mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
 1132         mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
 1133         mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
 1134 }
 1135 
 1136 
 1137 static void
 1138 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1139 {
 1140         txq->in_use += ndesc;
 1141         /*
 1142          * XXX we don't handle stopping of queue
 1143          * presumably start handles this when we bump against the end
 1144          */
 1145         txqs->gen = txq->gen;
 1146         txq->unacked += ndesc;
 1147         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1148         txq->unacked &= 31;
 1149         txqs->pidx = txq->pidx;
 1150         txq->pidx += ndesc;
 1151 #ifdef INVARIANTS
 1152         if (((txqs->pidx > txq->cidx) &&
 1153                 (txq->pidx < txqs->pidx) &&
 1154                 (txq->pidx >= txq->cidx)) ||
 1155             ((txqs->pidx < txq->cidx) &&
 1156                 (txq->pidx >= txq-> cidx)) ||
 1157             ((txqs->pidx < txq->cidx) &&
 1158                 (txq->cidx < txqs->pidx)))
 1159                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1160                     txqs->pidx, txq->pidx, txq->cidx);
 1161 #endif
 1162         if (txq->pidx >= txq->size) {
 1163                 txq->pidx -= txq->size;
 1164                 txq->gen ^= 1;
 1165         }
 1166 
 1167 }
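      /*
       * Note: txqs->compl requests a work-request completion roughly every 32
       * descriptors - unacked accumulates modulo 32 and the carry into bit 5
       * is shifted into the WR_COMPL position of the header.
       */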
 1168 
 1169 /**
 1170  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1171  *      @m: the packet mbufs
 1172  *      @nsegs: the number of segments 
 1173  *
 1174  *      Returns the number of Tx descriptors needed for the given Ethernet
 1175  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1176  */
 1177 static __inline unsigned int
 1178 calc_tx_descs(const struct mbuf *m, int nsegs)
 1179 {
 1180         unsigned int flits;
 1181 
 1182         if (m->m_pkthdr.len <= PIO_LEN)
 1183                 return 1;
 1184 
 1185         flits = sgl_len(nsegs) + 2;
 1186         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1187                 flits++;
 1188 
 1189         return flits_to_desc(flits);
 1190 }
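      /*
       * Example (a sketch): packets short enough for immediate data
       * (m_pkthdr.len <= PIO_LEN) always fit in one descriptor.  Otherwise a
       * 3-segment packet without TSO needs sgl_len(3) + 2 == 7 flits for the
       * WR/CPL headers plus SGL, and flits_to_desc(7) yields the descriptor
       * count.
       */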
 1191 
 1192 /**
 1193  *      make_sgl - populate a scatter/gather list for a packet
 1194  *      @sgp: the SGL to populate
 1195  *      @segs: the packet dma segments
 1196  *      @nsegs: the number of segments
 1197  *
 1198  *      Generates a scatter/gather list for the buffers that make up a packet
 1199  *      and writes it to @sgp.  The caller must size the SGL appropriately
 1200  *      (see sgl_len()).
 1201  */
 1202 static __inline void
 1203 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1204 {
 1205         int i, idx;
 1206         
 1207         for (idx = 0, i = 0; i < nsegs; i++) {
 1208                 /*
 1209                  * firmware doesn't like empty segments
 1210                  */
 1211                 if (segs[i].ds_len == 0)
 1212                         continue;
 1213                 if (i && idx == 0) 
 1214                         ++sgp;
 1215                 
 1216                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1217                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1218                 idx ^= 1;
 1219         }
 1220         
 1221         if (idx) {
 1222                 sgp->len[idx] = 0;
 1223                 sgp->addr[idx] = 0;
 1224         }
 1225 }
 1226         
 1227 /**
 1228  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1229  *      @adap: the adapter
 1230  *      @q: the Tx queue
 1231  *
 1232  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1233  *      where the HW is going to sleep just after we checked; in that
 1234  *      case the interrupt handler will detect the outstanding TX packet
 1235  *      and ring the doorbell for us.
 1236  *
 1237  *      When GTS is disabled we unconditionally ring the doorbell.
 1238  */
 1239 static __inline void
 1240 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1241 {
 1242 #if USE_GTS
 1243         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1244         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1245                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1246 #ifdef T3_TRACE
 1247                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1248                           q->cntxt_id);
 1249 #endif
 1250                 t3_write_reg(adap, A_SG_KDOORBELL,
 1251                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1252         }
 1253 #else
 1254         if (mustring || ++q->db_pending >= 32) {
 1255                 wmb();            /* write descriptors before telling HW */
 1256                 t3_write_reg(adap, A_SG_KDOORBELL,
 1257                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1258                 q->db_pending = 0;
 1259         }
 1260 #endif
 1261 }
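      /*
       * Note: with GTS disabled (USE_GTS == 0) the doorbell write is batched
       * and issued only when @mustring is set or 32 descriptors have
       * accumulated in q->db_pending, after a wmb() that orders the
       * descriptor writes ahead of the doorbell.
       */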
 1262 
 1263 static __inline void
 1264 wr_gen2(struct tx_desc *d, unsigned int gen)
 1265 {
 1266 #if SGE_NUM_GENBITS == 2
 1267         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1268 #endif
 1269 }
 1270 
 1271 /**
 1272  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1273  *      @ndesc: number of Tx descriptors spanned by the SGL
 1274  *      @txd: first Tx descriptor to be written
 1275  *      @txqs: txq state (generation and producer index)
 1276  *      @txq: the SGE Tx queue
 1277  *      @sgl: the SGL
 1278  *      @flits: number of flits to the start of the SGL in the first descriptor
 1279  *      @sgl_flits: the SGL size in flits
 1280  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1281  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1282  *
 1283  *      Write a work request header and an associated SGL.  If the SGL is
 1284  *      small enough to fit into one Tx descriptor it has already been written
 1285  *      and we just need to write the WR header.  Otherwise we distribute the
 1286  *      SGL across the number of descriptors it spans.
 1287  */
 1288 static void
 1289 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1290     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1291     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1292 {
 1293 
 1294         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1295         
 1296         if (__predict_true(ndesc == 1)) {
 1297                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1298                     V_WR_SGLSFLT(flits)) | wr_hi,
 1299                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1300                     wr_lo);
 1301 
 1302                 wr_gen2(txd, txqs->gen);
 1303                 
 1304         } else {
 1305                 unsigned int ogen = txqs->gen;
 1306                 const uint64_t *fp = (const uint64_t *)sgl;
 1307                 struct work_request_hdr *wp = wrp;
 1308                 
 1309                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1310                     V_WR_SGLSFLT(flits)) | wr_hi;
 1311                 
 1312                 while (sgl_flits) {
 1313                         unsigned int avail = WR_FLITS - flits;
 1314 
 1315                         if (avail > sgl_flits)
 1316                                 avail = sgl_flits;
 1317                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1318                         sgl_flits -= avail;
 1319                         ndesc--;
 1320                         if (!sgl_flits)
 1321                                 break;
 1322                         
 1323                         fp += avail;
 1324                         txd++;
 1325                         if (++txqs->pidx == txq->size) {
 1326                                 txqs->pidx = 0;
 1327                                 txqs->gen ^= 1;
 1328                                 txd = txq->desc;
 1329                         }
 1330 
 1331                         /*
 1332                          * when the head of the mbuf chain
 1333                          * is freed all clusters will be freed
 1334                          * with it
 1335                          */
 1336                         wrp = (struct work_request_hdr *)txd;
 1337                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1338                             V_WR_SGLSFLT(1)) | wr_hi;
 1339                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1340                                     sgl_flits + 1)) |
 1341                             V_WR_GEN(txqs->gen)) | wr_lo;
 1342                         wr_gen2(txd, txqs->gen);
 1343                         flits = 1;
 1344                 }
 1345                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1346                 wmb();
 1347                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1348                 wr_gen2((struct tx_desc *)wp, ogen);
 1349         }
 1350 }
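      /*
       * Note: for multi-descriptor WRs the first descriptor's low header word
       * (carrying its generation bit) is written only after the wmb(), once
       * all later descriptors and the EOP flag are in place, so the SGE never
       * sees a partially written work request.
       */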
 1351 
 1352 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1353 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1354 
 1355 #define GET_VTAG(cntrl, m) \
 1356 do { \
 1357         if ((m)->m_flags & M_VLANTAG)                                               \
 1358                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1359 } while (0)
 1360 
 1361 static int
 1362 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1363 {
 1364         adapter_t *sc;
 1365         struct mbuf *m0;
 1366         struct sge_txq *txq;
 1367         struct txq_state txqs;
 1368         struct port_info *pi;
 1369         unsigned int ndesc, flits, cntrl, mlen;
 1370         int err, nsegs, tso_info = 0;
 1371 
 1372         struct work_request_hdr *wrp;
 1373         struct tx_sw_desc *txsd;
 1374         struct sg_ent *sgp, *sgl;
 1375         uint32_t wr_hi, wr_lo, sgl_flits; 
 1376         bus_dma_segment_t segs[TX_MAX_SEGS];
 1377 
 1378         struct tx_desc *txd;
 1379                 
 1380         pi = qs->port;
 1381         sc = pi->adapter;
 1382         txq = &qs->txq[TXQ_ETH];
 1383         txd = &txq->desc[txq->pidx];
 1384         txsd = &txq->sdesc[txq->pidx];
 1385         sgl = txq->txq_sgl;
 1386 
 1387         prefetch(txd);
 1388         m0 = *m;
 1389 
 1390         mtx_assert(&qs->lock, MA_OWNED);
 1391         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1392         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1393         
 1394         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1395             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1396                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1397 
 1398         if (m0->m_nextpkt != NULL) {
 1399                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1400                 ndesc = 1;
 1401                 mlen = 0;
 1402         } else {
 1403                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1404                     &m0, segs, &nsegs))) {
 1405                         if (cxgb_debug)
 1406                                 printf("failed ... err=%d\n", err);
 1407                         return (err);
 1408                 }
 1409                 mlen = m0->m_pkthdr.len;
 1410                 ndesc = calc_tx_descs(m0, nsegs);
 1411         }
 1412         txq_prod(txq, ndesc, &txqs);
 1413 
 1414         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1415         txsd->m = m0;
 1416 
 1417         if (m0->m_nextpkt != NULL) {
 1418                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1419                 int i, fidx;
 1420 
 1421                 if (nsegs > 7)
 1422                         panic("trying to coalesce %d packets into one WR", nsegs);
 1423                 txq->txq_coalesced += nsegs;
 1424                 wrp = (struct work_request_hdr *)txd;
 1425                 flits = nsegs*2 + 1;
 1426 
 1427                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1428                         struct cpl_tx_pkt_batch_entry *cbe;
 1429                         uint64_t flit;
 1430                         uint32_t *hflit = (uint32_t *)&flit;
 1431                         int cflags = m0->m_pkthdr.csum_flags;
 1432 
 1433                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1434                         GET_VTAG(cntrl, m0);
 1435                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1436                         if (__predict_false(!(cflags & CSUM_IP)))
 1437                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1438                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1439                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1440                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1441 
 1442                         hflit[0] = htonl(cntrl);
 1443                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1444                         flit |= htobe64(1 << 24);
 1445                         cbe = &cpl_batch->pkt_entry[i];
 1446                         cbe->cntrl = hflit[0];
 1447                         cbe->len = hflit[1];
 1448                         cbe->addr = htobe64(segs[i].ds_addr);
 1449                 }
 1450 
 1451                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1452                     V_WR_SGLSFLT(flits)) |
 1453                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1454                 wr_lo = htonl(V_WR_LEN(flits) |
 1455                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1456                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1457                 wmb();
 1458                 ETHER_BPF_MTAP(pi->ifp, m0);
 1459                 wr_gen2(txd, txqs.gen);
 1460                 check_ring_tx_db(sc, txq, 0);
 1461                 return (0);             
 1462         } else if (tso_info) {
 1463                 uint16_t eth_type;
 1464                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1465                 struct ether_header *eh;
 1466                 void *l3hdr;
 1467                 struct tcphdr *tcp;
 1468 
 1469                 txd->flit[2] = 0;
 1470                 GET_VTAG(cntrl, m0);
 1471                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1472                 hdr->cntrl = htonl(cntrl);
 1473                 hdr->len = htonl(mlen | 0x80000000);
 1474 
 1475                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1476                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
 1477                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1478                             (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
 1479                         panic("tx tso packet too small");
 1480                 }
 1481 
 1482                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1483                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1484                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1485                         if (__predict_false(m0 == NULL)) {
 1486                                 /* XXX panic probably an overreaction */
 1487                                 panic("couldn't fit header into mbuf");
 1488                         }
 1489                 }
 1490 
 1491                 eh = mtod(m0, struct ether_header *);
 1492                 eth_type = eh->ether_type;
 1493                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1494                         struct ether_vlan_header *evh = (void *)eh;
 1495 
 1496                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1497                         l3hdr = evh + 1;
 1498                         eth_type = evh->evl_proto;
 1499                 } else {
 1500                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1501                         l3hdr = eh + 1;
 1502                 }
 1503 
 1504                 if (eth_type == htons(ETHERTYPE_IP)) {
 1505                         struct ip *ip = l3hdr;
 1506 
 1507                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1508                         tcp = (struct tcphdr *)(ip + 1);
 1509                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1510                         struct ip6_hdr *ip6 = l3hdr;
 1511 
 1512                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1513                             ("%s: CSUM_TSO with ip6_nxt %d",
 1514                             __func__, ip6->ip6_nxt));
 1515 
 1516                         tso_info |= F_LSO_IPV6;
 1517                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1518                         tcp = (struct tcphdr *)(ip6 + 1);
 1519                 } else
 1520                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1521 
 1522                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1523                 hdr->lso_info = htonl(tso_info);
 1524 
 1525                 if (__predict_false(mlen <= PIO_LEN)) {
 1526                         /*
 1527                          * Packet is not undersized but still fits in PIO_LEN;
 1528                          * this indicates a TSO bug at the higher levels.
 1529                          */
 1530                         txsd->m = NULL;
 1531                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1532                         flits = (mlen + 7) / 8 + 3;
 1533                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1534                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1535                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1536                         wr_lo = htonl(V_WR_LEN(flits) |
 1537                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1538                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1539                         wmb();
 1540                         ETHER_BPF_MTAP(pi->ifp, m0);
 1541                         wr_gen2(txd, txqs.gen);
 1542                         check_ring_tx_db(sc, txq, 0);
 1543                         m_freem(m0);
 1544                         return (0);
 1545                 }
 1546                 flits = 3;      
 1547         } else {
 1548                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1549                 
 1550                 GET_VTAG(cntrl, m0);
 1551                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1552                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1553                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1554                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1555                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1556                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1557                 cpl->cntrl = htonl(cntrl);
 1558                 cpl->len = htonl(mlen | 0x80000000);
 1559 
 1560                 if (mlen <= PIO_LEN) {
 1561                         txsd->m = NULL;
 1562                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1563                         flits = (mlen + 7) / 8 + 2;
 1564                         
 1565                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1566                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1567                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1568                         wr_lo = htonl(V_WR_LEN(flits) |
 1569                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1570                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1571                         wmb();
 1572                         ETHER_BPF_MTAP(pi->ifp, m0);
 1573                         wr_gen2(txd, txqs.gen);
 1574                         check_ring_tx_db(sc, txq, 0);
 1575                         m_freem(m0);
 1576                         return (0);
 1577                 }
 1578                 flits = 2;
 1579         }
 1580         wrp = (struct work_request_hdr *)txd;
 1581         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1582         make_sgl(sgp, segs, nsegs);
 1583 
 1584         sgl_flits = sgl_len(nsegs);
 1585 
 1586         ETHER_BPF_MTAP(pi->ifp, m0);
 1587 
 1588         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1589         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1590         wr_lo = htonl(V_WR_TID(txq->token));
 1591         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1592             sgl_flits, wr_hi, wr_lo);
 1593         check_ring_tx_db(sc, txq, 0);
 1594 
 1595         return (0);
 1596 }
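      /*
       * Summary of the three paths through t3_encap() above:
       *
       *  - m_nextpkt != NULL: up to 7 packets are coalesced into a single
       *    FW_WROPCODE_TUNNEL_TX_PKT work request, one
       *    cpl_tx_pkt_batch_entry (two flits) per packet.
       *
       *  - CSUM_TSO set: a CPL_TX_PKT_LSO header is built from the
       *    Ethernet/IP(v6)/TCP headers; a packet that fits in PIO_LEN is
       *    copied inline into the descriptor and freed immediately.
       *
       *  - otherwise: a plain CPL_TX_PKT is used, again inlining the
       *    payload when mlen <= PIO_LEN and otherwise falling through to
       *    the SGL path handled by write_wr_hdr_sgl() at the end of the
       *    function.
       */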
 1597 
 1598 #ifdef NETDUMP
 1599 int
 1600 cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
 1601 {
 1602         int error;
 1603 
 1604         error = t3_encap(qs, m);
 1605         if (error == 0)
 1606                 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
 1607         else if (*m != NULL) {
 1608                 m_freem(*m);
 1609                 *m = NULL;
 1610         }
 1611         return (error);
 1612 }
 1613 #endif
 1614 
 1615 void
 1616 cxgb_tx_watchdog(void *arg)
 1617 {
 1618         struct sge_qset *qs = arg;
 1619         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1620 
 1621         if (qs->coalescing != 0 &&
 1622             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1623             TXQ_RING_EMPTY(qs))
 1624                 qs->coalescing = 0; 
 1625         else if (qs->coalescing == 0 &&
 1626             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1627                 qs->coalescing = 1;
 1628         if (TXQ_TRYLOCK(qs)) {
 1629                 qs->qs_flags |= QS_FLUSHING;
 1630                 cxgb_start_locked(qs);
 1631                 qs->qs_flags &= ~QS_FLUSHING;
 1632                 TXQ_UNLOCK(qs);
 1633         }
 1634         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1635                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1636                     qs, txq->txq_watchdog.c_cpu);
 1637 }
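      /*
       * The watchdog above implements simple hysteresis for transmit
       * coalescing: coalescing is turned on once in_use reaches
       * cxgb_tx_coalesce_enable_start and turned off only after in_use
       * drops to cxgb_tx_coalesce_enable_stop with an empty software ring.
       * While the interface is running, the callout re-arms itself every
       * hz/4 ticks on the queue's assigned CPU.
       */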
 1638 
 1639 static void
 1640 cxgb_tx_timeout(void *arg)
 1641 {
 1642         struct sge_qset *qs = arg;
 1643         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1644 
 1645         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1646                 qs->coalescing = 1;     
 1647         if (TXQ_TRYLOCK(qs)) {
 1648                 qs->qs_flags |= QS_TIMEOUT;
 1649                 cxgb_start_locked(qs);
 1650                 qs->qs_flags &= ~QS_TIMEOUT;
 1651                 TXQ_UNLOCK(qs);
 1652         }
 1653 }
 1654 
 1655 static void
 1656 cxgb_start_locked(struct sge_qset *qs)
 1657 {
 1658         struct mbuf *m_head = NULL;
 1659         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1660         struct port_info *pi = qs->port;
 1661         struct ifnet *ifp = pi->ifp;
 1662 
 1663         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1664                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1665 
 1666         if (!pi->link_config.link_ok) {
 1667                 TXQ_RING_FLUSH(qs);
 1668                 return;
 1669         }
 1670         TXQ_LOCK_ASSERT(qs);
 1671         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1672             pi->link_config.link_ok) {
 1673                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1674 
 1675                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1676                         break;
 1677 
 1678                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1679                         break;
 1680                 /*
 1681                  *  Encapsulation can modify our pointer, and/or make it
 1682                  *  NULL on failure.  In that event, we can't requeue.
 1683                  */
 1684                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1685                         break;
 1686 
 1687                 m_head = NULL;
 1688         }
 1689 
 1690         if (txq->db_pending)
 1691                 check_ring_tx_db(pi->adapter, txq, 1);
 1692 
 1693         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1694             pi->link_config.link_ok)
 1695                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1696                     qs, txq->txq_timer.c_cpu);
 1697         if (m_head != NULL)
 1698                 m_freem(m_head);
 1699 }
 1700 
 1701 static int
 1702 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1703 {
 1704         struct port_info *pi = qs->port;
 1705         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1706         struct buf_ring *br = txq->txq_mr;
 1707         int error, avail;
 1708 
 1709         avail = txq->size - txq->in_use;
 1710         TXQ_LOCK_ASSERT(qs);
 1711 
 1712         /*
 1713          * We can only do a direct transmit if the following are true:
 1714          * - we aren't coalescing (ring < 3/4 full)
 1715          * - the link is up -- checked in the caller
 1716          * - there are no packets already enqueued
 1717          * - there is space in the hardware transmit queue
 1718          */
 1719         if (check_pkt_coalesce(qs) == 0 &&
 1720             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1721                 if (t3_encap(qs, &m)) {
 1722                         if (m != NULL &&
 1723                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1724                                 return (error);
 1725                 } else {
 1726                         if (txq->db_pending)
 1727                                 check_ring_tx_db(pi->adapter, txq, 1);
 1728 
 1729                         /*
 1730                          * We've bypassed the buf ring so we need to update
 1731                          * the stats directly
 1732                          */
 1733                         txq->txq_direct_packets++;
 1734                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1735                 }
 1736         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1737                 return (error);
 1738 
 1739         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1740         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1741             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1742                 cxgb_start_locked(qs);
 1743         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1744                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1745                     qs, txq->txq_timer.c_cpu);
 1746         return (0);
 1747 }
 1748 
 1749 int
 1750 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1751 {
 1752         struct sge_qset *qs;
 1753         struct port_info *pi = ifp->if_softc;
 1754         int error, qidx = pi->first_qset;
 1755 
 1756         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1757             !pi->link_config.link_ok) {
 1758                 m_freem(m);
 1759                 return (0);
 1760         }
 1761 
 1762         /* check if flowid is set */
 1763         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)       
 1764                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1765 
 1766         qs = &pi->adapter->sge.qs[qidx];
 1767         
 1768         if (TXQ_TRYLOCK(qs)) {
 1769                 /* XXX running */
 1770                 error = cxgb_transmit_locked(ifp, qs, m);
 1771                 TXQ_UNLOCK(qs);
 1772         } else
 1773                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1774         return (error);
 1775 }
 1776 
 1777 void
 1778 cxgb_qflush(struct ifnet *ifp)
 1779 {
 1780         /*
 1781          * This should flush any mbufs enqueued in the buf_rings
 1782          * and in the transmit queues.
 1783          * It is a no-op for now.
 1784          */
 1785         return;
 1786 }
 1787 
 1788 /**
 1789  *      write_imm - write a packet into a Tx descriptor as immediate data
 1790  *      @d: the Tx descriptor to write
 1791  *      @src: the packet
 1792  *      @len: the length of packet data to write as immediate data
 1793  *      @gen: the generation bit value to write
 1794  *
 1795  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1796  *      contains a work request at its beginning.  We must write the packet
 1797  *      carefully so the SGE doesn't accidentally read it before it's written in
 1798  *      its entirety.
 1799  */
 1800 static __inline void
 1801 write_imm(struct tx_desc *d, caddr_t src,
 1802           unsigned int len, unsigned int gen)
 1803 {
 1804         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1805         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1806         uint32_t wr_hi, wr_lo;
 1807 
 1808         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1809             ("%s: invalid len %d", __func__, len));
 1810         
 1811         memcpy(&to[1], &from[1], len - sizeof(*from));
 1812         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1813             V_WR_BCNTLFLT(len & 7));
 1814         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1815         set_wr_hdr(to, wr_hi, wr_lo);
 1816         wmb();
 1817         wr_gen2(d, gen);
 1818 }
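      /*
       * Note the ordering in write_imm(): the payload flits are copied
       * first (starting at to[1]), set_wr_hdr() then fills in the request
       * header, and only after the wmb() does wr_gen2() publish the
       * generation bit, so the SGE never sees a descriptor whose body has
       * not been written out yet.
       */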
 1819 
 1820 /**
 1821  *      check_desc_avail - check descriptor availability on a send queue
 1822  *      @adap: the adapter
 1823  *      @q: the TX queue
 1824  *      @m: the packet needing the descriptors
 1825  *      @ndesc: the number of Tx descriptors needed
 1826  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1827  *
 1828  *      Checks if the requested number of Tx descriptors is available on an
 1829  *      SGE send queue.  If the queue is already suspended or not enough
 1830  *      descriptors are available the packet is queued for later transmission.
 1831  *      Must be called with the Tx queue locked.
 1832  *
 1833  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1834  *      enough descriptors and the packet has been queued, and 2 if the caller
 1835  *      needs to retry because there weren't enough descriptors at the
 1836  *      beginning of the call but some freed up in the meantime.
 1837  */
 1838 static __inline int
 1839 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1840                  struct mbuf *m, unsigned int ndesc,
 1841                  unsigned int qid)
 1842 {
 1843         /*
 1844          * XXX We currently only use this for checking the control queue;
 1845          * the control queue is only used for binding qsets, which happens
 1846          * at init time, so we are guaranteed enough descriptors.
 1847          */
 1848         if (__predict_false(mbufq_len(&q->sendq))) {
 1849 addq_exit:      (void)mbufq_enqueue(&q->sendq, m);
 1850                 return 1;
 1851         }
 1852         if (__predict_false(q->size - q->in_use < ndesc)) {
 1853 
 1854                 struct sge_qset *qs = txq_to_qset(q, qid);
 1855 
 1856                 setbit(&qs->txq_stopped, qid);
 1857                 if (should_restart_tx(q) &&
 1858                     test_and_clear_bit(qid, &qs->txq_stopped))
 1859                         return 2;
 1860 
 1861                 q->stops++;
 1862                 goto addq_exit;
 1863         }
 1864         return 0;
 1865 }
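      /*
       * Typical caller pattern for check_desc_avail(), following
       * ctrl_xmit() and ofld_xmit() below:
       *
       *      again:  reclaim_completed_tx...(q);
       *              ret = check_desc_avail(adap, q, m, ndesc, qid);
       *              if (ret == 1) {
       *                      TXQ_UNLOCK(qs);  // m now sits on q->sendq and
       *                      return (error);  // the restart task resends it
       *              }
       *              if (ret == 2)            // descriptors freed meanwhile
       *                      goto again;
       *              // ret == 0: enough room, write the work request
       */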
 1866 
 1867 
 1868 /**
 1869  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1870  *      @q: the SGE control Tx queue
 1871  *
 1872  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1873  *      that send only immediate data (presently just the control queues) and
 1874  *      thus do not have any mbufs.
 1875  */
 1876 static __inline void
 1877 reclaim_completed_tx_imm(struct sge_txq *q)
 1878 {
 1879         unsigned int reclaim = q->processed - q->cleaned;
 1880 
 1881         q->in_use -= reclaim;
 1882         q->cleaned += reclaim;
 1883 }
 1884 
 1885 /**
 1886  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1887  *      @adap: the adapter
 1888  *      @q: the control queue
 1889  *      @m: the packet
 1890  *
 1891  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1892  *      a control queue must fit entirely as immediate data in a single Tx
 1893  *      descriptor and have no page fragments.
 1894  */
 1895 static int
 1896 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1897 {
 1898         int ret;
 1899         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1900         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1901         
 1902         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1903 
 1904         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1905         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1906 
 1907         TXQ_LOCK(qs);
 1908 again:  reclaim_completed_tx_imm(q);
 1909 
 1910         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1911         if (__predict_false(ret)) {
 1912                 if (ret == 1) {
 1913                         TXQ_UNLOCK(qs);
 1914                         return (ENOSPC);
 1915                 }
 1916                 goto again;
 1917         }
 1918         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1919         
 1920         q->in_use++;
 1921         if (++q->pidx >= q->size) {
 1922                 q->pidx = 0;
 1923                 q->gen ^= 1;
 1924         }
 1925         TXQ_UNLOCK(qs);
 1926         wmb();
 1927         t3_write_reg(adap, A_SG_KDOORBELL,
 1928             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1929 
 1930         m_free(m);
 1931         return (0);
 1932 }
 1933 
 1934 
 1935 /**
 1936  *      restart_ctrlq - restart a suspended control queue
 1937  *      @qs: the queue set containing the control queue
 1938  *
 1939  *      Resumes transmission on a suspended Tx control queue.
 1940  */
 1941 static void
 1942 restart_ctrlq(void *data, int npending)
 1943 {
 1944         struct mbuf *m;
 1945         struct sge_qset *qs = (struct sge_qset *)data;
 1946         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1947         adapter_t *adap = qs->port->adapter;
 1948 
 1949         TXQ_LOCK(qs);
 1950 again:  reclaim_completed_tx_imm(q);
 1951 
 1952         while (q->in_use < q->size &&
 1953                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1954 
 1955                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1956                 m_free(m);
 1957 
 1958                 if (++q->pidx >= q->size) {
 1959                         q->pidx = 0;
 1960                         q->gen ^= 1;
 1961                 }
 1962                 q->in_use++;
 1963         }
 1964         if (mbufq_len(&q->sendq)) {
 1965                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1966 
 1967                 if (should_restart_tx(q) &&
 1968                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1969                         goto again;
 1970                 q->stops++;
 1971         }
 1972         TXQ_UNLOCK(qs);
 1973         t3_write_reg(adap, A_SG_KDOORBELL,
 1974                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1975 }
 1976 
 1977 
 1978 /*
 1979  * Send a management message through control queue 0
 1980  */
 1981 int
 1982 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1983 {
 1984         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1985 }
 1986 
 1987 /**
 1988  *      t3_free_qset - free the resources of an SGE queue set
 1989  *      @sc: the controller owning the queue set
 1990  *      @q: the queue set
 1991  *
 1992  *      Release the HW and SW resources associated with an SGE queue set, such
 1993  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1994  *      queue set must be quiesced prior to calling this.
 1995  */
 1996 static void
 1997 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1998 {
 1999         int i;
 2000         
 2001         reclaim_completed_tx(q, 0, TXQ_ETH);
 2002         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 2003                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 2004         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2005                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2006                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2007         }
 2008 
 2009         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2010                 if (q->fl[i].desc) {
 2011                         mtx_lock_spin(&sc->sge.reg_lock);
 2012                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2013                         mtx_unlock_spin(&sc->sge.reg_lock);
 2014                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2015                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2016                                         q->fl[i].desc_map);
 2017                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2018                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2019                 }
 2020                 if (q->fl[i].sdesc) {
 2021                         free_rx_bufs(sc, &q->fl[i]);
 2022                         free(q->fl[i].sdesc, M_DEVBUF);
 2023                 }
 2024         }
 2025 
 2026         mtx_unlock(&q->lock);
 2027         MTX_DESTROY(&q->lock);
 2028         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2029                 if (q->txq[i].desc) {
 2030                         mtx_lock_spin(&sc->sge.reg_lock);
 2031                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2032                         mtx_unlock_spin(&sc->sge.reg_lock);
 2033                         bus_dmamap_unload(q->txq[i].desc_tag,
 2034                                         q->txq[i].desc_map);
 2035                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2036                                         q->txq[i].desc_map);
 2037                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2038                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2039                 }
 2040                 if (q->txq[i].sdesc) {
 2041                         free(q->txq[i].sdesc, M_DEVBUF);
 2042                 }
 2043         }
 2044 
 2045         if (q->rspq.desc) {
 2046                 mtx_lock_spin(&sc->sge.reg_lock);
 2047                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2048                 mtx_unlock_spin(&sc->sge.reg_lock);
 2049                 
 2050                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2051                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2052                                 q->rspq.desc_map);
 2053                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2054                 MTX_DESTROY(&q->rspq.lock);
 2055         }
 2056 
 2057 #if defined(INET6) || defined(INET)
 2058         tcp_lro_free(&q->lro.ctrl);
 2059 #endif
 2060 
 2061         bzero(q, sizeof(*q));
 2062 }
 2063 
 2064 /**
 2065  *      t3_free_sge_resources - free SGE resources
 2066  *      @sc: the adapter softc
 2067  *
 2068  *      Frees resources used by the SGE queue sets.
 2069  */
 2070 void
 2071 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2072 {
 2073         int i;
 2074 
 2075         for (i = 0; i < nqsets; ++i) {
 2076                 TXQ_LOCK(&sc->sge.qs[i]);
 2077                 t3_free_qset(sc, &sc->sge.qs[i]);
 2078         }
 2079 }
 2080 
 2081 /**
 2082  *      t3_sge_start - enable SGE
 2083  *      @sc: the controller softc
 2084  *
 2085  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2086  *      transfers.
 2087  */
 2088 void
 2089 t3_sge_start(adapter_t *sc)
 2090 {
 2091         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2092 }
 2093 
 2094 /**
 2095  *      t3_sge_stop - disable SGE operation
 2096  *      @sc: the adapter
 2097  *
 2098  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2099  *      from error interrupts) or from normal process context.  In the latter
 2100  *      case it also disables any pending queue restart tasklets.  Note that
 2101  *      if it is called in interrupt context it cannot disable the restart
 2102  *      tasklets as it cannot wait; however, the tasklets will have no effect
 2103  *      since the doorbells are disabled, and the driver will call this again
 2104  *      later from process context, at which time the tasklets will be stopped
 2105  *      if they are still running.
 2106  */
 2107 void
 2108 t3_sge_stop(adapter_t *sc)
 2109 {
 2110         int i, nqsets;
 2111         
 2112         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2113 
 2114         if (sc->tq == NULL)
 2115                 return;
 2116         
 2117         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2118                 nqsets += sc->port[i].nqsets;
 2119 #ifdef notyet
 2120         /*
 2121          * 
 2122          * XXX
 2123          */
 2124         for (i = 0; i < nqsets; ++i) {
 2125                 struct sge_qset *qs = &sc->sge.qs[i];
 2126                 
 2127                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2128                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2129         }
 2130 #endif
 2131 }
 2132 
 2133 /**
 2134  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2135  *      @qs: the queue set owning the Tx queue
 2136  *      @reclaimable: the number of descriptors to reclaim
 2137  *      @queue: index of the Tx queue within the queue set (e.g. TXQ_ETH)
 2138  *
 2139  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2140  *      Tx buffers.  Called with the Tx queue lock held.
 2141  *
 2142  *      Does not return a value; the queue's cidx is simply advanced past
 2143  *      the reclaimed descriptors and any unmapped mbuf chains are freed
 2144  *      with m_freem_list().
 2145  */
 2146 void
 2147 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2148 {
 2149         struct tx_sw_desc *txsd;
 2150         unsigned int cidx, mask;
 2151         struct sge_txq *q = &qs->txq[queue];
 2152 
 2153 #ifdef T3_TRACE
 2154         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2155                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2156 #endif
 2157         cidx = q->cidx;
 2158         mask = q->size - 1;
 2159         txsd = &q->sdesc[cidx];
 2160 
 2161         mtx_assert(&qs->lock, MA_OWNED);
 2162         while (reclaimable--) {
 2163                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2164                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2165 
 2166                 if (txsd->m != NULL) {
 2167                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2168                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2169                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2170                         }
 2171                         m_freem_list(txsd->m);
 2172                         txsd->m = NULL;
 2173                 } else
 2174                         q->txq_skipped++;
 2175                 
 2176                 ++txsd;
 2177                 if (++cidx == q->size) {
 2178                         cidx = 0;
 2179                         txsd = q->sdesc;
 2180                 }
 2181         }
 2182         q->cidx = cidx;
 2183 
 2184 }
 2185 
 2186 /**
 2187  *      is_new_response - check if a response is newly written
 2188  *      @r: the response descriptor
 2189  *      @q: the response queue
 2190  *
 2191  *      Returns true if a response descriptor contains a yet unprocessed
 2192  *      response.
 2193  */
 2194 static __inline int
 2195 is_new_response(const struct rsp_desc *r,
 2196     const struct sge_rspq *q)
 2197 {
 2198         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2199 }
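      /*
       * The generation bit is how the driver tracks ring ownership without
       * an explicit producer index: the hardware writes each response with
       * the current generation value and flips it every time it wraps the
       * ring, while the driver flips q->gen when its own cidx wraps
       * (mirroring the q->gen ^= 1 pattern used on the Tx side above).  A
       * descriptor whose F_RSPD_GEN2 bit matches q->gen is therefore new.
       */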
 2200 
 2201 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2202 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2203                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2204                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2205                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2206 
 2207 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2208 #define NOMEM_INTR_DELAY 2500
 2209 
 2210 #ifdef TCP_OFFLOAD
 2211 /**
 2212  *      write_ofld_wr - write an offload work request
 2213  *      @adap: the adapter
 2214  *      @m: the packet to send
 2215  *      @q: the Tx queue
 2216  *      @pidx: index of the first Tx descriptor to write
 2217  *      @gen: the generation value to use
 2218  *      @ndesc: number of descriptors the packet will occupy
 2219  *
 2220  *      Write an offload work request to send the supplied packet.  The packet
 2221  *      data already carry the work request with most fields populated.
 2222  */
 2223 static void
 2224 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2225     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2226 {
 2227         unsigned int sgl_flits, flits;
 2228         int i, idx, nsegs, wrlen;
 2229         struct work_request_hdr *from;
 2230         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2231         struct tx_desc *d = &q->desc[pidx];
 2232         struct txq_state txqs;
 2233         struct sglist_seg *segs;
 2234         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2235         struct sglist *sgl;
 2236 
 2237         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2238         wrlen = m->m_len - sizeof(*oh);
 2239 
 2240         if (!(oh->flags & F_HDR_SGL)) {
 2241                 write_imm(d, (caddr_t)from, wrlen, gen);
 2242 
 2243                 /*
 2244                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2245                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2246                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2247                  */
 2248                 if (!(oh->flags & F_HDR_DF))
 2249                         m_free(m);
 2250                 return;
 2251         }
 2252 
 2253         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2254 
 2255         sgl = oh->sgl;
 2256         flits = wrlen / 8;
 2257         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2258 
 2259         nsegs = sgl->sg_nseg;
 2260         segs = sgl->sg_segs;
 2261         for (idx = 0, i = 0; i < nsegs; i++) {
 2262                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2263                 if (i && idx == 0) 
 2264                         ++sgp;
 2265                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2266                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2267                 idx ^= 1;
 2268         }
 2269         if (idx) {
 2270                 sgp->len[idx] = 0;
 2271                 sgp->addr[idx] = 0;
 2272         }
 2273 
 2274         sgl_flits = sgl_len(nsegs);
 2275         txqs.gen = gen;
 2276         txqs.pidx = pidx;
 2277         txqs.compl = 0;
 2278 
 2279         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2280             from->wrh_hi, from->wrh_lo);
 2281 }
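      /*
       * Each struct sg_ent written above packs two (length, address) pairs,
       * which is why the copy loop toggles idx between 0 and 1 and only
       * advances sgp on every other segment; an odd segment count leaves
       * the second half of the final entry zeroed so the SGE sees a
       * terminated list.
       */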
 2282 
 2283 /**
 2284  *      ofld_xmit - send a packet through an offload queue
 2285  *      @adap: the adapter
 2286  *      @q: the Tx offload queue
 2287  *      @m: the packet
 2288  *
 2289  *      Send an offload packet through an SGE offload queue.
 2290  */
 2291 static int
 2292 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2293 {
 2294         int ret;
 2295         unsigned int ndesc;
 2296         unsigned int pidx, gen;
 2297         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2298         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2299 
 2300         ndesc = G_HDR_NDESC(oh->flags);
 2301 
 2302         TXQ_LOCK(qs);
 2303 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2304         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2305         if (__predict_false(ret)) {
 2306                 if (ret == 1) {
 2307                         TXQ_UNLOCK(qs);
 2308                         return (EINTR);
 2309                 }
 2310                 goto again;
 2311         }
 2312 
 2313         gen = q->gen;
 2314         q->in_use += ndesc;
 2315         pidx = q->pidx;
 2316         q->pidx += ndesc;
 2317         if (q->pidx >= q->size) {
 2318                 q->pidx -= q->size;
 2319                 q->gen ^= 1;
 2320         }
 2321 
 2322         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2323         check_ring_tx_db(adap, q, 1);
 2324         TXQ_UNLOCK(qs);
 2325 
 2326         return (0);
 2327 }
 2328 
 2329 /**
 2330  *      restart_offloadq - restart a suspended offload queue
 2331  *      @qs: the queue set containing the offload queue
 2332  *
 2333  *      Resumes transmission on a suspended Tx offload queue.
 2334  */
 2335 static void
 2336 restart_offloadq(void *data, int npending)
 2337 {
 2338         struct mbuf *m;
 2339         struct sge_qset *qs = data;
 2340         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2341         adapter_t *adap = qs->port->adapter;
 2342         int cleaned;
 2343                 
 2344         TXQ_LOCK(qs);
 2345 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2346 
 2347         while ((m = mbufq_first(&q->sendq)) != NULL) {
 2348                 unsigned int gen, pidx;
 2349                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2350                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2351 
 2352                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2353                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2354                         if (should_restart_tx(q) &&
 2355                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2356                                 goto again;
 2357                         q->stops++;
 2358                         break;
 2359                 }
 2360 
 2361                 gen = q->gen;
 2362                 q->in_use += ndesc;
 2363                 pidx = q->pidx;
 2364                 q->pidx += ndesc;
 2365                 if (q->pidx >= q->size) {
 2366                         q->pidx -= q->size;
 2367                         q->gen ^= 1;
 2368                 }
 2369                 
 2370                 (void)mbufq_dequeue(&q->sendq);
 2371                 TXQ_UNLOCK(qs);
 2372                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2373                 TXQ_LOCK(qs);
 2374         }
 2375 #if USE_GTS
 2376         set_bit(TXQ_RUNNING, &q->flags);
 2377         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2378 #endif
 2379         TXQ_UNLOCK(qs);
 2380         wmb();
 2381         t3_write_reg(adap, A_SG_KDOORBELL,
 2382                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2383 }
 2384 
 2385 /**
 2386  *      t3_offload_tx - send an offload packet
 2387  *      @m: the packet
 2388  *
 2389  *      Sends an offload packet.  We use the packet priority to select the
 2390  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2391  *      should be sent as regular or control, bits 1-3 select the queue set.
 2392  */
 2393 int
 2394 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2395 {
 2396         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2397         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2398 
 2399         if (oh->flags & F_HDR_CTRL) {
 2400                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2401                 return (ctrl_xmit(sc, qs, m));
 2402         } else
 2403                 return (ofld_xmit(sc, qs, m));
 2404 }
 2405 #endif
 2406 
 2407 static void
 2408 restart_tx(struct sge_qset *qs)
 2409 {
 2410         struct adapter *sc = qs->port->adapter;
 2411 
 2412         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2413             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2414             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2415                 qs->txq[TXQ_OFLD].restarts++;
 2416                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2417         }
 2418 
 2419         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2420             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2421             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2422                 qs->txq[TXQ_CTRL].restarts++;
 2423                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2424         }
 2425 }
 2426 
 2427 /**
 2428  *      t3_sge_alloc_qset - initialize an SGE queue set
 2429  *      @sc: the controller softc
 2430  *      @id: the queue set id
 2431  *      @nports: how many Ethernet ports will be using this queue set
 2432  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2433  *      @p: configuration parameters for this queue set
 2434  *      @ntxq: number of Tx queues for the queue set
 2435  *      @pi: port info for queue set
 2436  *
 2437  *      Allocate resources and initialize an SGE queue set.  A queue set
 2438  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2439  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2440  *      queue, offload queue, and control queue.
 2441  */
 2442 int
 2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2444                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2445 {
 2446         struct sge_qset *q = &sc->sge.qs[id];
 2447         int i, ret = 0;
 2448 
 2449         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2450         q->port = pi;
 2451         q->adap = sc;
 2452 
 2453         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2454             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2455                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2456                 goto err;
 2457         }
 2458         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2459             M_NOWAIT | M_ZERO)) == NULL) {
 2460                 device_printf(sc->dev, "failed to allocate ifq\n");
 2461                 goto err;
 2462         }
 2463         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2464         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2465         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2466         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2467         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2468 
 2469         init_qset_cntxt(q, id);
 2470         q->idx = id;
 2471         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2472                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2473                     &q->fl[0].desc, &q->fl[0].sdesc,
 2474                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2475                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2476                 printf("error %d from alloc ring fl0\n", ret);
 2477                 goto err;
 2478         }
 2479 
 2480         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2481                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2482                     &q->fl[1].desc, &q->fl[1].sdesc,
 2483                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2484                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2485                 printf("error %d from alloc ring fl1\n", ret);
 2486                 goto err;
 2487         }
 2488 
 2489         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2490                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2491                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2492                     NULL, NULL)) != 0) {
 2493                 printf("error %d from alloc ring rspq\n", ret);
 2494                 goto err;
 2495         }
 2496 
 2497         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2498             device_get_unit(sc->dev), irq_vec_idx);
 2499         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2500 
 2501         for (i = 0; i < ntxq; ++i) {
 2502                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2503 
 2504                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2505                             sizeof(struct tx_desc), sz,
 2506                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2507                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2508                             &q->txq[i].desc_map,
 2509                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2510                         printf("error %d from alloc ring tx %i\n", ret, i);
 2511                         goto err;
 2512                 }
 2513                 mbufq_init(&q->txq[i].sendq, INT_MAX);
 2514                 q->txq[i].gen = 1;
 2515                 q->txq[i].size = p->txq_size[i];
 2516         }
 2517 
 2518 #ifdef TCP_OFFLOAD
 2519         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2520 #endif
 2521         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2522         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2523         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2524 
 2525         q->fl[0].gen = q->fl[1].gen = 1;
 2526         q->fl[0].size = p->fl_size;
 2527         q->fl[1].size = p->jumbo_size;
 2528 
 2529         q->rspq.gen = 1;
 2530         q->rspq.cidx = 0;
 2531         q->rspq.size = p->rspq_size;
 2532 
 2533         q->txq[TXQ_ETH].stop_thres = nports *
 2534             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2535 
 2536         q->fl[0].buf_size = MCLBYTES;
 2537         q->fl[0].zone = zone_pack;
 2538         q->fl[0].type = EXT_PACKET;
 2539 
 2540         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2541                 q->fl[1].zone = zone_jumbo16;
 2542                 q->fl[1].type = EXT_JUMBO16;
 2543         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2544                 q->fl[1].zone = zone_jumbo9;
 2545                 q->fl[1].type = EXT_JUMBO9;             
 2546         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2547                 q->fl[1].zone = zone_jumbop;
 2548                 q->fl[1].type = EXT_JUMBOP;
 2549         } else {
 2550                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2551                 ret = EDOOFUS;
 2552                 goto err;
 2553         }
 2554         q->fl[1].buf_size = p->jumbo_buf_size;
 2555 
 2556         /* Allocate and setup the lro_ctrl structure */
 2557         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2558 #if defined(INET6) || defined(INET)
 2559         ret = tcp_lro_init(&q->lro.ctrl);
 2560         if (ret) {
 2561                 printf("error %d from tcp_lro_init\n", ret);
 2562                 goto err;
 2563         }
 2564 #endif
 2565         q->lro.ctrl.ifp = pi->ifp;
 2566 
 2567         mtx_lock_spin(&sc->sge.reg_lock);
 2568         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2569                                    q->rspq.phys_addr, q->rspq.size,
 2570                                    q->fl[0].buf_size, 1, 0);
 2571         if (ret) {
 2572                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2573                 goto err_unlock;
 2574         }
 2575 
 2576         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2577                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2578                                           q->fl[i].phys_addr, q->fl[i].size,
 2579                                           q->fl[i].buf_size, p->cong_thres, 1,
 2580                                           0);
 2581                 if (ret) {
 2582                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2583                         goto err_unlock;
 2584                 }
 2585         }
 2586 
 2587         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2588                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2589                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2590                                  1, 0);
 2591         if (ret) {
 2592                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2593                 goto err_unlock;
 2594         }
 2595 
 2596         if (ntxq > 1) {
 2597                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2598                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2599                                          q->txq[TXQ_OFLD].phys_addr,
 2600                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2601                 if (ret) {
 2602                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2603                         goto err_unlock;
 2604                 }
 2605         }
 2606 
 2607         if (ntxq > 2) {
 2608                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2609                                          SGE_CNTXT_CTRL, id,
 2610                                          q->txq[TXQ_CTRL].phys_addr,
 2611                                          q->txq[TXQ_CTRL].size,
 2612                                          q->txq[TXQ_CTRL].token, 1, 0);
 2613                 if (ret) {
 2614                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2615                         goto err_unlock;
 2616                 }
 2617         }
 2618 
 2619         mtx_unlock_spin(&sc->sge.reg_lock);
 2620         t3_update_qset_coalesce(q, p);
 2621 
 2622         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2623         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2624         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2625 
 2626         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2627                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2628 
 2629         return (0);
 2630 
 2631 err_unlock:
 2632         mtx_unlock_spin(&sc->sge.reg_lock);
 2633 err:    
 2634         TXQ_LOCK(q);
 2635         t3_free_qset(sc, q);
 2636 
 2637         return (ret);
 2638 }
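      /*
       * Free-list sizing in t3_sge_alloc_qset(): fl[0] always uses regular
       * MCLBYTES clusters from zone_pack, while fl[1] is backed by whichever
       * jumbo zone matches the configured jumbo_buf_size (MJUM16BYTES,
       * MJUM9BYTES, or MJUMPAGESIZE).  Any allocation or context-programming
       * failure falls through to t3_free_qset() with the queue-set lock
       * held, tearing down the partially constructed rings.
       */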
 2639 
 2640 /*
 2641  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2642  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2643  * will also be taken into account here.
 2644  */
 2645 void
 2646 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2647 {
 2648         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2649         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2650         struct ifnet *ifp = pi->ifp;
 2651         
 2652         if (cpl->vlan_valid) {
 2653                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2654                 m->m_flags |= M_VLANTAG;
 2655         } 
 2656 
 2657         m->m_pkthdr.rcvif = ifp;
 2658         /*
 2659          * adjust after conversion to mbuf chain
 2660          */
 2661         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2662         m->m_len -= (sizeof(*cpl) + ethpad);
 2663         m->m_data += (sizeof(*cpl) + ethpad);
 2664 
 2665         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2666                 struct ether_header *eh = mtod(m, void *);
 2667                 uint16_t eh_type;
 2668 
 2669                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2670                         struct ether_vlan_header *evh = mtod(m, void *);
 2671 
 2672                         eh_type = evh->evl_proto;
 2673                 } else
 2674                         eh_type = eh->ether_type;
 2675 
 2676                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2677                     eh_type == htons(ETHERTYPE_IP)) {
 2678                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2679                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2680                         m->m_pkthdr.csum_data = 0xffff;
 2681                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2682                     eh_type == htons(ETHERTYPE_IPV6)) {
 2683                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2684                             CSUM_PSEUDO_HDR);
 2685                         m->m_pkthdr.csum_data = 0xffff;
 2686                 }
 2687         }
 2688 }
 2689 
 2690 /**
 2691  *      get_packet - return the next ingress packet buffer from a free list
 2692  *      @adap: the adapter that received the packet
 2693  *      @drop_thres: # of remaining buffers before we start dropping packets
 2694  *      @qs: the qset that the SGE free list holding the packet belongs to
 2695  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2696  *      @r: response descriptor 
 2697  *
 2698  *      Get the next packet from a free list and complete setup of the
 2699  *      mbuf.  If the packet is small we make a copy and recycle the
 2700  *      original buffer, otherwise we use the original buffer itself.  If a
 2701  *      positive drop threshold is supplied, packets are dropped and their
 2702  *      buffers recycled if (a) the number of remaining buffers is under the
 2703  *      threshold and the packet is too big to copy, or (b) the packet should
 2704  *      be copied but there is no memory for the copy.
 2705  */
 2706 static int
 2707 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2708     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2709 {
 2710 
 2711         unsigned int len_cq =  ntohl(r->len_cq);
 2712         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2713         int mask, cidx = fl->cidx;
 2714         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2715         uint32_t len = G_RSPD_LEN(len_cq);
 2716         uint32_t flags = M_EXT;
 2717         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2718         caddr_t cl;
 2719         struct mbuf *m;
 2720         int ret = 0;
 2721 
 2722         mask = fl->size - 1;
 2723         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2724         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2725         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2726         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2727 
 2728         fl->credits--;
 2729         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2730         
 2731         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2732             sopeop == RSPQ_SOP_EOP) {
 2733                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2734                         goto skip_recycle;
 2735                 cl = mtod(m, void *);
 2736                 memcpy(cl, sd->rxsd_cl, len);
 2737                 recycle_rx_buf(adap, fl, fl->cidx);
 2738                 m->m_pkthdr.len = m->m_len = len;
 2739                 m->m_flags = 0;
 2740                 mh->mh_head = mh->mh_tail = m;
 2741                 ret = 1;
 2742                 goto done;
 2743         } else {
 2744         skip_recycle:
 2745                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2746                 cl = sd->rxsd_cl;
 2747                 m = sd->m;
 2748 
 2749                 if ((sopeop == RSPQ_SOP_EOP) ||
 2750                     (sopeop == RSPQ_SOP))
 2751                         flags |= M_PKTHDR;
 2752                 m_init(m, M_NOWAIT, MT_DATA, flags);
 2753                 if (fl->zone == zone_pack) {
 2754                         /*
 2755                          * restore clobbered data pointer
 2756                          */
 2757                         m->m_data = m->m_ext.ext_buf;
 2758                 } else {
 2759                         m_cljset(m, cl, fl->type);
 2760                 }
 2761                 m->m_len = len;
 2762         }               
 2763         switch(sopeop) {
 2764         case RSPQ_SOP_EOP:
 2765                 ret = 1;
 2766                 /* FALLTHROUGH */
 2767         case RSPQ_SOP:
 2768                 mh->mh_head = mh->mh_tail = m;
 2769                 m->m_pkthdr.len = len;
 2770                 break;
 2771         case RSPQ_EOP:
 2772                 ret = 1;
 2773                 /* FALLTHROUGH */
 2774         case RSPQ_NSOP_NEOP:
 2775                 if (mh->mh_tail == NULL) {
 2776                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2777                         m_freem(m);
 2778                         m = NULL;
 2779                         break;
 2780                 }
 2781                 mh->mh_tail->m_next = m;
 2782                 mh->mh_tail = m;
 2783                 mh->mh_head->m_pkthdr.len += len;
 2784                 break;
 2785         }
 2786         if (cxgb_debug && m != NULL)
 2787                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2788 done:
 2789         if (++fl->cidx == fl->size)
 2790                 fl->cidx = 0;
 2791 
 2792         return (ret);
 2793 }
 2794 
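/*
 * Illustrative sketch (not part of the driver): the copy-versus-zero-copy
 * decision get_packet() makes above.  Small frames are copied into a fresh
 * buffer so the DMA'd cluster can be recycled into the free list, while
 * larger frames hand the cluster itself up the stack.  The ex_* names and
 * the threshold value are invented for this standalone example; the driver
 * uses SGE_RX_COPY_THRES and real mbufs/clusters.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define EX_RX_COPY_THRES 128            /* example threshold, bytes */

struct ex_cluster {
        uint8_t data[2048];             /* stands in for the DMA'd rx cluster */
};

/*
 * Returns true when the frame was copied (the cluster may be recycled),
 * false when the caller should pass the cluster itself up the stack.
 */
static bool
ex_rx_copy_small(const struct ex_cluster *cl, uint32_t len,
    uint8_t *dst, size_t dstlen)
{

        if (len > EX_RX_COPY_THRES || len > dstlen)
                return (false);         /* too big: take the zero-copy path */
        memcpy(dst, cl->data, len);
        return (true);                  /* small frame: copy and recycle */
}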
 2795 /**
 2796  *      handle_rsp_cntrl_info - handles control information in a response
 2797  *      @qs: the queue set corresponding to the response
 2798  *      @flags: the response control flags
 2799  *
 2800  *      Handles the control information of an SGE response, such as GTS
 2801  *      indications and completion credits for the queue set's Tx queues.
 2802  *      HW coalesces credits; we don't do any extra SW coalescing.
 2803  */
 2804 static __inline void
 2805 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2806 {
 2807         unsigned int credits;
 2808 
 2809 #if USE_GTS
 2810         if (flags & F_RSPD_TXQ0_GTS)
 2811                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2812 #endif
 2813         credits = G_RSPD_TXQ0_CR(flags);
 2814         if (credits) 
 2815                 qs->txq[TXQ_ETH].processed += credits;
 2816 
 2817         credits = G_RSPD_TXQ2_CR(flags);
 2818         if (credits)
 2819                 qs->txq[TXQ_CTRL].processed += credits;
 2820 
 2821 # if USE_GTS
 2822         if (flags & F_RSPD_TXQ1_GTS)
 2823                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2824 # endif
 2825         credits = G_RSPD_TXQ1_CR(flags);
 2826         if (credits)
 2827                 qs->txq[TXQ_OFLD].processed += credits;
 2828 
 2829 }
 2830 
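/*
 * Illustrative sketch: the shift-and-mask extraction that the
 * G_RSPD_TXQ0_CR()/G_RSPD_TXQ1_CR()/G_RSPD_TXQ2_CR() macros used above
 * perform on the 32-bit response flags word.  The shift and width below
 * are placeholders, not the T3 hardware's actual field layout.
 */
#include <stdint.h>

#define EX_CR_SHIFT 0
#define EX_CR_MASK  0x7fU               /* example: a 7-bit credit-return field */

static inline unsigned int
ex_get_credit_return(uint32_t flags)
{

        return ((flags >> EX_CR_SHIFT) & EX_CR_MASK);
}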
 2831 static void
 2832 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2833     unsigned int sleeping)
 2834 {
 2835         ;
 2836 }
 2837 
 2838 /**
 2839  *      process_responses - process responses from an SGE response queue
 2840  *      @adap: the adapter
 2841  *      @qs: the queue set to which the response queue belongs
 2842  *      @budget: how many responses can be processed in this round
 2843  *
 2844  *      Process responses from an SGE response queue up to the supplied budget.
 2845  *      Responses include received packets as well as credits and other events
 2846  *      for the queues that belong to the response queue's queue set.
 2847  *      A negative budget is effectively unlimited.
 2848  *
 2849  *      Additionally, choose the interrupt holdoff time for the next interrupt
 2850  *      on this queue.  If the system is under memory shortage, use a fairly
 2851  *      long delay to help recovery.
 2852  */
 2853 static int
 2854 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2855 {
 2856         struct sge_rspq *rspq = &qs->rspq;
 2857         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2858         int budget_left = budget;
 2859         unsigned int sleeping = 0;
 2860 #if defined(INET6) || defined(INET)
 2861         int lro_enabled = qs->lro.enabled;
 2862         int skip_lro;
 2863         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2864 #endif
 2865         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2866 #ifdef DEBUG    
 2867         static int last_holdoff = 0;
 2868         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2869                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2870                 last_holdoff = rspq->holdoff_tmr;
 2871         }
 2872 #endif
 2873         rspq->next_holdoff = rspq->holdoff_tmr;
 2874 
 2875         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2876                 int eth, eop = 0, ethpad = 0;
 2877                 uint32_t flags = ntohl(r->flags);
 2878                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2879                 uint8_t opcode = r->rss_hdr.opcode;
 2880                 
 2881                 eth = (opcode == CPL_RX_PKT);
 2882                 
 2883                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2884                         struct mbuf *m;
 2885 
 2886                         if (cxgb_debug)
 2887                                 printf("async notification\n");
 2888 
 2889                         if (mh->mh_head == NULL) {
 2890                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2891                                 m = mh->mh_head;
 2892                         } else {
 2893                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2894                         }
 2895                         if (m == NULL)
 2896                                 goto no_mem;
 2897 
 2898                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2899                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2900                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
 2901                         opcode = CPL_ASYNC_NOTIF;
 2902                         eop = 1;
 2903                         rspq->async_notif++;
 2904                         goto skip;
 2905                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2906                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2907 
 2908                         if (m == NULL) {        
 2909                 no_mem:
 2910                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2911                                 budget_left--;
 2912                                 break;
 2913                         }
 2914                         if (mh->mh_head == NULL)
 2915                                 mh->mh_head = m;
 2916                         else 
 2917                                 mh->mh_tail->m_next = m;
 2918                         mh->mh_tail = m;
 2919 
 2920                         get_imm_packet(adap, r, m);
 2921                         mh->mh_head->m_pkthdr.len += m->m_len;
 2922                         eop = 1;
 2923                         rspq->imm_data++;
 2924                 } else if (r->len_cq) {
 2925                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2926                         
 2927                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2928                         if (eop) {
 2929                                 if (r->rss_hdr.hash_type && !adap->timestamp) {
 2930                                         M_HASHTYPE_SET(mh->mh_head,
 2931                                             M_HASHTYPE_OPAQUE_HASH);
 2932                                         mh->mh_head->m_pkthdr.flowid = rss_hash;
 2933                                 }
 2934                         }
 2935                         
 2936                         ethpad = 2;
 2937                 } else {
 2938                         rspq->pure_rsps++;
 2939                 }
 2940         skip:
 2941                 if (flags & RSPD_CTRL_MASK) {
 2942                         sleeping |= flags & RSPD_GTS_MASK;
 2943                         handle_rsp_cntrl_info(qs, flags);
 2944                 }
 2945 
 2946                 if (!eth && eop) {
 2947                         rspq->offload_pkts++;
 2948 #ifdef TCP_OFFLOAD
 2949                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2950 #else
 2951                         m_freem(mh->mh_head);
 2952 #endif
 2953                         mh->mh_head = NULL;
 2954                 } else if (eth && eop) {
 2955                         struct mbuf *m = mh->mh_head;
 2956 
 2957                         t3_rx_eth(adap, m, ethpad);
 2958 
 2959                         /*
 2960                          * The T304 sends incoming packets on any qset.  If LRO
 2961                          * is also enabled, we could end up sending the packet up
 2962                          * lro_ctrl->ifp's input.  That is incorrect.
 2963                          *
 2964                          * The mbuf's rcvif was derived from the cpl header and
 2965                          * is accurate.  Skip LRO and just use that.
 2966                          */
 2967 #if defined(INET6) || defined(INET)
 2968                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2969 
 2970                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2971                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2972                             ) {
 2973                                 /* successfully queued for LRO */
 2974                         } else
 2975 #endif
 2976                         {
 2977                                 /*
 2978                                  * LRO not enabled, packet unsuitable for LRO,
 2979                                  * or unable to queue.  Pass it up right now in
 2980                                  * either case.
 2981                                  */
 2982                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2983                                 (*ifp->if_input)(ifp, m);
 2984                         }
 2985                         mh->mh_head = NULL;
 2986 
 2987                 }
 2988 
 2989                 r++;
 2990                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2991                         rspq->cidx = 0;
 2992                         rspq->gen ^= 1;
 2993                         r = rspq->desc;
 2994                 }
 2995 
 2996                 if (++rspq->credits >= 64) {
 2997                         refill_rspq(adap, rspq, rspq->credits);
 2998                         rspq->credits = 0;
 2999                 }
 3000                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3001                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3002                 --budget_left;
 3003         }
 3004 
 3005 #if defined(INET6) || defined(INET)
 3006         /* Flush LRO */
 3007         tcp_lro_flush_all(lro_ctrl);
 3008 #endif
 3009 
 3010         if (sleeping)
 3011                 check_ring_db(adap, qs, sleeping);
 3012 
 3013         mb();  /* commit Tx queue processed updates */
 3014         if (__predict_false(qs->txq_stopped > 1))
 3015                 restart_tx(qs);
 3016 
 3017         __refill_fl_lt(adap, &qs->fl[0], 512);
 3018         __refill_fl_lt(adap, &qs->fl[1], 512);
 3019         budget -= budget_left;
 3020         return (budget);
 3021 }
 3022 
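/*
 * Illustrative, standalone sketch of the response-ring consumer pattern
 * process_responses() uses above: consume descriptors while the generation
 * bit marks them as new, wrap the consumer index at the end of the ring,
 * flip the expected generation on wrap, and stop once the budget is spent.
 * The ex_* types, ring size, and handler callback are invented for the
 * example; the real loop additionally returns response-queue credits and
 * refills the free lists as it goes, and treats a negative budget as
 * effectively unlimited (this sketch does not).
 */
#include <stdint.h>

#define EX_RING_SIZE 1024               /* power of two, example only */

struct ex_rsp_desc {
        uint8_t gen;                    /* generation bit written by "hardware" */
        uint8_t payload[63];
};

struct ex_rspq {
        struct ex_rsp_desc desc[EX_RING_SIZE];
        unsigned int cidx;              /* consumer index */
        uint8_t gen;                    /* generation expected for new entries */
};

static int
ex_process_responses(struct ex_rspq *q, int budget,
    void (*handle)(struct ex_rsp_desc *))
{
        int done = 0;

        while (budget-- > 0 && q->desc[q->cidx].gen == q->gen) {
                handle(&q->desc[q->cidx]);
                done++;
                if (++q->cidx == EX_RING_SIZE) {
                        q->cidx = 0;
                        q->gen ^= 1;    /* new entries now carry the other gen */
                }
        }
        return (done);
}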
 3023 /*
 3024  * A helper function that processes responses and issues GTS.
 3025  */
 3026 static __inline int
 3027 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3028 {
 3029         int work;
 3030         static int last_holdoff = 0;
 3031         
 3032         work = process_responses(adap, rspq_to_qset(rq), -1);
 3033 
 3034         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3035                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3036                 last_holdoff = rq->next_holdoff;
 3037         }
 3038         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3039             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3040         
 3041         return (work);
 3042 }
 3043 
 3044 #ifdef NETDUMP
 3045 int
 3046 cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs)
 3047 {
 3048 
 3049         return (process_responses_gts(adap, &qs->rspq));
 3050 }
 3051 #endif
 3052 
 3053 /*
 3054  * Interrupt handler for legacy INTx interrupts on T3B-based cards.
 3055  * Handles data events from SGE response queues as well as error and other
 3056  * async events as they all use the same interrupt pin.  We use one SGE
 3057  * response queue per port in this mode and protect all response queues with
 3058  * queue 0's lock.
 3059  */
 3060 void
 3061 t3b_intr(void *data)
 3062 {
 3063         uint32_t i, map;
 3064         adapter_t *adap = data;
 3065         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3066         
 3067         t3_write_reg(adap, A_PL_CLI, 0);
 3068         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3069 
 3070         if (!map) 
 3071                 return;
 3072 
 3073         if (__predict_false(map & F_ERRINTR)) {
 3074                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3075                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3076                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3077         }
 3078 
 3079         mtx_lock(&q0->lock);
 3080         for_each_port(adap, i)
 3081             if (map & (1 << i))
 3082                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3083         mtx_unlock(&q0->lock);
 3084 }
 3085 
 3086 /*
 3087  * The MSI interrupt handler.  This needs to handle data events from SGE
 3088  * response queues as well as error and other async events as they all use
 3089  * the same MSI vector.  We use one SGE response queue per port in this mode
 3090  * and protect all response queues with queue 0's lock.
 3091  */
 3092 void
 3093 t3_intr_msi(void *data)
 3094 {
 3095         adapter_t *adap = data;
 3096         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3097         int i, new_packets = 0;
 3098 
 3099         mtx_lock(&q0->lock);
 3100 
 3101         for_each_port(adap, i)
 3102             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3103                     new_packets = 1;
 3104         mtx_unlock(&q0->lock);
 3105         if (new_packets == 0) {
 3106                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3107                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3108                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3109         }
 3110 }
 3111 
 3112 void
 3113 t3_intr_msix(void *data)
 3114 {
 3115         struct sge_qset *qs = data;
 3116         adapter_t *adap = qs->port->adapter;
 3117         struct sge_rspq *rspq = &qs->rspq;
 3118 
 3119         if (process_responses_gts(adap, rspq) == 0)
 3120                 rspq->unhandled_irqs++;
 3121 }
 3122 
 3123 #define QDUMP_SBUF_SIZE         (32 * 400)
 3124 static int
 3125 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3126 {
 3127         struct sge_rspq *rspq;
 3128         struct sge_qset *qs;
 3129         int i, err, dump_end, idx;
 3130         struct sbuf *sb;
 3131         struct rsp_desc *rspd;
 3132         uint32_t data[4];
 3133         
 3134         rspq = arg1;
 3135         qs = rspq_to_qset(rspq);
 3136         if (rspq->rspq_dump_count == 0) 
 3137                 return (0);
 3138         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3139                 log(LOG_WARNING,
 3140                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3141                 rspq->rspq_dump_count = 0;
 3142                 return (EINVAL);
 3143         }
 3144         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3145                 log(LOG_WARNING,
 3146                     "dump start of %d is greater than queue size\n",
 3147                     rspq->rspq_dump_start);
 3148                 rspq->rspq_dump_start = 0;
 3149                 return (EINVAL);
 3150         }
 3151         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3152         if (err)
 3153                 return (err);
 3154         err = sysctl_wire_old_buffer(req, 0);
 3155         if (err)
 3156                 return (err);
 3157         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3158 
 3159         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3160             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3161             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3162         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3163             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3164         
 3165         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3166             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3167         
 3168         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3169         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3170                 idx = i & (RSPQ_Q_SIZE-1);
 3171                 
 3172                 rspd = &rspq->desc[idx];
 3173                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3174                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3175                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3176                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3177                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3178                     be32toh(rspd->len_cq), rspd->intr_gen);
 3179         }
 3180 
 3181         err = sbuf_finish(sb);
 3182         sbuf_delete(sb);
 3183         return (err);
 3184 }       
 3185 
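/*
 * Illustrative sketch of the indexing used by the qdump sysctl handlers
 * above: the requested window [start, start + count) is walked linearly
 * and the index is wrapped with a power-of-two mask instead of the modulo
 * operator.  The ring size and element type here are example-only.
 */
#include <stdio.h>

#define EX_Q_SIZE 16                    /* must be a power of two */

static void
ex_dump_window(const unsigned int ring[EX_Q_SIZE], int start, int count)
{
        int i, idx;

        for (i = start; i < start + count; i++) {
                idx = i & (EX_Q_SIZE - 1);      /* wraps past the ring end */
                printf("\tidx=%02d val=%08x\n", idx, ring[idx]);
        }
}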
 3186 static int
 3187 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3188 {
 3189         struct sge_txq *txq;
 3190         struct sge_qset *qs;
 3191         int i, j, err, dump_end;
 3192         struct sbuf *sb;
 3193         struct tx_desc *txd;
 3194         uint32_t *WR, wr_hi, wr_lo, gen;
 3195         uint32_t data[4];
 3196         
 3197         txq = arg1;
 3198         qs = txq_to_qset(txq, TXQ_ETH);
 3199         if (txq->txq_dump_count == 0) {
 3200                 return (0);
 3201         }
 3202         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3203                 log(LOG_WARNING,
 3204                     "dump count is too large %d\n", txq->txq_dump_count);
 3205                 txq->txq_dump_count = 1;
 3206                 return (EINVAL);
 3207         }
 3208         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3209                 log(LOG_WARNING,
 3210                     "dump start of %d is greater than queue size\n",
 3211                     txq->txq_dump_start);
 3212                 txq->txq_dump_start = 0;
 3213                 return (EINVAL);
 3214         }
 3215         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3216         if (err)
 3217                 return (err);
 3218         err = sysctl_wire_old_buffer(req, 0);
 3219         if (err)
 3220                 return (err);
 3221         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3222 
 3223         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3224             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3225             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3226         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3227             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3228             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3229         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3230             txq->txq_dump_start,
 3231             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3232 
 3233         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3234         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3235                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3236                 WR = (uint32_t *)txd->flit;
 3237                 wr_hi = ntohl(WR[0]);
 3238                 wr_lo = ntohl(WR[1]);           
 3239                 gen = G_WR_GEN(wr_lo);
 3240                 
 3241                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3242                     wr_hi, wr_lo, gen);
 3243                 for (j = 2; j < 30; j += 4) 
 3244                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3245                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3246 
 3247         }
 3248         err = sbuf_finish(sb);
 3249         sbuf_delete(sb);
 3250         return (err);
 3251 }
 3252 
 3253 static int
 3254 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3255 {
 3256         struct sge_txq *txq;
 3257         struct sge_qset *qs;
 3258         int i, j, err, dump_end;
 3259         struct sbuf *sb;
 3260         struct tx_desc *txd;
 3261         uint32_t *WR, wr_hi, wr_lo, gen;
 3262         
 3263         txq = arg1;
 3264         qs = txq_to_qset(txq, TXQ_CTRL);
 3265         if (txq->txq_dump_count == 0) {
 3266                 return (0);
 3267         }
 3268         if (txq->txq_dump_count > 256) {
 3269                 log(LOG_WARNING,
 3270                     "dump count is too large %d\n", txq->txq_dump_count);
 3271                 txq->txq_dump_count = 1;
 3272                 return (EINVAL);
 3273         }
 3274         if (txq->txq_dump_start > 255) {
 3275                 log(LOG_WARNING,
 3276                     "dump start of %d is greater than queue size\n",
 3277                     txq->txq_dump_start);
 3278                 txq->txq_dump_start = 0;
 3279                 return (EINVAL);
 3280         }
 3281 
 3282         err = sysctl_wire_old_buffer(req, 0);
 3283         if (err != 0)
 3284                 return (err);
 3285         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3286         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3287             txq->txq_dump_start,
 3288             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3289 
 3290         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3291         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3292                 txd = &txq->desc[i & (255)];
 3293                 WR = (uint32_t *)txd->flit;
 3294                 wr_hi = ntohl(WR[0]);
 3295                 wr_lo = ntohl(WR[1]);           
 3296                 gen = G_WR_GEN(wr_lo);
 3297                 
 3298                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3299                     wr_hi, wr_lo, gen);
 3300                 for (j = 2; j < 30; j += 4) 
 3301                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3302                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3303 
 3304         }
 3305         err = sbuf_finish(sb);
 3306         sbuf_delete(sb);
 3307         return (err);
 3308 }
 3309 
 3310 static int
 3311 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3312 {
 3313         adapter_t *sc = arg1;
 3314         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3315         int coalesce_usecs;     
 3316         struct sge_qset *qs;
 3317         int i, j, err, nqsets = 0;
 3318         struct mtx *lock;
 3319 
 3320         if ((sc->flags & FULL_INIT_DONE) == 0)
 3321                 return (ENXIO);
 3322                 
 3323         coalesce_usecs = qsp->coalesce_usecs;
 3324         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3325 
 3326         if (err != 0) {
 3327                 return (err);
 3328         }
 3329         if (coalesce_usecs == qsp->coalesce_usecs)
 3330                 return (0);
 3331 
 3332         for (i = 0; i < sc->params.nports; i++) 
 3333                 for (j = 0; j < sc->port[i].nqsets; j++)
 3334                         nqsets++;
 3335 
 3336         coalesce_usecs = max(1, coalesce_usecs);
 3337 
 3338         for (i = 0; i < nqsets; i++) {
 3339                 qs = &sc->sge.qs[i];
 3340                 qsp = &sc->params.sge.qset[i];
 3341                 qsp->coalesce_usecs = coalesce_usecs;
 3342                 
 3343                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3344                             &sc->sge.qs[0].rspq.lock;
 3345 
 3346                 mtx_lock(lock);
 3347                 t3_update_qset_coalesce(qs, qsp);
 3348                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3349                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3350                 mtx_unlock(lock);
 3351         }
 3352 
 3353         return (0);
 3354 }
 3355 
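/*
 * Illustrative, standalone sketch of the shape shared by the sysctl
 * handlers above (t3_set_coalesce_usecs() and t3_pkt_timestamp()): snapshot
 * the current value, let the sysctl layer overwrite it, return early on
 * error or when nothing changed, then apply the new value everywhere it is
 * needed.  The ex_* names and the clamp are example-only; in the driver the
 * "apply" step reprograms the SGE holdoff timer under the queue lock.
 */
#include <stddef.h>

struct ex_qset {
        int coalesce_usecs;
};

static int
ex_set_coalesce(struct ex_qset *qs, size_t nqsets, int current,
    int requested, void (*apply)(struct ex_qset *))
{
        size_t i;

        if (requested == current)
                return (0);             /* no change requested */
        if (requested < 1)
                requested = 1;          /* clamp, as max(1, coalesce_usecs) does */
        for (i = 0; i < nqsets; i++) {
                qs[i].coalesce_usecs = requested;
                apply(&qs[i]);          /* e.g. reprogram the holdoff timer */
        }
        return (0);
}

/*
 * From userland the value would be adjusted through the sysctl node that
 * t3_add_configured_sysctls() attaches under the adapter's device tree;
 * the exact OID path depends on the device name and unit and is not shown
 * here.
 */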
 3356 static int
 3357 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3358 {
 3359         adapter_t *sc = arg1;
 3360         int rc, timestamp;
 3361 
 3362         if ((sc->flags & FULL_INIT_DONE) == 0)
 3363                 return (ENXIO);
 3364 
 3365         timestamp = sc->timestamp;
 3366         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3367 
 3368         if (rc != 0)
 3369                 return (rc);
 3370 
 3371         if (timestamp != sc->timestamp) {
 3372                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3373                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3374                 sc->timestamp = timestamp;
 3375         }
 3376 
 3377         return (0);
 3378 }
 3379 
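/*
 * Illustrative sketch of the read-modify-write suggested by the mask/value
 * pair that t3_pkt_timestamp() above passes to t3_set_reg_field(): clear
 * the field's bits, then OR in the new value.  This version operates on a
 * plain integer image of the register; the real helper reads and writes
 * the hardware register.
 */
#include <stdint.h>

static inline uint32_t
ex_set_reg_field(uint32_t regval, uint32_t mask, uint32_t val)
{

        return ((regval & ~mask) | (val & mask));
}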
 3380 void
 3381 t3_add_attach_sysctls(adapter_t *sc)
 3382 {
 3383         struct sysctl_ctx_list *ctx;
 3384         struct sysctl_oid_list *children;
 3385 
 3386         ctx = device_get_sysctl_ctx(sc->dev);
 3387         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3388 
 3389         /* random information */
 3390         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3391             "firmware_version",
 3392             CTLFLAG_RD, sc->fw_version,
 3393             0, "firmware version");
 3394         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3395             "hw_revision",
 3396             CTLFLAG_RD, &sc->params.rev,
 3397             0, "chip model");
 3398         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3399             "port_types",
 3400             CTLFLAG_RD, sc->port_types,
 3401             0, "type of ports");
 3402         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3403             "enable_debug",
 3404             CTLFLAG_RW, &cxgb_debug,
 3405             0, "enable verbose debugging output");
 3406         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3407             CTLFLAG_RD, &sc->tunq_coalesce,
 3408             "#tunneled packets freed");
 3409         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3410             "txq_overrun",
 3411             CTLFLAG_RD, &txq_fills,
 3412             0, "#times txq overrun");
 3413         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3414             "core_clock",
 3415             CTLFLAG_RD, &sc->params.vpd.cclk,
 3416             0, "core clock frequency (in kHz)");
 3417 }
 3418 
 3419 
 3420 static const char *rspq_name = "rspq";
 3421 static const char *txq_names[] =
 3422 {
 3423         "txq_eth",
 3424         "txq_ofld",
 3425         "txq_ctrl"      
 3426 };
 3427 
 3428 static int
 3429 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3430 {
 3431         struct port_info *p = arg1;
 3432         uint64_t *parg;
 3433 
 3434         if (!p)
 3435                 return (EINVAL);
 3436 
 3437         cxgb_refresh_stats(p);
 3438         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3439 
 3440         return (sysctl_handle_64(oidp, parg, 0, req));
 3441 }
 3442 
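/*
 * Illustrative, standalone sketch of the trick sysctl_handle_macstat()
 * relies on: the sysctl registration passes offsetof(struct ..., field) in
 * arg2, and the handler adds that byte offset to the base of the refreshed
 * statistics structure to locate the requested 64-bit counter.  The ex_*
 * structure and fields are invented for the example.
 */
#include <stddef.h>
#include <stdint.h>

struct ex_mac_stats {
        uint64_t tx_frames;
        uint64_t rx_frames;
};

static uint64_t
ex_read_counter(const struct ex_mac_stats *stats, size_t offset)
{

        return (*(const uint64_t *)((const uint8_t *)stats + offset));
}

/*
 * Example use: ex_read_counter(&stats, offsetof(struct ex_mac_stats,
 * rx_frames)) returns the same value as stats.rx_frames.
 */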
 3443 void
 3444 t3_add_configured_sysctls(adapter_t *sc)
 3445 {
 3446         struct sysctl_ctx_list *ctx;
 3447         struct sysctl_oid_list *children;
 3448         int i, j;
 3449         
 3450         ctx = device_get_sysctl_ctx(sc->dev);
 3451         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3452 
 3453         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3454             "intr_coal",
 3455             CTLTYPE_INT|CTLFLAG_RW, sc,
 3456             0, t3_set_coalesce_usecs,
 3457             "I", "interrupt coalescing timer (us)");
 3458 
 3459         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3460             "pkt_timestamp",
 3461             CTLTYPE_INT | CTLFLAG_RW, sc,
 3462             0, t3_pkt_timestamp,
 3463             "I", "provide packet timestamp instead of connection hash");
 3464 
 3465         for (i = 0; i < sc->params.nports; i++) {
 3466                 struct port_info *pi = &sc->port[i];
 3467                 struct sysctl_oid *poid;
 3468                 struct sysctl_oid_list *poidlist;
 3469                 struct mac_stats *mstats = &pi->mac.stats;
 3470                 
 3471                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3472                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3473                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3474                 poidlist = SYSCTL_CHILDREN(poid);
 3475                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3476                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3477                     0, "#queue sets");
 3478 
 3479                 for (j = 0; j < pi->nqsets; j++) {
 3480                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3481                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3482                                           *ctrlqpoid, *lropoid;
 3483                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3484                                                *txqpoidlist, *ctrlqpoidlist,
 3485                                                *lropoidlist;
 3486                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3487                         
 3488                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3489                         
 3490                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3491                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3492                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3493 
 3494                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3495                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3496                                         "freelist #0 empty");
 3497                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3498                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3499                                         "freelist #1 empty");
 3500 
 3501                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3502                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3503                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3504 
 3505                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3506                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3507                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3508 
 3509                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3510                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3511                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3512 
 3513                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3514                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3515                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3516 
 3517                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3518                             CTLFLAG_RD, &qs->rspq.size,
 3519                             0, "#entries in response queue");
 3520                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3521                             CTLFLAG_RD, &qs->rspq.cidx,
 3522                             0, "consumer index");
 3523                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3524                             CTLFLAG_RD, &qs->rspq.credits,
 3525                             0, "#credits");
 3526                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3527                             CTLFLAG_RD, &qs->rspq.starved,
 3528                             0, "#times starved");
 3529                         SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3530                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3531                             "physical address of the queue");
 3532                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3533                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3534                             0, "start rspq dump entry");
 3535                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3536                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3537                             0, "#rspq entries to dump");
 3538                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3539                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3540                             0, t3_dump_rspq, "A", "dump of the response queue");
 3541 
 3542                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3543                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3544                             "#tunneled packets dropped");
 3545                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3546                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
 3547                             0, "#tunneled packets waiting to be sent");
 3548 #if 0                   
 3549                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3550                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3551                             0, "#tunneled packets queue producer index");
 3552                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3553                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3554                             0, "#tunneled packets queue consumer index");
 3555 #endif                  
 3556                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3557                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3558                             0, "#tunneled packets processed by the card");
 3559                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3560                             CTLFLAG_RD, &txq->cleaned,
 3561                             0, "#tunneled packets cleaned");
 3562                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3563                             CTLFLAG_RD, &txq->in_use,
 3564                             0, "#tunneled packet slots in use");
 3565                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
 3566                             CTLFLAG_RD, &txq->txq_frees,
 3567                             "#tunneled packets freed");
 3568                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3569                             CTLFLAG_RD, &txq->txq_skipped,
 3570                             0, "#tunneled packet descriptors skipped");
 3571                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3572                             CTLFLAG_RD, &txq->txq_coalesced,
 3573                             "#tunneled packets coalesced");
 3574                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3575                             CTLFLAG_RD, &txq->txq_enqueued,
 3576                             0, "#tunneled packets enqueued to hardware");
 3577                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3578                             CTLFLAG_RD, &qs->txq_stopped,
 3579                             0, "tx queues stopped");
 3580                         SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3581                             CTLFLAG_RD, &txq->phys_addr,
 3582                             "physical address of the queue");
 3583                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3584                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3585                             0, "txq generation");
 3586                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3587                             CTLFLAG_RD, &txq->cidx,
 3588                             0, "hardware queue cidx");                  
 3589                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3590                             CTLFLAG_RD, &txq->pidx,
 3591                             0, "hardware queue pidx");
 3592                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3593                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3594                             0, "txq start idx for dump");
 3595                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3596                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3597                             0, "txq #entries to dump");                 
 3598                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3599                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3600                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3601 
 3602                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3603                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3604                             0, "ctrlq start idx for dump");
 3605                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3606                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3607                             0, "ctrl #entries to dump");                        
 3608                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3609                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3610                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3611 
 3612                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3613                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3614                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3615                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3616                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3617                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3618                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3619                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3620                 }
 3621 
 3622                 /* Now add a node for mac stats. */
 3623                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3624                     CTLFLAG_RD, NULL, "MAC statistics");
 3625                 poidlist = SYSCTL_CHILDREN(poid);
 3626 
 3627                 /*
 3628                  * We (ab)use the length argument (arg2) to pass on the offset
 3629                  * of the data that we are interested in.  This is only required
 3630                  * for the quad counters that are updated from the hardware (we
 3631                  * make sure that we return the latest value).
 3632                  * sysctl_handle_macstat first updates *all* the counters from
 3633                  * the hardware, and then returns the latest value of the
 3634                  * requested counter.  Best would be to update only the
 3635                  * requested counter from hardware, but t3_mac_update_stats()
 3636                  * hides all the register details and we don't want to dive into
 3637                  * all that here.
 3638                  */
 3639 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3640     (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3641     sysctl_handle_macstat, "QU", 0)
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3647                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3648                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3649                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3650                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3651                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3652                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3653                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3654                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3655                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3656                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3657                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3658                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3659                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3660                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3661                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3662                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3663                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3671                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3672                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3673                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3674                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3675                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3676                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3677                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3678                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3679                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3680                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3681                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3682                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3683                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3684                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3685                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3686                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3687                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3688 #undef CXGB_SYSCTL_ADD_QUAD
 3689 
 3690 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3691     CTLFLAG_RD, &mstats->a, 0)
 3692                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3693                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3694                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3695                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3696                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3697                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3698                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3699                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3700                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3701                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3702 #undef CXGB_SYSCTL_ADD_ULONG
 3703         }
 3704 }
 3705         
 3706 /**
 3707  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3708  *      @qs: the queue set
 3709  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3710  *      @idx: the descriptor index in the queue
 3711  *      @data: where to dump the descriptor contents
 3712  *
 3713  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3714  *      size of the descriptor.
 3715  */
 3716 int
 3717 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3718                 unsigned char *data)
 3719 {
 3720         if (qnum >= 6)
 3721                 return (EINVAL);
 3722 
 3723         if (qnum < 3) {
 3724                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3725                         return -EINVAL;
 3726                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3727                 return sizeof(struct tx_desc);
 3728         }
 3729 
 3730         if (qnum == 3) {
 3731                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3732                         return (EINVAL);
 3733                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3734                 return sizeof(struct rsp_desc);
 3735         }
 3736 
 3737         qnum -= 4;
 3738         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3739                 return (EINVAL);
 3740         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3741         return sizeof(struct rx_desc);
 3742 }
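/*
 * Illustrative usage sketch for t3_get_desc() above.  The caller picks the
 * queue with qnum (0..2: Tx, 3: response, 4..5: free list), supplies a
 * buffer large enough for any descriptor type, and gets the descriptor
 * size back on success.  This fragment assumes the driver's headers and a
 * valid qset pointer and is not compiled as part of this file.
 */
#if 0   /* example only */
        union {
                struct tx_desc txd;
                struct rsp_desc rspd;
                struct rx_desc rxd;
        } buf;
        int len;

        len = t3_get_desc(qs, 3 /* response queue */, 0 /* first entry */,
            (unsigned char *)&buf);
        if (len == sizeof(struct rsp_desc))
                printf("dumped a %d-byte response descriptor\n", len);
#endif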
