The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3 
    4 Copyright (c) 2007-2009, Chelsio Inc.
    5 All rights reserved.
    6 
    7 Redistribution and use in source and binary forms, with or without
    8 modification, are permitted provided that the following conditions are met:
    9 
   10  1. Redistributions of source code must retain the above copyright notice,
   11     this list of conditions and the following disclaimer.
   12 
   13  2. Neither the name of the Chelsio Corporation nor the names of its
   14     contributors may be used to endorse or promote products derived from
   15     this software without specific prior written permission.
   16  
   17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   27 POSSIBILITY OF SUCH DAMAGE.
   28 
   29 ***************************************************************************/
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_inet6.h"
   35 #include "opt_inet.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/module.h>
   41 #include <sys/bus.h>
   42 #include <sys/conf.h>
   43 #include <machine/bus.h>
   44 #include <machine/resource.h>
   45 #include <sys/rman.h>
   46 #include <sys/queue.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/taskqueue.h>
   49 
   50 #include <sys/proc.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sched.h>
   53 #include <sys/smp.h>
   54 #include <sys/systm.h>
   55 #include <sys/syslog.h>
   56 #include <sys/socket.h>
   57 #include <sys/sglist.h>
   58 
   59 #include <net/if.h>
   60 #include <net/if_var.h>
   61 #include <net/bpf.h>    
   62 #include <net/ethernet.h>
   63 #include <net/if_vlan_var.h>
   64 
   65 #include <netinet/in_systm.h>
   66 #include <netinet/in.h>
   67 #include <netinet/ip.h>
   68 #include <netinet/ip6.h>
   69 #include <netinet/tcp.h>
   70 
   71 #include <dev/pci/pcireg.h>
   72 #include <dev/pci/pcivar.h>
   73 
   74 #include <vm/vm.h>
   75 #include <vm/pmap.h>
   76 
   77 #include <cxgb_include.h>
   78 #include <sys/mvec.h>
   79 
   80 int     txq_fills = 0;
   81 int     multiq_tx_enable = 1;
   82 
   83 #ifdef TCP_OFFLOAD
   84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
   85 #endif
   86 
   87 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   90     "size of per-queue mbuf ring");
   91 
   92 static int cxgb_tx_coalesce_force = 0;
   93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
   94     &cxgb_tx_coalesce_force, 0,
   95     "coalesce small packets into a single work request regardless of ring state");
   96 
   97 #define COALESCE_START_DEFAULT          TX_ETH_Q_SIZE>>1
   98 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   99 #define COALESCE_STOP_DEFAULT           TX_ETH_Q_SIZE>>2
  100 #define COALESCE_STOP_MIN               TX_ETH_Q_SIZE>>5
  101 #define TX_RECLAIM_DEFAULT              TX_ETH_Q_SIZE>>5
  102 #define TX_RECLAIM_MAX                  TX_ETH_Q_SIZE>>2
  103 #define TX_RECLAIM_MIN                  TX_ETH_Q_SIZE>>6
  104 
  105 
  106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
  107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
  108     &cxgb_tx_coalesce_enable_start, 0,
  109     "coalesce enable threshold");
  110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
  112     &cxgb_tx_coalesce_enable_stop, 0,
  113     "coalesce disable threshold");
  114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
  116     &cxgb_tx_reclaim_threshold, 0,
  117     "tx cleaning minimum threshold");
  118 
  119 /*
  120  * XXX don't re-enable this until TOE stops assuming
  121  * we have an m_ext
  122  */
  123 static int recycle_enable = 0;
  124 
  125 extern int cxgb_use_16k_clusters;
  126 extern int nmbjumbop;
  127 extern int nmbjumbo9;
  128 extern int nmbjumbo16;
  129 
  130 #define USE_GTS 0
  131 
  132 #define SGE_RX_SM_BUF_SIZE      1536
  133 #define SGE_RX_DROP_THRES       16
  134 #define SGE_RX_COPY_THRES       128
  135 
  136 /*
  137  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  138  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  139  */
  140 #define TX_RECLAIM_PERIOD       (hz >> 1)
  141 
  142 /* 
  143  * Values for sge_txq.flags
  144  */
  145 enum {
  146         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  147         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  148 };
  149 
  150 struct tx_desc {
  151         uint64_t        flit[TX_DESC_FLITS];
  152 } __packed;
  153 
  154 struct rx_desc {
  155         uint32_t        addr_lo;
  156         uint32_t        len_gen;
  157         uint32_t        gen2;
  158         uint32_t        addr_hi;
  159 } __packed;
  160 
  161 struct rsp_desc {               /* response queue descriptor */
  162         struct rss_header       rss_hdr;
  163         uint32_t                flags;
  164         uint32_t                len_cq;
  165         uint8_t                 imm_data[47];
  166         uint8_t                 intr_gen;
  167 } __packed;
  168 
  169 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  170 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  171 #define RX_SW_DESC_INUSE        (1 << 3)
  172 #define TX_SW_DESC_MAPPED       (1 << 4)
  173 
  174 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  175 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  176 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  177 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  178 
  179 struct tx_sw_desc {                /* SW state per Tx descriptor */
  180         struct mbuf     *m;
  181         bus_dmamap_t    map;
  182         int             flags;
  183 };
  184 
  185 struct rx_sw_desc {                /* SW state per Rx descriptor */
  186         caddr_t         rxsd_cl;
  187         struct mbuf     *m;
  188         bus_dmamap_t    map;
  189         int             flags;
  190 };
  191 
  192 struct txq_state {
  193         unsigned int    compl;
  194         unsigned int    gen;
  195         unsigned int    pidx;
  196 };
  197 
  198 struct refill_fl_cb_arg {
  199         int               error;
  200         bus_dma_segment_t seg;
  201         int               nseg;
  202 };
  203 
  204 
  205 /*
  206  * Maps a number of flits to the number of Tx descriptors that can hold them.
  207  * The formula is
  208  *
  209  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  210  *
  211  * HW allows up to 4 descriptors to be combined into a WR.
  212  */
  213 static uint8_t flit_desc_map[] = {
  214         0,
  215 #if SGE_NUM_GENBITS == 1
  216         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  217         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  218         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  219         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  220 #elif SGE_NUM_GENBITS == 2
  221         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  222         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  223         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  224         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  225 #else
  226 # error "SGE_NUM_GENBITS must be 1 or 2"
  227 #endif
  228 };
  229 
  230 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  231 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  232 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  233 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  234 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  235 #define TXQ_RING_NEEDS_ENQUEUE(qs)                                      \
  236         drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  237 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  239         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  240 #define TXQ_RING_DEQUEUE(qs) \
  241         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  242 
  243 int cxgb_debug = 0;
  244 
  245 static void sge_timer_cb(void *arg);
  246 static void sge_timer_reclaim(void *arg, int ncount);
  247 static void sge_txq_reclaim_handler(void *arg, int ncount);
  248 static void cxgb_start_locked(struct sge_qset *qs);
  249 
  250 /*
  251  * XXX need to cope with bursty scheduling by looking at a wider
  252  * window than we are now for determining the need for coalescing
  253  *
  254  */
  255 static __inline uint64_t
  256 check_pkt_coalesce(struct sge_qset *qs) 
  257 { 
  258         struct adapter *sc; 
  259         struct sge_txq *txq; 
  260         uint8_t *fill;
  261 
  262         if (__predict_false(cxgb_tx_coalesce_force))
  263                 return (1);
  264         txq = &qs->txq[TXQ_ETH]; 
  265         sc = qs->port->adapter; 
  266         fill = &sc->tunq_fill[qs->idx];
  267 
  268         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  269                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  270         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  271                 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
  272         /*
  273          * if the hardware transmit queue is more than 1/8 full
  274          * we mark it as coalescing - we drop back from coalescing
  275          * when we go below 1/32 full and there are no packets enqueued, 
  276          * this provides us with some degree of hysteresis
  277          */
  278         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  279             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  280                 *fill = 0; 
  281         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  282                 *fill = 1; 
  283 
  284         return (sc->tunq_coalesce);
  285 } 
  286 
  287 #ifdef __LP64__
  288 static void
  289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  290 {
  291         uint64_t wr_hilo;
  292 #if _BYTE_ORDER == _LITTLE_ENDIAN
  293         wr_hilo = wr_hi;
  294         wr_hilo |= (((uint64_t)wr_lo)<<32);
  295 #else
  296         wr_hilo = wr_lo;
  297         wr_hilo |= (((uint64_t)wr_hi)<<32);
  298 #endif  
  299         wrp->wrh_hilo = wr_hilo;
  300 }
  301 #else
  302 static void
  303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  304 {
  305 
  306         wrp->wrh_hi = wr_hi;
  307         wmb();
  308         wrp->wrh_lo = wr_lo;
  309 }
  310 #endif
  311 
  312 struct coalesce_info {
  313         int count;
  314         int nbytes;
  315         int noncoal;
  316 };
  317 
  318 static int
  319 coalesce_check(struct mbuf *m, void *arg)
  320 {
  321         struct coalesce_info *ci = arg;
  322 
  323         if ((m->m_next != NULL) ||
  324             ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
  325                 ci->noncoal = 1;
  326 
  327         if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
  328             (ci->nbytes + m->m_len <= 10500))) {
  329                 ci->count++;
  330                 ci->nbytes += m->m_len;
  331                 return (1);
  332         }
  333         return (0);
  334 }
  335 
  336 static struct mbuf *
  337 cxgb_dequeue(struct sge_qset *qs)
  338 {
  339         struct mbuf *m, *m_head, *m_tail;
  340         struct coalesce_info ci;
  341 
  342         
  343         if (check_pkt_coalesce(qs) == 0) 
  344                 return TXQ_RING_DEQUEUE(qs);
  345 
  346         m_head = m_tail = NULL;
  347         ci.count = ci.nbytes = ci.noncoal = 0;
  348         do {
  349                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  350                 if (m_head == NULL) {
  351                         m_tail = m_head = m;
  352                 } else if (m != NULL) {
  353                         m_tail->m_nextpkt = m;
  354                         m_tail = m;
  355                 }
  356         } while (m != NULL);
  357         if (ci.count > 7)
  358                 panic("trying to coalesce %d packets in to one WR", ci.count);
  359         return (m_head);
  360 }
  361         
  362 /**
  363  *      reclaim_completed_tx - reclaims completed Tx descriptors
  364  *      @adapter: the adapter
  365  *      @q: the Tx queue to reclaim completed descriptors from
  366  *
  367  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  368  *      and frees the associated buffers if possible.  Called with the Tx
  369  *      queue's lock held.
  370  */
  371 static __inline int
  372 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  373 {
  374         struct sge_txq *q = &qs->txq[queue];
  375         int reclaim = desc_reclaimable(q);
  376 
  377         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  378             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  379                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  380 
  381         if (reclaim < reclaim_min)
  382                 return (0);
  383 
  384         mtx_assert(&qs->lock, MA_OWNED);
  385         if (reclaim > 0) {
  386                 t3_free_tx_desc(qs, reclaim, queue);
  387                 q->cleaned += reclaim;
  388                 q->in_use -= reclaim;
  389         }
  390         if (isset(&qs->txq_stopped, TXQ_ETH))
  391                 clrbit(&qs->txq_stopped, TXQ_ETH);
  392 
  393         return (reclaim);
  394 }
  395 
  396 #ifdef DEBUGNET
  397 int
  398 cxgb_debugnet_poll_tx(struct sge_qset *qs)
  399 {
  400 
  401         return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
  402 }
  403 #endif
  404 
  405 /**
  406  *      should_restart_tx - are there enough resources to restart a Tx queue?
  407  *      @q: the Tx queue
  408  *
  409  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  410  */
  411 static __inline int
  412 should_restart_tx(const struct sge_txq *q)
  413 {
  414         unsigned int r = q->processed - q->cleaned;
  415 
  416         return q->in_use - r < (q->size >> 1);
  417 }
  418 
  419 /**
  420  *      t3_sge_init - initialize SGE
  421  *      @adap: the adapter
  422  *      @p: the SGE parameters
  423  *
  424  *      Performs SGE initialization needed every time after a chip reset.
  425  *      We do not initialize any of the queue sets here, instead the driver
  426  *      top-level must request those individually.  We also do not enable DMA
  427  *      here, that should be done after the queues have been set up.
  428  */
  429 void
  430 t3_sge_init(adapter_t *adap, struct sge_params *p)
  431 {
  432         u_int ctrl, ups;
  433 
  434         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  435 
  436         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  437                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  438                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  439                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  440 #if SGE_NUM_GENBITS == 1
  441         ctrl |= F_EGRGENCTRL;
  442 #endif
  443         if (adap->params.rev > 0) {
  444                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  445                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  446         }
  447         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  448         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  449                      V_LORCQDRBTHRSH(512));
  450         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  451         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  452                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  453         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  454                      adap->params.rev < T3_REV_C ? 1000 : 500);
  455         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  456         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  457         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  458         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  459         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  460 }
  461 
  462 
  463 /**
  464  *      sgl_len - calculates the size of an SGL of the given capacity
  465  *      @n: the number of SGL entries
  466  *
  467  *      Calculates the number of flits needed for a scatter/gather list that
  468  *      can hold the given number of entries.
  469  */
  470 static __inline unsigned int
  471 sgl_len(unsigned int n)
  472 {
  473         return ((3 * n) / 2 + (n & 1));
  474 }
  475 
  476 /**
  477  *      get_imm_packet - return the next ingress packet buffer from a response
  478  *      @resp: the response descriptor containing the packet data
  479  *
  480  *      Return a packet containing the immediate data of the given response.
  481  */
  482 static int
  483 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  484 {
  485 
  486         if (resp->rss_hdr.opcode == CPL_RX_DATA) {
  487                 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
  488                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  489         } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
  490                 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
  491                 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
  492         } else
  493                 m->m_len = IMMED_PKT_SIZE;
  494         m->m_ext.ext_buf = NULL;
  495         m->m_ext.ext_type = 0;
  496         memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 
  497         return (0);     
  498 }
  499 
  500 static __inline u_int
  501 flits_to_desc(u_int n)
  502 {
  503         return (flit_desc_map[n]);
  504 }
  505 
  506 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  507                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  508                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  509                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  510                     F_HIRCQPARITYERROR)
  511 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  512 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  513                       F_RSPQDISABLED)
  514 
  515 /**
  516  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  517  *      @adapter: the adapter
  518  *
  519  *      Interrupt handler for SGE asynchronous (non-data) events.
  520  */
  521 void
  522 t3_sge_err_intr_handler(adapter_t *adapter)
  523 {
  524         unsigned int v, status;
  525 
  526         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  527         if (status & SGE_PARERR)
  528                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  529                          status & SGE_PARERR);
  530         if (status & SGE_FRAMINGERR)
  531                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  532                          status & SGE_FRAMINGERR);
  533         if (status & F_RSPQCREDITOVERFOW)
  534                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  535 
  536         if (status & F_RSPQDISABLED) {
  537                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  538 
  539                 CH_ALERT(adapter,
  540                          "packet delivered to disabled response queue (0x%x)\n",
  541                          (v >> S_RSPQ0DISABLED) & 0xff);
  542         }
  543 
  544         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  545         if (status & SGE_FATALERR)
  546                 t3_fatal_err(adapter);
  547 }
  548 
  549 void
  550 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  551 {
  552         int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
  553 
  554         nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
  555         nqsets *= adap->params.nports;
  556 
  557         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  558 
  559         while (!powerof2(fl_q_size))
  560                 fl_q_size--;
  561 
  562         use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
  563             is_offload(adap);
  564 
  565         if (use_16k) {
  566                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  567                 jumbo_buf_size = MJUM16BYTES;
  568         } else {
  569                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  570                 jumbo_buf_size = MJUM9BYTES;
  571         }
  572         while (!powerof2(jumbo_q_size))
  573                 jumbo_q_size--;
  574 
  575         if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
  576                 device_printf(adap->dev,
  577                     "Insufficient clusters and/or jumbo buffers.\n");
  578 
  579         p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
  580 
  581         for (i = 0; i < SGE_QSETS; ++i) {
  582                 struct qset_params *q = p->qset + i;
  583 
  584                 if (adap->params.nports > 2) {
  585                         q->coalesce_usecs = 50;
  586                 } else {
  587 #ifdef INVARIANTS                       
  588                         q->coalesce_usecs = 10;
  589 #else
  590                         q->coalesce_usecs = 5;
  591 #endif                  
  592                 }
  593                 q->polling = 0;
  594                 q->rspq_size = RSPQ_Q_SIZE;
  595                 q->fl_size = fl_q_size;
  596                 q->jumbo_size = jumbo_q_size;
  597                 q->jumbo_buf_size = jumbo_buf_size;
  598                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  599                 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
  600                 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
  601                 q->cong_thres = 0;
  602         }
  603 }
  604 
  605 int
  606 t3_sge_alloc(adapter_t *sc)
  607 {
  608 
  609         /* The parent tag. */
  610         if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
  611                                 1, 0,                   /* algnmnt, boundary */
  612                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  613                                 BUS_SPACE_MAXADDR,      /* highaddr */
  614                                 NULL, NULL,             /* filter, filterarg */
  615                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  616                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  617                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  618                                 0,                      /* flags */
  619                                 NULL, NULL,             /* lock, lockarg */
  620                                 &sc->parent_dmat)) {
  621                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  622                 return (ENOMEM);
  623         }
  624 
  625         /*
  626          * DMA tag for normal sized RX frames
  627          */
  628         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  629                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  630                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  631                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  632                 return (ENOMEM);
  633         }
  634 
  635         /* 
  636          * DMA tag for jumbo sized RX frames.
  637          */
  638         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  639                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  640                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  641                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  642                 return (ENOMEM);
  643         }
  644 
  645         /* 
  646          * DMA tag for TX frames.
  647          */
  648         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  649                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  650                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  651                 NULL, NULL, &sc->tx_dmat)) {
  652                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  653                 return (ENOMEM);
  654         }
  655 
  656         return (0);
  657 }
  658 
  659 int
  660 t3_sge_free(struct adapter * sc)
  661 {
  662 
  663         if (sc->tx_dmat != NULL)
  664                 bus_dma_tag_destroy(sc->tx_dmat);
  665 
  666         if (sc->rx_jumbo_dmat != NULL)
  667                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  668 
  669         if (sc->rx_dmat != NULL)
  670                 bus_dma_tag_destroy(sc->rx_dmat);
  671 
  672         if (sc->parent_dmat != NULL)
  673                 bus_dma_tag_destroy(sc->parent_dmat);
  674 
  675         return (0);
  676 }
  677 
  678 void
  679 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  680 {
  681 
  682         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  683         qs->rspq.polling = 0 /* p->polling */;
  684 }
  685 
  686 #if !defined(__i386__) && !defined(__amd64__)
  687 static void
  688 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  689 {
  690         struct refill_fl_cb_arg *cb_arg = arg;
  691         
  692         cb_arg->error = error;
  693         cb_arg->seg = segs[0];
  694         cb_arg->nseg = nseg;
  695 
  696 }
  697 #endif
  698 /**
  699  *      refill_fl - refill an SGE free-buffer list
  700  *      @sc: the controller softc
  701  *      @q: the free-list to refill
  702  *      @n: the number of new buffers to allocate
  703  *
  704  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  705  *      The caller must assure that @n does not exceed the queue's capacity.
  706  */
  707 static void
  708 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  709 {
  710         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  711         struct rx_desc *d = &q->desc[q->pidx];
  712         struct refill_fl_cb_arg cb_arg;
  713         struct mbuf *m;
  714         caddr_t cl;
  715         int err;
  716         
  717         cb_arg.error = 0;
  718         while (n--) {
  719                 /*
  720                  * We allocate an uninitialized mbuf + cluster, mbuf is
  721                  * initialized after rx.
  722                  */
  723                 if (q->zone == zone_pack) {
  724                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  725                                 break;
  726                         cl = m->m_ext.ext_buf;                  
  727                 } else {
  728                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  729                                 break;
  730                         if ((m = m_gethdr_raw(M_NOWAIT, 0)) == NULL) {
  731                                 uma_zfree(q->zone, cl);
  732                                 break;
  733                         }
  734                 }
  735                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  736                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  737                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  738                                 uma_zfree(q->zone, cl);
  739                                 goto done;
  740                         }
  741                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  742                 }
  743 #if !defined(__i386__) && !defined(__amd64__)
  744                 err = bus_dmamap_load(q->entry_tag, sd->map,
  745                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  746                 
  747                 if (err != 0 || cb_arg.error) {
  748                         if (q->zone != zone_pack)
  749                                 uma_zfree(q->zone, cl);
  750                         m_free(m);
  751                         goto done;
  752                 }
  753 #else
  754                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  755 #endif          
  756                 sd->flags |= RX_SW_DESC_INUSE;
  757                 sd->rxsd_cl = cl;
  758                 sd->m = m;
  759                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  760                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  761                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  762                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  763 
  764                 d++;
  765                 sd++;
  766 
  767                 if (++q->pidx == q->size) {
  768                         q->pidx = 0;
  769                         q->gen ^= 1;
  770                         sd = q->sdesc;
  771                         d = q->desc;
  772                 }
  773                 q->credits++;
  774                 q->db_pending++;
  775         }
  776 
  777 done:
  778         if (q->db_pending >= 32) {
  779                 q->db_pending = 0;
  780                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  781         }
  782 }
  783 
  784 
  785 /**
  786  *      free_rx_bufs - free the Rx buffers on an SGE free list
  787  *      @sc: the controle softc
  788  *      @q: the SGE free list to clean up
  789  *
  790  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  791  *      this queue should be stopped before calling this function.
  792  */
  793 static void
  794 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  795 {
  796         u_int cidx = q->cidx;
  797 
  798         while (q->credits--) {
  799                 struct rx_sw_desc *d = &q->sdesc[cidx];
  800 
  801                 if (d->flags & RX_SW_DESC_INUSE) {
  802                         bus_dmamap_unload(q->entry_tag, d->map);
  803                         bus_dmamap_destroy(q->entry_tag, d->map);
  804                         if (q->zone == zone_pack) {
  805                                 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
  806                                 uma_zfree(zone_pack, d->m);
  807                         } else {
  808                                 m_init(d->m, M_NOWAIT, MT_DATA, 0);
  809                                 m_free_raw(d->m);
  810                                 uma_zfree(q->zone, d->rxsd_cl);
  811                         }                       
  812                 }
  813                 
  814                 d->rxsd_cl = NULL;
  815                 d->m = NULL;
  816                 if (++cidx == q->size)
  817                         cidx = 0;
  818         }
  819 }
  820 
  821 static __inline void
  822 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  823 {
  824         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  825 }
  826 
  827 static __inline void
  828 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  829 {
  830         uint32_t reclaimable = fl->size - fl->credits;
  831 
  832         if (reclaimable > 0)
  833                 refill_fl(adap, fl, min(max, reclaimable));
  834 }
  835 
  836 /**
  837  *      recycle_rx_buf - recycle a receive buffer
  838  *      @adapter: the adapter
  839  *      @q: the SGE free list
  840  *      @idx: index of buffer to recycle
  841  *
  842  *      Recycles the specified buffer on the given free list by adding it at
  843  *      the next available slot on the list.
  844  */
  845 static void
  846 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  847 {
  848         struct rx_desc *from = &q->desc[idx];
  849         struct rx_desc *to   = &q->desc[q->pidx];
  850 
  851         q->sdesc[q->pidx] = q->sdesc[idx];
  852         to->addr_lo = from->addr_lo;        // already big endian
  853         to->addr_hi = from->addr_hi;        // likewise
  854         wmb();  /* necessary ? */
  855         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  856         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  857         q->credits++;
  858 
  859         if (++q->pidx == q->size) {
  860                 q->pidx = 0;
  861                 q->gen ^= 1;
  862         }
  863         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  864 }
  865 
  866 static void
  867 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  868 {
  869         uint32_t *addr;
  870 
  871         addr = arg;
  872         *addr = segs[0].ds_addr;
  873 }
  874 
  875 static int
  876 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  877     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  878     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  879 {
  880         size_t len = nelem * elem_size;
  881         void *s = NULL;
  882         void *p = NULL;
  883         int err;
  884 
  885         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  886                                       BUS_SPACE_MAXADDR_32BIT,
  887                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  888                                       len, 0, NULL, NULL, tag)) != 0) {
  889                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  890                 return (ENOMEM);
  891         }
  892 
  893         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  894                                     map)) != 0) {
  895                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  896                 return (ENOMEM);
  897         }
  898 
  899         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  900         bzero(p, len);
  901         *(void **)desc = p;
  902 
  903         if (sw_size) {
  904                 len = nelem * sw_size;
  905                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  906                 *(void **)sdesc = s;
  907         }
  908         if (parent_entry_tag == NULL)
  909                 return (0);
  910             
  911         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  912                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  913                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  914                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  915                                       NULL, NULL, entry_tag)) != 0) {
  916                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  917                 return (ENOMEM);
  918         }
  919         return (0);
  920 }
  921 
  922 static void
  923 sge_slow_intr_handler(void *arg, int ncount)
  924 {
  925         adapter_t *sc = arg;
  926 
  927         t3_slow_intr_handler(sc);
  928         t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
  929         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
  930 }
  931 
  932 /**
  933  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  934  *      @data: the SGE queue set to maintain
  935  *
  936  *      Runs periodically from a timer to perform maintenance of an SGE queue
  937  *      set.  It performs two tasks:
  938  *
  939  *      a) Cleans up any completed Tx descriptors that may still be pending.
  940  *      Normal descriptor cleanup happens when new packets are added to a Tx
  941  *      queue so this timer is relatively infrequent and does any cleanup only
  942  *      if the Tx queue has not seen any new packets in a while.  We make a
  943  *      best effort attempt to reclaim descriptors, in that we don't wait
  944  *      around if we cannot get a queue's lock (which most likely is because
  945  *      someone else is queueing new packets and so will also handle the clean
  946  *      up).  Since control queues use immediate data exclusively we don't
  947  *      bother cleaning them up here.
  948  *
  949  *      b) Replenishes Rx queues that have run out due to memory shortage.
  950  *      Normally new Rx buffers are added when existing ones are consumed but
  951  *      when out of memory a queue can become empty.  We try to add only a few
  952  *      buffers here, the queue will be replenished fully as these new buffers
  953  *      are used up if memory shortage has subsided.
  954  *      
  955  *      c) Return coalesced response queue credits in case a response queue is
  956  *      starved.
  957  *
  958  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  959  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  960  */
  961 static void
  962 sge_timer_cb(void *arg)
  963 {
  964         adapter_t *sc = arg;
  965         if ((sc->flags & USING_MSIX) == 0) {
  966                 
  967                 struct port_info *pi;
  968                 struct sge_qset *qs;
  969                 struct sge_txq  *txq;
  970                 int i, j;
  971                 int reclaim_ofl, refill_rx;
  972 
  973                 if (sc->open_device_map == 0) 
  974                         return;
  975 
  976                 for (i = 0; i < sc->params.nports; i++) {
  977                         pi = &sc->port[i];
  978                         for (j = 0; j < pi->nqsets; j++) {
  979                                 qs = &sc->sge.qs[pi->first_qset + j];
  980                                 txq = &qs->txq[0];
  981                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  982                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  983                                     (qs->fl[1].credits < qs->fl[1].size));
  984                                 if (reclaim_ofl || refill_rx) {
  985                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  986                                         break;
  987                                 }
  988                         }
  989                 }
  990         }
  991         
  992         if (sc->params.nports > 2) {
  993                 int i;
  994 
  995                 for_each_port(sc, i) {
  996                         struct port_info *pi = &sc->port[i];
  997 
  998                         t3_write_reg(sc, A_SG_KDOORBELL, 
  999                                      F_SELEGRCNTX | 
 1000                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
 1001                 }
 1002         }       
 1003         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
 1004             sc->open_device_map != 0)
 1005                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1006 }
 1007 
 1008 /*
 1009  * This is meant to be a catch-all function to keep sge state private
 1010  * to sge.c
 1011  *
 1012  */
 1013 int
 1014 t3_sge_init_adapter(adapter_t *sc)
 1015 {
 1016         callout_init(&sc->sge_timer_ch, 1);
 1017         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1018         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
 1019         return (0);
 1020 }
 1021 
 1022 int
 1023 t3_sge_reset_adapter(adapter_t *sc)
 1024 {
 1025         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
 1026         return (0);
 1027 }
 1028 
 1029 int
 1030 t3_sge_init_port(struct port_info *pi)
 1031 {
 1032         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
 1033         return (0);
 1034 }
 1035 
 1036 /**
 1037  *      refill_rspq - replenish an SGE response queue
 1038  *      @adapter: the adapter
 1039  *      @q: the response queue to replenish
 1040  *      @credits: how many new responses to make available
 1041  *
 1042  *      Replenishes a response queue by making the supplied number of responses
 1043  *      available to HW.
 1044  */
 1045 static __inline void
 1046 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1047 {
 1048 
 1049         /* mbufs are allocated on demand when a rspq entry is processed. */
 1050         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1051                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1052 }
 1053 
 1054 static void
 1055 sge_txq_reclaim_handler(void *arg, int ncount)
 1056 {
 1057         struct sge_qset *qs = arg;
 1058         int i;
 1059 
 1060         for (i = 0; i < 3; i++)
 1061                 reclaim_completed_tx(qs, 16, i);
 1062 }
 1063 
 1064 static void
 1065 sge_timer_reclaim(void *arg, int ncount)
 1066 {
 1067         struct port_info *pi = arg;
 1068         int i, nqsets = pi->nqsets;
 1069         adapter_t *sc = pi->adapter;
 1070         struct sge_qset *qs;
 1071         struct mtx *lock;
 1072         
 1073         KASSERT((sc->flags & USING_MSIX) == 0,
 1074             ("can't call timer reclaim for msi-x"));
 1075 
 1076         for (i = 0; i < nqsets; i++) {
 1077                 qs = &sc->sge.qs[pi->first_qset + i];
 1078 
 1079                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1080                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1081                             &sc->sge.qs[0].rspq.lock;
 1082 
 1083                 if (mtx_trylock(lock)) {
 1084                         /* XXX currently assume that we are *NOT* polling */
 1085                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1086 
 1087                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1088                                 __refill_fl(sc, &qs->fl[0]);
 1089                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1090                                 __refill_fl(sc, &qs->fl[1]);
 1091                         
 1092                         if (status & (1 << qs->rspq.cntxt_id)) {
 1093                                 if (qs->rspq.credits) {
 1094                                         refill_rspq(sc, &qs->rspq, 1);
 1095                                         qs->rspq.credits--;
 1096                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1097                                             1 << qs->rspq.cntxt_id);
 1098                                 }
 1099                         }
 1100                         mtx_unlock(lock);
 1101                 }
 1102         }
 1103 }
 1104 
 1105 /**
 1106  *      init_qset_cntxt - initialize an SGE queue set context info
 1107  *      @qs: the queue set
 1108  *      @id: the queue set id
 1109  *
 1110  *      Initializes the TIDs and context ids for the queues of a queue set.
 1111  */
 1112 static void
 1113 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1114 {
 1115 
 1116         qs->rspq.cntxt_id = id;
 1117         qs->fl[0].cntxt_id = 2 * id;
 1118         qs->fl[1].cntxt_id = 2 * id + 1;
 1119         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1120         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1121         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1122         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1123         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1124 
 1125         /* XXX: a sane limit is needed instead of INT_MAX */
 1126         mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
 1127         mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
 1128         mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
 1129 }
 1130 
 1131 
 1132 static void
 1133 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1134 {
 1135         txq->in_use += ndesc;
 1136         /*
 1137          * XXX we don't handle stopping of queue
 1138          * presumably start handles this when we bump against the end
 1139          */
 1140         txqs->gen = txq->gen;
 1141         txq->unacked += ndesc;
 1142         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1143         txq->unacked &= 31;
 1144         txqs->pidx = txq->pidx;
 1145         txq->pidx += ndesc;
 1146 #ifdef INVARIANTS
 1147         if (((txqs->pidx > txq->cidx) &&
 1148                 (txq->pidx < txqs->pidx) &&
 1149                 (txq->pidx >= txq->cidx)) ||
 1150             ((txqs->pidx < txq->cidx) &&
 1151                 (txq->pidx >= txq-> cidx)) ||
 1152             ((txqs->pidx < txq->cidx) &&
 1153                 (txq->cidx < txqs->pidx)))
 1154                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1155                     txqs->pidx, txq->pidx, txq->cidx);
 1156 #endif
 1157         if (txq->pidx >= txq->size) {
 1158                 txq->pidx -= txq->size;
 1159                 txq->gen ^= 1;
 1160         }
 1161 
 1162 }
 1163 
 1164 /**
 1165  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1166  *      @m: the packet mbufs
 1167  *      @nsegs: the number of segments 
 1168  *
 1169  *      Returns the number of Tx descriptors needed for the given Ethernet
 1170  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1171  */
 1172 static __inline unsigned int
 1173 calc_tx_descs(const struct mbuf *m, int nsegs)
 1174 {
 1175         unsigned int flits;
 1176 
 1177         if (m->m_pkthdr.len <= PIO_LEN)
 1178                 return 1;
 1179 
 1180         flits = sgl_len(nsegs) + 2;
 1181         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1182                 flits++;
 1183 
 1184         return flits_to_desc(flits);
 1185 }
 1186 
 1187 /**
 1188  *      make_sgl - populate a scatter/gather list for a packet
 1189  *      @sgp: the SGL to populate
 1190  *      @segs: the packet dma segments
 1191  *      @nsegs: the number of segments
 1192  *
 1193  *      Generates a scatter/gather list for the buffers that make up a packet
 1194  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1195  *      appropriately.
 1196  */
 1197 static __inline void
 1198 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1199 {
 1200         int i, idx;
 1201         
 1202         for (idx = 0, i = 0; i < nsegs; i++) {
 1203                 /*
 1204                  * firmware doesn't like empty segments
 1205                  */
 1206                 if (segs[i].ds_len == 0)
 1207                         continue;
 1208                 if (i && idx == 0) 
 1209                         ++sgp;
 1210                 
 1211                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1212                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1213                 idx ^= 1;
 1214         }
 1215         
 1216         if (idx) {
 1217                 sgp->len[idx] = 0;
 1218                 sgp->addr[idx] = 0;
 1219         }
 1220 }
 1221         
 1222 /**
 1223  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1224  *      @adap: the adapter
 1225  *      @q: the Tx queue
 1226  *
 1227  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1228  *      where the HW is going to sleep just after we checked, however,
 1229  *      then the interrupt handler will detect the outstanding TX packet
 1230  *      and ring the doorbell for us.
 1231  *
 1232  *      When GTS is disabled we unconditionally ring the doorbell.
 1233  */
 1234 static __inline void
 1235 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
 1236 {
 1237 #if USE_GTS
 1238         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1239         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1240                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1241 #ifdef T3_TRACE
 1242                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1243                           q->cntxt_id);
 1244 #endif
 1245                 t3_write_reg(adap, A_SG_KDOORBELL,
 1246                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1247         }
 1248 #else
 1249         if (mustring || ++q->db_pending >= 32) {
 1250                 wmb();            /* write descriptors before telling HW */
 1251                 t3_write_reg(adap, A_SG_KDOORBELL,
 1252                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1253                 q->db_pending = 0;
 1254         }
 1255 #endif
 1256 }
 1257 
 1258 static __inline void
 1259 wr_gen2(struct tx_desc *d, unsigned int gen)
 1260 {
 1261 #if SGE_NUM_GENBITS == 2
 1262         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1263 #endif
 1264 }
 1265 
 1266 /**
 1267  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1268  *      @ndesc: number of Tx descriptors spanned by the SGL
 1269  *      @txd: first Tx descriptor to be written
 1270  *      @txqs: txq state (generation and producer index)
 1271  *      @txq: the SGE Tx queue
 1272  *      @sgl: the SGL
 1273  *      @flits: number of flits to the start of the SGL in the first descriptor
 1274  *      @sgl_flits: the SGL size in flits
 1275  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1276  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1277  *
 1278  *      Write a work request header and an associated SGL.  If the SGL is
 1279  *      small enough to fit into one Tx descriptor it has already been written
 1280  *      and we just need to write the WR header.  Otherwise we distribute the
 1281  *      SGL across the number of descriptors it spans.
 1282  */
 1283 static void
 1284 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1285     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1286     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1287 {
 1288 
 1289         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1290         
 1291         if (__predict_true(ndesc == 1)) {
 1292                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1293                     V_WR_SGLSFLT(flits)) | wr_hi,
 1294                     htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
 1295                     wr_lo);
 1296 
 1297                 wr_gen2(txd, txqs->gen);
 1298                 
 1299         } else {
 1300                 unsigned int ogen = txqs->gen;
 1301                 const uint64_t *fp = (const uint64_t *)sgl;
 1302                 struct work_request_hdr *wp = wrp;
 1303                 
 1304                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1305                     V_WR_SGLSFLT(flits)) | wr_hi;
 1306                 
 1307                 while (sgl_flits) {
 1308                         unsigned int avail = WR_FLITS - flits;
 1309 
 1310                         if (avail > sgl_flits)
 1311                                 avail = sgl_flits;
 1312                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1313                         sgl_flits -= avail;
 1314                         ndesc--;
 1315                         if (!sgl_flits)
 1316                                 break;
 1317                         
 1318                         fp += avail;
 1319                         txd++;
 1320                         if (++txqs->pidx == txq->size) {
 1321                                 txqs->pidx = 0;
 1322                                 txqs->gen ^= 1;
 1323                                 txd = txq->desc;
 1324                         }
 1325 
 1326                         /*
 1327                          * when the head of the mbuf chain
 1328                          * is freed all clusters will be freed
 1329                          * with it
 1330                          */
 1331                         wrp = (struct work_request_hdr *)txd;
 1332                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1333                             V_WR_SGLSFLT(1)) | wr_hi;
 1334                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1335                                     sgl_flits + 1)) |
 1336                             V_WR_GEN(txqs->gen)) | wr_lo;
 1337                         wr_gen2(txd, txqs->gen);
 1338                         flits = 1;
 1339                 }
 1340                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1341                 wmb();
 1342                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1343                 wr_gen2((struct tx_desc *)wp, ogen);
 1344         }
 1345 }
 1346 
 1347 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
 1348 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
 1349 
 1350 #define GET_VTAG(cntrl, m) \
 1351 do { \
 1352         if ((m)->m_flags & M_VLANTAG)                                               \
 1353                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1354 } while (0)
 1355 
 1356 static int
 1357 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1358 {
 1359         adapter_t *sc;
 1360         struct mbuf *m0;
 1361         struct sge_txq *txq;
 1362         struct txq_state txqs;
 1363         struct port_info *pi;
 1364         unsigned int ndesc, flits, cntrl, mlen;
 1365         int err, nsegs, tso_info = 0;
 1366 
 1367         struct work_request_hdr *wrp;
 1368         struct tx_sw_desc *txsd;
 1369         struct sg_ent *sgp, *sgl;
 1370         uint32_t wr_hi, wr_lo, sgl_flits; 
 1371         bus_dma_segment_t segs[TX_MAX_SEGS];
 1372 
 1373         struct tx_desc *txd;
 1374                 
 1375         pi = qs->port;
 1376         sc = pi->adapter;
 1377         txq = &qs->txq[TXQ_ETH];
 1378         txd = &txq->desc[txq->pidx];
 1379         txsd = &txq->sdesc[txq->pidx];
 1380         sgl = txq->txq_sgl;
 1381 
 1382         prefetch(txd);
 1383         m0 = *m;
 1384 
 1385         mtx_assert(&qs->lock, MA_OWNED);
 1386         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1387         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1388         
 1389         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1390             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1391                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1392 
 1393         if (m0->m_nextpkt != NULL) {
 1394                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1395                 ndesc = 1;
 1396                 mlen = 0;
 1397         } else {
 1398                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1399                     &m0, segs, &nsegs))) {
 1400                         if (cxgb_debug)
 1401                                 printf("failed ... err=%d\n", err);
 1402                         return (err);
 1403                 }
 1404                 mlen = m0->m_pkthdr.len;
 1405                 ndesc = calc_tx_descs(m0, nsegs);
 1406         }
 1407         txq_prod(txq, ndesc, &txqs);
 1408 
 1409         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1410         txsd->m = m0;
 1411 
 1412         if (m0->m_nextpkt != NULL) {
 1413                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1414                 int i, fidx;
 1415 
 1416                 if (nsegs > 7)
 1417                         panic("trying to coalesce %d packets in to one WR", nsegs);
 1418                 txq->txq_coalesced += nsegs;
 1419                 wrp = (struct work_request_hdr *)txd;
 1420                 flits = nsegs*2 + 1;
 1421 
 1422                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1423                         struct cpl_tx_pkt_batch_entry *cbe;
 1424                         uint64_t flit;
 1425                         uint32_t *hflit = (uint32_t *)&flit;
 1426                         int cflags = m0->m_pkthdr.csum_flags;
 1427 
 1428                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1429                         GET_VTAG(cntrl, m0);
 1430                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1431                         if (__predict_false(!(cflags & CSUM_IP)))
 1432                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1433                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
 1434                             CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1435                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1436 
 1437                         hflit[0] = htonl(cntrl);
 1438                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1439                         flit |= htobe64(1 << 24);
 1440                         cbe = &cpl_batch->pkt_entry[i];
 1441                         cbe->cntrl = hflit[0];
 1442                         cbe->len = hflit[1];
 1443                         cbe->addr = htobe64(segs[i].ds_addr);
 1444                 }
 1445 
 1446                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1447                     V_WR_SGLSFLT(flits)) |
 1448                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1449                 wr_lo = htonl(V_WR_LEN(flits) |
 1450                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1451                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1452                 wmb();
 1453                 ETHER_BPF_MTAP(pi->ifp, m0);
 1454                 wr_gen2(txd, txqs.gen);
 1455                 check_ring_tx_db(sc, txq, 0);
 1456                 return (0);             
 1457         } else if (tso_info) {
 1458                 uint16_t eth_type;
 1459                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1460                 struct ether_header *eh;
 1461                 void *l3hdr;
 1462                 struct tcphdr *tcp;
 1463 
 1464                 txd->flit[2] = 0;
 1465                 GET_VTAG(cntrl, m0);
 1466                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1467                 hdr->cntrl = htonl(cntrl);
 1468                 hdr->len = htonl(mlen | 0x80000000);
 1469 
 1470                 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
 1471                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
 1472                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1473                             (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
 1474                         panic("tx tso packet too small");
 1475                 }
 1476 
 1477                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1478                 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 1479                         m0 = m_pullup(m0, TCPPKTHDRSIZE);
 1480                         if (__predict_false(m0 == NULL)) {
 1481                                 /* XXX panic probably an overreaction */
 1482                                 panic("couldn't fit header into mbuf");
 1483                         }
 1484                 }
 1485 
 1486                 eh = mtod(m0, struct ether_header *);
 1487                 eth_type = eh->ether_type;
 1488                 if (eth_type == htons(ETHERTYPE_VLAN)) {
 1489                         struct ether_vlan_header *evh = (void *)eh;
 1490 
 1491                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
 1492                         l3hdr = evh + 1;
 1493                         eth_type = evh->evl_proto;
 1494                 } else {
 1495                         tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
 1496                         l3hdr = eh + 1;
 1497                 }
 1498 
 1499                 if (eth_type == htons(ETHERTYPE_IP)) {
 1500                         struct ip *ip = l3hdr;
 1501 
 1502                         tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
 1503                         tcp = (struct tcphdr *)(ip + 1);
 1504                 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
 1505                         struct ip6_hdr *ip6 = l3hdr;
 1506 
 1507                         KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
 1508                             ("%s: CSUM_TSO with ip6_nxt %d",
 1509                             __func__, ip6->ip6_nxt));
 1510 
 1511                         tso_info |= F_LSO_IPV6;
 1512                         tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
 1513                         tcp = (struct tcphdr *)(ip6 + 1);
 1514                 } else
 1515                         panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
 1516 
 1517                 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
 1518                 hdr->lso_info = htonl(tso_info);
 1519 
 1520                 if (__predict_false(mlen <= PIO_LEN)) {
 1521                         /*
 1522                          * pkt not undersized but fits in PIO_LEN
 1523                          * Indicates a TSO bug at the higher levels.
 1524                          */
 1525                         txsd->m = NULL;
 1526                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1527                         flits = (mlen + 7) / 8 + 3;
 1528                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1529                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1530                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1531                         wr_lo = htonl(V_WR_LEN(flits) |
 1532                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1533                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1534                         wmb();
 1535                         ETHER_BPF_MTAP(pi->ifp, m0);
 1536                         wr_gen2(txd, txqs.gen);
 1537                         check_ring_tx_db(sc, txq, 0);
 1538                         m_freem(m0);
 1539                         return (0);
 1540                 }
 1541                 flits = 3;      
 1542         } else {
 1543                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1544                 
 1545                 GET_VTAG(cntrl, m0);
 1546                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1547                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1548                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1549                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
 1550                     CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
 1551                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1552                 cpl->cntrl = htonl(cntrl);
 1553                 cpl->len = htonl(mlen | 0x80000000);
 1554 
 1555                 if (mlen <= PIO_LEN) {
 1556                         txsd->m = NULL;
 1557                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1558                         flits = (mlen + 7) / 8 + 2;
 1559                         
 1560                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1561                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1562                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1563                         wr_lo = htonl(V_WR_LEN(flits) |
 1564                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1565                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1566                         wmb();
 1567                         ETHER_BPF_MTAP(pi->ifp, m0);
 1568                         wr_gen2(txd, txqs.gen);
 1569                         check_ring_tx_db(sc, txq, 0);
 1570                         m_freem(m0);
 1571                         return (0);
 1572                 }
 1573                 flits = 2;
 1574         }
 1575         wrp = (struct work_request_hdr *)txd;
 1576         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1577         make_sgl(sgp, segs, nsegs);
 1578 
 1579         sgl_flits = sgl_len(nsegs);
 1580 
 1581         ETHER_BPF_MTAP(pi->ifp, m0);
 1582 
 1583         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1584         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1585         wr_lo = htonl(V_WR_TID(txq->token));
 1586         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1587             sgl_flits, wr_hi, wr_lo);
 1588         check_ring_tx_db(sc, txq, 0);
 1589 
 1590         return (0);
 1591 }
 1592 
 1593 #ifdef DEBUGNET
 1594 int
 1595 cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m)
 1596 {
 1597         int error;
 1598 
 1599         error = t3_encap(qs, m);
 1600         if (error == 0)
 1601                 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
 1602         else if (*m != NULL) {
 1603                 m_freem(*m);
 1604                 *m = NULL;
 1605         }
 1606         return (error);
 1607 }
 1608 #endif
 1609 
 1610 void
 1611 cxgb_tx_watchdog(void *arg)
 1612 {
 1613         struct sge_qset *qs = arg;
 1614         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1615 
 1616         if (qs->coalescing != 0 &&
 1617             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1618             TXQ_RING_EMPTY(qs))
 1619                 qs->coalescing = 0; 
 1620         else if (qs->coalescing == 0 &&
 1621             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1622                 qs->coalescing = 1;
 1623         if (TXQ_TRYLOCK(qs)) {
 1624                 qs->qs_flags |= QS_FLUSHING;
 1625                 cxgb_start_locked(qs);
 1626                 qs->qs_flags &= ~QS_FLUSHING;
 1627                 TXQ_UNLOCK(qs);
 1628         }
 1629         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1630                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1631                     qs, txq->txq_watchdog.c_cpu);
 1632 }
 1633 
 1634 static void
 1635 cxgb_tx_timeout(void *arg)
 1636 {
 1637         struct sge_qset *qs = arg;
 1638         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1639 
 1640         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1641                 qs->coalescing = 1;     
 1642         if (TXQ_TRYLOCK(qs)) {
 1643                 qs->qs_flags |= QS_TIMEOUT;
 1644                 cxgb_start_locked(qs);
 1645                 qs->qs_flags &= ~QS_TIMEOUT;
 1646                 TXQ_UNLOCK(qs);
 1647         }
 1648 }
 1649 
 1650 static void
 1651 cxgb_start_locked(struct sge_qset *qs)
 1652 {
 1653         struct mbuf *m_head = NULL;
 1654         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1655         struct port_info *pi = qs->port;
 1656         struct ifnet *ifp = pi->ifp;
 1657 
 1658         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1659                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1660 
 1661         if (!pi->link_config.link_ok) {
 1662                 TXQ_RING_FLUSH(qs);
 1663                 return;
 1664         }
 1665         TXQ_LOCK_ASSERT(qs);
 1666         while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1667             pi->link_config.link_ok) {
 1668                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1669 
 1670                 if (txq->size - txq->in_use <= TX_MAX_DESC)
 1671                         break;
 1672 
 1673                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1674                         break;
 1675                 /*
 1676                  *  Encapsulation can modify our pointer, and or make it
 1677                  *  NULL on failure.  In that event, we can't requeue.
 1678                  */
 1679                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1680                         break;
 1681 
 1682                 m_head = NULL;
 1683         }
 1684 
 1685         if (txq->db_pending)
 1686                 check_ring_tx_db(pi->adapter, txq, 1);
 1687 
 1688         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1689             pi->link_config.link_ok)
 1690                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1691                     qs, txq->txq_timer.c_cpu);
 1692         if (m_head != NULL)
 1693                 m_freem(m_head);
 1694 }
 1695 
 1696 static int
 1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1698 {
 1699         struct port_info *pi = qs->port;
 1700         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1701         struct buf_ring *br = txq->txq_mr;
 1702         int error, avail;
 1703 
 1704         avail = txq->size - txq->in_use;
 1705         TXQ_LOCK_ASSERT(qs);
 1706 
 1707         /*
 1708          * We can only do a direct transmit if the following are true:
 1709          * - we aren't coalescing (ring < 3/4 full)
 1710          * - the link is up -- checked in caller
 1711          * - there are no packets enqueued already
 1712          * - there is space in hardware transmit queue 
 1713          */
 1714         if (check_pkt_coalesce(qs) == 0 &&
 1715             !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
 1716                 if (t3_encap(qs, &m)) {
 1717                         if (m != NULL &&
 1718                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1719                                 return (error);
 1720                 } else {
 1721                         if (txq->db_pending)
 1722                                 check_ring_tx_db(pi->adapter, txq, 1);
 1723 
 1724                         /*
 1725                          * We've bypassed the buf ring so we need to update
 1726                          * the stats directly
 1727                          */
 1728                         txq->txq_direct_packets++;
 1729                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1730                 }
 1731         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1732                 return (error);
 1733 
 1734         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1735         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1736             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1737                 cxgb_start_locked(qs);
 1738         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1739                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1740                     qs, txq->txq_timer.c_cpu);
 1741         return (0);
 1742 }
 1743 
 1744 int
 1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1746 {
 1747         struct sge_qset *qs;
 1748         struct port_info *pi = ifp->if_softc;
 1749         int error, qidx = pi->first_qset;
 1750 
 1751         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1752             ||(!pi->link_config.link_ok)) {
 1753                 m_freem(m);
 1754                 return (0);
 1755         }
 1756 
 1757         /* check if flowid is set */
 1758         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)       
 1759                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1760 
 1761         qs = &pi->adapter->sge.qs[qidx];
 1762         
 1763         if (TXQ_TRYLOCK(qs)) {
 1764                 /* XXX running */
 1765                 error = cxgb_transmit_locked(ifp, qs, m);
 1766                 TXQ_UNLOCK(qs);
 1767         } else
 1768                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1769         return (error);
 1770 }
 1771 
 1772 void
 1773 cxgb_qflush(struct ifnet *ifp)
 1774 {
 1775         /*
 1776          * flush any enqueued mbufs in the buf_rings
 1777          * and in the transmit queues
 1778          * no-op for now
 1779          */
 1780         return;
 1781 }
 1782 
 1783 /**
 1784  *      write_imm - write a packet into a Tx descriptor as immediate data
 1785  *      @d: the Tx descriptor to write
 1786  *      @m: the packet
 1787  *      @len: the length of packet data to write as immediate data
 1788  *      @gen: the generation bit value to write
 1789  *
 1790  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1791  *      contains a work request at its beginning.  We must write the packet
 1792  *      carefully so the SGE doesn't read accidentally before it's written in
 1793  *      its entirety.
 1794  */
 1795 static __inline void
 1796 write_imm(struct tx_desc *d, caddr_t src,
 1797           unsigned int len, unsigned int gen)
 1798 {
 1799         struct work_request_hdr *from = (struct work_request_hdr *)src;
 1800         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1801         uint32_t wr_hi, wr_lo;
 1802 
 1803         KASSERT(len <= WR_LEN && len >= sizeof(*from),
 1804             ("%s: invalid len %d", __func__, len));
 1805         
 1806         memcpy(&to[1], &from[1], len - sizeof(*from));
 1807         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1808             V_WR_BCNTLFLT(len & 7));
 1809         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
 1810         set_wr_hdr(to, wr_hi, wr_lo);
 1811         wmb();
 1812         wr_gen2(d, gen);
 1813 }
 1814 
 1815 /**
 1816  *      check_desc_avail - check descriptor availability on a send queue
 1817  *      @adap: the adapter
 1818  *      @q: the TX queue
 1819  *      @m: the packet needing the descriptors
 1820  *      @ndesc: the number of Tx descriptors needed
 1821  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1822  *
 1823  *      Checks if the requested number of Tx descriptors is available on an
 1824  *      SGE send queue.  If the queue is already suspended or not enough
 1825  *      descriptors are available the packet is queued for later transmission.
 1826  *      Must be called with the Tx queue locked.
 1827  *
 1828  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1829  *      enough descriptors and the packet has been queued, and 2 if the caller
 1830  *      needs to retry because there weren't enough descriptors at the
 1831  *      beginning of the call but some freed up in the mean time.
 1832  */
 1833 static __inline int
 1834 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1835                  struct mbuf *m, unsigned int ndesc,
 1836                  unsigned int qid)
 1837 {
 1838         /* 
 1839          * XXX We currently only use this for checking the control queue
 1840          * the control queue is only used for binding qsets which happens
 1841          * at init time so we are guaranteed enough descriptors
 1842          */
 1843         if (__predict_false(mbufq_len(&q->sendq))) {
 1844 addq_exit:      (void )mbufq_enqueue(&q->sendq, m);
 1845                 return 1;
 1846         }
 1847         if (__predict_false(q->size - q->in_use < ndesc)) {
 1848 
 1849                 struct sge_qset *qs = txq_to_qset(q, qid);
 1850 
 1851                 setbit(&qs->txq_stopped, qid);
 1852                 if (should_restart_tx(q) &&
 1853                     test_and_clear_bit(qid, &qs->txq_stopped))
 1854                         return 2;
 1855 
 1856                 q->stops++;
 1857                 goto addq_exit;
 1858         }
 1859         return 0;
 1860 }
 1861 
 1862 
 1863 /**
 1864  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1865  *      @q: the SGE control Tx queue
 1866  *
 1867  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1868  *      that send only immediate data (presently just the control queues) and
 1869  *      thus do not have any mbufs
 1870  */
 1871 static __inline void
 1872 reclaim_completed_tx_imm(struct sge_txq *q)
 1873 {
 1874         unsigned int reclaim = q->processed - q->cleaned;
 1875 
 1876         q->in_use -= reclaim;
 1877         q->cleaned += reclaim;
 1878 }
 1879 
 1880 /**
 1881  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1882  *      @adap: the adapter
 1883  *      @q: the control queue
 1884  *      @m: the packet
 1885  *
 1886  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1887  *      a control queue must fit entirely as immediate data in a single Tx
 1888  *      descriptor and have no page fragments.
 1889  */
 1890 static int
 1891 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1892 {
 1893         int ret;
 1894         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1895         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1896         
 1897         KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
 1898 
 1899         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1900         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1901 
 1902         TXQ_LOCK(qs);
 1903 again:  reclaim_completed_tx_imm(q);
 1904 
 1905         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1906         if (__predict_false(ret)) {
 1907                 if (ret == 1) {
 1908                         TXQ_UNLOCK(qs);
 1909                         return (ENOSPC);
 1910                 }
 1911                 goto again;
 1912         }
 1913         write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1914         
 1915         q->in_use++;
 1916         if (++q->pidx >= q->size) {
 1917                 q->pidx = 0;
 1918                 q->gen ^= 1;
 1919         }
 1920         TXQ_UNLOCK(qs);
 1921         wmb();
 1922         t3_write_reg(adap, A_SG_KDOORBELL,
 1923             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1924 
 1925         m_free(m);
 1926         return (0);
 1927 }
 1928 
 1929 
 1930 /**
 1931  *      restart_ctrlq - restart a suspended control queue
 1932  *      @qs: the queue set cotaining the control queue
 1933  *
 1934  *      Resumes transmission on a suspended Tx control queue.
 1935  */
 1936 static void
 1937 restart_ctrlq(void *data, int npending)
 1938 {
 1939         struct mbuf *m;
 1940         struct sge_qset *qs = (struct sge_qset *)data;
 1941         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1942         adapter_t *adap = qs->port->adapter;
 1943 
 1944         TXQ_LOCK(qs);
 1945 again:  reclaim_completed_tx_imm(q);
 1946 
 1947         while (q->in_use < q->size &&
 1948                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1949 
 1950                 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
 1951                 m_free(m);
 1952 
 1953                 if (++q->pidx >= q->size) {
 1954                         q->pidx = 0;
 1955                         q->gen ^= 1;
 1956                 }
 1957                 q->in_use++;
 1958         }
 1959         if (mbufq_len(&q->sendq)) {
 1960                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1961 
 1962                 if (should_restart_tx(q) &&
 1963                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1964                         goto again;
 1965                 q->stops++;
 1966         }
 1967         TXQ_UNLOCK(qs);
 1968         t3_write_reg(adap, A_SG_KDOORBELL,
 1969                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1970 }
 1971 
 1972 
 1973 /*
 1974  * Send a management message through control queue 0
 1975  */
 1976 int
 1977 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1978 {
 1979         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 1980 }
 1981 
 1982 /**
 1983  *      free_qset - free the resources of an SGE queue set
 1984  *      @sc: the controller owning the queue set
 1985  *      @q: the queue set
 1986  *
 1987  *      Release the HW and SW resources associated with an SGE queue set, such
 1988  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1989  *      queue set must be quiesced prior to calling this.
 1990  */
 1991 static void
 1992 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 1993 {
 1994         int i;
 1995         
 1996         reclaim_completed_tx(q, 0, TXQ_ETH);
 1997         if (q->txq[TXQ_ETH].txq_mr != NULL) 
 1998                 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
 1999         if (q->txq[TXQ_ETH].txq_ifq != NULL) {
 2000                 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
 2001                 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
 2002         }
 2003 
 2004         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2005                 if (q->fl[i].desc) {
 2006                         mtx_lock_spin(&sc->sge.reg_lock);
 2007                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2008                         mtx_unlock_spin(&sc->sge.reg_lock);
 2009                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2010                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2011                                         q->fl[i].desc_map);
 2012                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2013                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2014                 }
 2015                 if (q->fl[i].sdesc) {
 2016                         free_rx_bufs(sc, &q->fl[i]);
 2017                         free(q->fl[i].sdesc, M_DEVBUF);
 2018                 }
 2019         }
 2020 
 2021         mtx_unlock(&q->lock);
 2022         MTX_DESTROY(&q->lock);
 2023         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2024                 if (q->txq[i].desc) {
 2025                         mtx_lock_spin(&sc->sge.reg_lock);
 2026                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2027                         mtx_unlock_spin(&sc->sge.reg_lock);
 2028                         bus_dmamap_unload(q->txq[i].desc_tag,
 2029                                         q->txq[i].desc_map);
 2030                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2031                                         q->txq[i].desc_map);
 2032                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2033                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2034                 }
 2035                 if (q->txq[i].sdesc) {
 2036                         free(q->txq[i].sdesc, M_DEVBUF);
 2037                 }
 2038         }
 2039 
 2040         if (q->rspq.desc) {
 2041                 mtx_lock_spin(&sc->sge.reg_lock);
 2042                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2043                 mtx_unlock_spin(&sc->sge.reg_lock);
 2044                 
 2045                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2046                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2047                                 q->rspq.desc_map);
 2048                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2049                 MTX_DESTROY(&q->rspq.lock);
 2050         }
 2051 
 2052 #if defined(INET6) || defined(INET)
 2053         tcp_lro_free(&q->lro.ctrl);
 2054 #endif
 2055 
 2056         bzero(q, sizeof(*q));
 2057 }
 2058 
 2059 /**
 2060  *      t3_free_sge_resources - free SGE resources
 2061  *      @sc: the adapter softc
 2062  *
 2063  *      Frees resources used by the SGE queue sets.
 2064  */
 2065 void
 2066 t3_free_sge_resources(adapter_t *sc, int nqsets)
 2067 {
 2068         int i;
 2069 
 2070         for (i = 0; i < nqsets; ++i) {
 2071                 TXQ_LOCK(&sc->sge.qs[i]);
 2072                 t3_free_qset(sc, &sc->sge.qs[i]);
 2073         }
 2074 }
 2075 
 2076 /**
 2077  *      t3_sge_start - enable SGE
 2078  *      @sc: the controller softc
 2079  *
 2080  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2081  *      transfers.
 2082  */
 2083 void
 2084 t3_sge_start(adapter_t *sc)
 2085 {
 2086         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2087 }
 2088 
 2089 /**
 2090  *      t3_sge_stop - disable SGE operation
 2091  *      @sc: the adapter
 2092  *
 2093  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
 2094  *      from error interrupts) or from normal process context.  In the latter
 2095  *      case it also disables any pending queue restart tasklets.  Note that
 2096  *      if it is called in interrupt context it cannot disable the restart
 2097  *      tasklets as it cannot wait, however the tasklets will have no effect
 2098  *      since the doorbells are disabled and the driver will call this again
 2099  *      later from process context, at which time the tasklets will be stopped
 2100  *      if they are still running.
 2101  */
 2102 void
 2103 t3_sge_stop(adapter_t *sc)
 2104 {
 2105 
 2106         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2107 }
 2108 
 2109 /**
 2110  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2111  *      @adapter: the adapter
 2112  *      @q: the Tx queue to reclaim descriptors from
 2113  *      @reclaimable: the number of descriptors to reclaim
 2114  *      @m_vec_size: maximum number of buffers to reclaim
 2115  *      @desc_reclaimed: returns the number of descriptors reclaimed
 2116  *
 2117  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 2118  *      Tx buffers.  Called with the Tx queue lock held.
 2119  *
 2120  *      Returns number of buffers of reclaimed   
 2121  */
 2122 void
 2123 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2124 {
 2125         struct tx_sw_desc *txsd;
 2126         unsigned int cidx, mask;
 2127         struct sge_txq *q = &qs->txq[queue];
 2128 
 2129 #ifdef T3_TRACE
 2130         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2131                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2132 #endif
 2133         cidx = q->cidx;
 2134         mask = q->size - 1;
 2135         txsd = &q->sdesc[cidx];
 2136 
 2137         mtx_assert(&qs->lock, MA_OWNED);
 2138         while (reclaimable--) {
 2139                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2140                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2141 
 2142                 if (txsd->m != NULL) {
 2143                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2144                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2145                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2146                         }
 2147                         m_freem_list(txsd->m);
 2148                         txsd->m = NULL;
 2149                 } else
 2150                         q->txq_skipped++;
 2151                 
 2152                 ++txsd;
 2153                 if (++cidx == q->size) {
 2154                         cidx = 0;
 2155                         txsd = q->sdesc;
 2156                 }
 2157         }
 2158         q->cidx = cidx;
 2159 
 2160 }
 2161 
 2162 /**
 2163  *      is_new_response - check if a response is newly written
 2164  *      @r: the response descriptor
 2165  *      @q: the response queue
 2166  *
 2167  *      Returns true if a response descriptor contains a yet unprocessed
 2168  *      response.
 2169  */
 2170 static __inline int
 2171 is_new_response(const struct rsp_desc *r,
 2172     const struct sge_rspq *q)
 2173 {
 2174         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2175 }
 2176 
 2177 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2178 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2179                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2180                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2181                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2182 
 2183 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2184 #define NOMEM_INTR_DELAY 2500
 2185 
 2186 #ifdef TCP_OFFLOAD
 2187 /**
 2188  *      write_ofld_wr - write an offload work request
 2189  *      @adap: the adapter
 2190  *      @m: the packet to send
 2191  *      @q: the Tx queue
 2192  *      @pidx: index of the first Tx descriptor to write
 2193  *      @gen: the generation value to use
 2194  *      @ndesc: number of descriptors the packet will occupy
 2195  *
 2196  *      Write an offload work request to send the supplied packet.  The packet
 2197  *      data already carry the work request with most fields populated.
 2198  */
 2199 static void
 2200 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
 2201     unsigned int pidx, unsigned int gen, unsigned int ndesc)
 2202 {
 2203         unsigned int sgl_flits, flits;
 2204         int i, idx, nsegs, wrlen;
 2205         struct work_request_hdr *from;
 2206         struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
 2207         struct tx_desc *d = &q->desc[pidx];
 2208         struct txq_state txqs;
 2209         struct sglist_seg *segs;
 2210         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2211         struct sglist *sgl;
 2212 
 2213         from = (void *)(oh + 1);        /* Start of WR within mbuf */
 2214         wrlen = m->m_len - sizeof(*oh);
 2215 
 2216         if (!(oh->flags & F_HDR_SGL)) {
 2217                 write_imm(d, (caddr_t)from, wrlen, gen);
 2218 
 2219                 /*
 2220                  * mbuf with "real" immediate tx data will be enqueue_wr'd by
 2221                  * t3_push_frames and freed in wr_ack.  Others, like those sent
 2222                  * down by close_conn, t3_send_reset, etc. should be freed here.
 2223                  */
 2224                 if (!(oh->flags & F_HDR_DF))
 2225                         m_free(m);
 2226                 return;
 2227         }
 2228 
 2229         memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
 2230 
 2231         sgl = oh->sgl;
 2232         flits = wrlen / 8;
 2233         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
 2234 
 2235         nsegs = sgl->sg_nseg;
 2236         segs = sgl->sg_segs;
 2237         for (idx = 0, i = 0; i < nsegs; i++) {
 2238                 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
 2239                 if (i && idx == 0) 
 2240                         ++sgp;
 2241                 sgp->len[idx] = htobe32(segs[i].ss_len);
 2242                 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
 2243                 idx ^= 1;
 2244         }
 2245         if (idx) {
 2246                 sgp->len[idx] = 0;
 2247                 sgp->addr[idx] = 0;
 2248         }
 2249 
 2250         sgl_flits = sgl_len(nsegs);
 2251         txqs.gen = gen;
 2252         txqs.pidx = pidx;
 2253         txqs.compl = 0;
 2254 
 2255         write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
 2256             from->wrh_hi, from->wrh_lo);
 2257 }
 2258 
 2259 /**
 2260  *      ofld_xmit - send a packet through an offload queue
 2261  *      @adap: the adapter
 2262  *      @q: the Tx offload queue
 2263  *      @m: the packet
 2264  *
 2265  *      Send an offload packet through an SGE offload queue.
 2266  */
 2267 static int
 2268 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2269 {
 2270         int ret;
 2271         unsigned int ndesc;
 2272         unsigned int pidx, gen;
 2273         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2274         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2275 
 2276         ndesc = G_HDR_NDESC(oh->flags);
 2277 
 2278         TXQ_LOCK(qs);
 2279 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2280         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2281         if (__predict_false(ret)) {
 2282                 if (ret == 1) {
 2283                         TXQ_UNLOCK(qs);
 2284                         return (EINTR);
 2285                 }
 2286                 goto again;
 2287         }
 2288 
 2289         gen = q->gen;
 2290         q->in_use += ndesc;
 2291         pidx = q->pidx;
 2292         q->pidx += ndesc;
 2293         if (q->pidx >= q->size) {
 2294                 q->pidx -= q->size;
 2295                 q->gen ^= 1;
 2296         }
 2297 
 2298         write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2299         check_ring_tx_db(adap, q, 1);
 2300         TXQ_UNLOCK(qs);
 2301 
 2302         return (0);
 2303 }
 2304 
 2305 /**
 2306  *      restart_offloadq - restart a suspended offload queue
 2307  *      @qs: the queue set cotaining the offload queue
 2308  *
 2309  *      Resumes transmission on a suspended Tx offload queue.
 2310  */
 2311 static void
 2312 restart_offloadq(void *data, int npending)
 2313 {
 2314         struct mbuf *m;
 2315         struct sge_qset *qs = data;
 2316         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2317         adapter_t *adap = qs->port->adapter;
 2318 
 2319         TXQ_LOCK(qs);
 2320 again:
 2321         while ((m = mbufq_first(&q->sendq)) != NULL) {
 2322                 unsigned int gen, pidx;
 2323                 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2324                 unsigned int ndesc = G_HDR_NDESC(oh->flags);
 2325 
 2326                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2327                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2328                         if (should_restart_tx(q) &&
 2329                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2330                                 goto again;
 2331                         q->stops++;
 2332                         break;
 2333                 }
 2334 
 2335                 gen = q->gen;
 2336                 q->in_use += ndesc;
 2337                 pidx = q->pidx;
 2338                 q->pidx += ndesc;
 2339                 if (q->pidx >= q->size) {
 2340                         q->pidx -= q->size;
 2341                         q->gen ^= 1;
 2342                 }
 2343                 
 2344                 (void)mbufq_dequeue(&q->sendq);
 2345                 TXQ_UNLOCK(qs);
 2346                 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
 2347                 TXQ_LOCK(qs);
 2348         }
 2349 #if USE_GTS
 2350         set_bit(TXQ_RUNNING, &q->flags);
 2351         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2352 #endif
 2353         TXQ_UNLOCK(qs);
 2354         wmb();
 2355         t3_write_reg(adap, A_SG_KDOORBELL,
 2356                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2357 }
 2358 
 2359 /**
 2360  *      t3_offload_tx - send an offload packet
 2361  *      @m: the packet
 2362  *
 2363  *      Sends an offload packet.  We use the packet priority to select the
 2364  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2365  *      should be sent as regular or control, bits 1-3 select the queue set.
 2366  */
 2367 int
 2368 t3_offload_tx(struct adapter *sc, struct mbuf *m)
 2369 {
 2370         struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 2371         struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
 2372 
 2373         if (oh->flags & F_HDR_CTRL) {
 2374                 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
 2375                 return (ctrl_xmit(sc, qs, m));
 2376         } else
 2377                 return (ofld_xmit(sc, qs, m));
 2378 }
 2379 #endif
 2380 
 2381 static void
 2382 restart_tx(struct sge_qset *qs)
 2383 {
 2384         struct adapter *sc = qs->port->adapter;
 2385 
 2386         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2387             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2388             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2389                 qs->txq[TXQ_OFLD].restarts++;
 2390                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2391         }
 2392 
 2393         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2394             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2395             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2396                 qs->txq[TXQ_CTRL].restarts++;
 2397                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2398         }
 2399 }
 2400 
 2401 /**
 2402  *      t3_sge_alloc_qset - initialize an SGE queue set
 2403  *      @sc: the controller softc
 2404  *      @id: the queue set id
 2405  *      @nports: how many Ethernet ports will be using this queue set
 2406  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2407  *      @p: configuration parameters for this queue set
 2408  *      @ntxq: number of Tx queues for the queue set
 2409  *      @pi: port info for queue set
 2410  *
 2411  *      Allocate resources and initialize an SGE queue set.  A queue set
 2412  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2413  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2414  *      queue, offload queue, and control queue.
 2415  */
 2416 int
 2417 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2418                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2419 {
 2420         struct sge_qset *q = &sc->sge.qs[id];
 2421         int i, ret = 0;
 2422 
 2423         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2424         q->port = pi;
 2425         q->adap = sc;
 2426 
 2427         if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2428             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2429                 device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2430                 goto err;
 2431         }
 2432         if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
 2433             M_NOWAIT | M_ZERO)) == NULL) {
 2434                 device_printf(sc->dev, "failed to allocate ifq\n");
 2435                 goto err;
 2436         }
 2437         ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);     
 2438         callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
 2439         callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
 2440         q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
 2441         q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
 2442 
 2443         init_qset_cntxt(q, id);
 2444         q->idx = id;
 2445         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2446                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2447                     &q->fl[0].desc, &q->fl[0].sdesc,
 2448                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2449                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2450                 printf("error %d from alloc ring fl0\n", ret);
 2451                 goto err;
 2452         }
 2453 
 2454         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2455                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2456                     &q->fl[1].desc, &q->fl[1].sdesc,
 2457                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2458                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2459                 printf("error %d from alloc ring fl1\n", ret);
 2460                 goto err;
 2461         }
 2462 
 2463         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2464                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2465                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2466                     NULL, NULL)) != 0) {
 2467                 printf("error %d from alloc ring rspq\n", ret);
 2468                 goto err;
 2469         }
 2470 
 2471         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2472             device_get_unit(sc->dev), irq_vec_idx);
 2473         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2474 
 2475         for (i = 0; i < ntxq; ++i) {
 2476                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2477 
 2478                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2479                             sizeof(struct tx_desc), sz,
 2480                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2481                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2482                             &q->txq[i].desc_map,
 2483                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2484                         printf("error %d from alloc ring tx %i\n", ret, i);
 2485                         goto err;
 2486                 }
 2487                 mbufq_init(&q->txq[i].sendq, INT_MAX);
 2488                 q->txq[i].gen = 1;
 2489                 q->txq[i].size = p->txq_size[i];
 2490         }
 2491 
 2492 #ifdef TCP_OFFLOAD
 2493         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2494 #endif
 2495         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2496         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2497         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2498 
 2499         q->fl[0].gen = q->fl[1].gen = 1;
 2500         q->fl[0].size = p->fl_size;
 2501         q->fl[1].size = p->jumbo_size;
 2502 
 2503         q->rspq.gen = 1;
 2504         q->rspq.cidx = 0;
 2505         q->rspq.size = p->rspq_size;
 2506 
 2507         q->txq[TXQ_ETH].stop_thres = nports *
 2508             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2509 
 2510         q->fl[0].buf_size = MCLBYTES;
 2511         q->fl[0].zone = zone_pack;
 2512         q->fl[0].type = EXT_PACKET;
 2513 
 2514         if (p->jumbo_buf_size ==  MJUM16BYTES) {
 2515                 q->fl[1].zone = zone_jumbo16;
 2516                 q->fl[1].type = EXT_JUMBO16;
 2517         } else if (p->jumbo_buf_size ==  MJUM9BYTES) {
 2518                 q->fl[1].zone = zone_jumbo9;
 2519                 q->fl[1].type = EXT_JUMBO9;             
 2520         } else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
 2521                 q->fl[1].zone = zone_jumbop;
 2522                 q->fl[1].type = EXT_JUMBOP;
 2523         } else {
 2524                 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
 2525                 ret = EDOOFUS;
 2526                 goto err;
 2527         }
 2528         q->fl[1].buf_size = p->jumbo_buf_size;
 2529 
 2530         /* Allocate and setup the lro_ctrl structure */
 2531         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2532 #if defined(INET6) || defined(INET)
 2533         ret = tcp_lro_init(&q->lro.ctrl);
 2534         if (ret) {
 2535                 printf("error %d from tcp_lro_init\n", ret);
 2536                 goto err;
 2537         }
 2538 #endif
 2539         q->lro.ctrl.ifp = pi->ifp;
 2540 
 2541         mtx_lock_spin(&sc->sge.reg_lock);
 2542         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2543                                    q->rspq.phys_addr, q->rspq.size,
 2544                                    q->fl[0].buf_size, 1, 0);
 2545         if (ret) {
 2546                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2547                 goto err_unlock;
 2548         }
 2549 
 2550         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2551                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2552                                           q->fl[i].phys_addr, q->fl[i].size,
 2553                                           q->fl[i].buf_size, p->cong_thres, 1,
 2554                                           0);
 2555                 if (ret) {
 2556                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2557                         goto err_unlock;
 2558                 }
 2559         }
 2560 
 2561         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2562                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2563                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2564                                  1, 0);
 2565         if (ret) {
 2566                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2567                 goto err_unlock;
 2568         }
 2569 
 2570         if (ntxq > 1) {
 2571                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2572                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2573                                          q->txq[TXQ_OFLD].phys_addr,
 2574                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2575                 if (ret) {
 2576                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2577                         goto err_unlock;
 2578                 }
 2579         }
 2580 
 2581         if (ntxq > 2) {
 2582                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2583                                          SGE_CNTXT_CTRL, id,
 2584                                          q->txq[TXQ_CTRL].phys_addr,
 2585                                          q->txq[TXQ_CTRL].size,
 2586                                          q->txq[TXQ_CTRL].token, 1, 0);
 2587                 if (ret) {
 2588                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2589                         goto err_unlock;
 2590                 }
 2591         }
 2592 
 2593         mtx_unlock_spin(&sc->sge.reg_lock);
 2594         t3_update_qset_coalesce(q, p);
 2595 
 2596         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2597         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2598         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2599 
 2600         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2601                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2602 
 2603         return (0);
 2604 
 2605 err_unlock:
 2606         mtx_unlock_spin(&sc->sge.reg_lock);
 2607 err:    
 2608         TXQ_LOCK(q);
 2609         t3_free_qset(sc, q);
 2610 
 2611         return (ret);
 2612 }
 2613 
 2614 /*
 2615  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2616  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2617  * will also be taken into account here.
 2618  */
 2619 void
 2620 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
 2621 {
 2622         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2623         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2624         struct ifnet *ifp = pi->ifp;
 2625         
 2626         if (cpl->vlan_valid) {
 2627                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2628                 m->m_flags |= M_VLANTAG;
 2629         } 
 2630 
 2631         m->m_pkthdr.rcvif = ifp;
 2632         /*
 2633          * adjust after conversion to mbuf chain
 2634          */
 2635         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2636         m->m_len -= (sizeof(*cpl) + ethpad);
 2637         m->m_data += (sizeof(*cpl) + ethpad);
 2638 
 2639         if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
 2640                 struct ether_header *eh = mtod(m, void *);
 2641                 uint16_t eh_type;
 2642 
 2643                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 2644                         struct ether_vlan_header *evh = mtod(m, void *);
 2645 
 2646                         eh_type = evh->evl_proto;
 2647                 } else
 2648                         eh_type = eh->ether_type;
 2649 
 2650                 if (ifp->if_capenable & IFCAP_RXCSUM &&
 2651                     eh_type == htons(ETHERTYPE_IP)) {
 2652                         m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
 2653                             CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 2654                         m->m_pkthdr.csum_data = 0xffff;
 2655                 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
 2656                     eh_type == htons(ETHERTYPE_IPV6)) {
 2657                         m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
 2658                             CSUM_PSEUDO_HDR);
 2659                         m->m_pkthdr.csum_data = 0xffff;
 2660                 }
 2661         }
 2662 }
 2663 
 2664 /**
 2665  *      get_packet - return the next ingress packet buffer from a free list
 2666  *      @adap: the adapter that received the packet
 2667  *      @drop_thres: # of remaining buffers before we start dropping packets
 2668  *      @qs: the qset that the SGE free list holding the packet belongs to
 2669  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2670  *      @r: response descriptor 
 2671  *
 2672  *      Get the next packet from a free list and complete setup of the
 2673  *      sk_buff.  If the packet is small we make a copy and recycle the
 2674  *      original buffer, otherwise we use the original buffer itself.  If a
 2675  *      positive drop threshold is supplied packets are dropped and their
 2676  *      buffers recycled if (a) the number of remaining buffers is under the
 2677  *      threshold and the packet is too big to copy, or (b) the packet should
 2678  *      be copied but there is no memory for the copy.
 2679  */
 2680 static int
 2681 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2682     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2683 {
 2684 
 2685         unsigned int len_cq =  ntohl(r->len_cq);
 2686         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2687         int mask, cidx = fl->cidx;
 2688         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2689         uint32_t len = G_RSPD_LEN(len_cq);
 2690         uint32_t flags = M_EXT;
 2691         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2692         caddr_t cl;
 2693         struct mbuf *m;
 2694         int ret = 0;
 2695 
 2696         mask = fl->size - 1;
 2697         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2698         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2699         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2700         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2701 
 2702         fl->credits--;
 2703         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2704         
 2705         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2706             sopeop == RSPQ_SOP_EOP) {
 2707                 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 2708                         goto skip_recycle;
 2709                 cl = mtod(m, void *);
 2710                 memcpy(cl, sd->rxsd_cl, len);
 2711                 recycle_rx_buf(adap, fl, fl->cidx);
 2712                 m->m_pkthdr.len = m->m_len = len;
 2713                 m->m_flags = 0;
 2714                 mh->mh_head = mh->mh_tail = m;
 2715                 ret = 1;
 2716                 goto done;
 2717         } else {
 2718         skip_recycle:
 2719                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2720                 cl = sd->rxsd_cl;
 2721                 m = sd->m;
 2722 
 2723                 if ((sopeop == RSPQ_SOP_EOP) ||
 2724                     (sopeop == RSPQ_SOP))
 2725                         flags |= M_PKTHDR;
 2726                 m_init(m, M_NOWAIT, MT_DATA, flags);
 2727                 if (fl->zone == zone_pack) {
 2728                         /*
 2729                          * restore clobbered data pointer
 2730                          */
 2731                         m->m_data = m->m_ext.ext_buf;
 2732                 } else {
 2733                         m_cljset(m, cl, fl->type);
 2734                 }
 2735                 m->m_len = len;
 2736         }               
 2737         switch(sopeop) {
 2738         case RSPQ_SOP_EOP:
 2739                 ret = 1;
 2740                 /* FALLTHROUGH */
 2741         case RSPQ_SOP:
 2742                 mh->mh_head = mh->mh_tail = m;
 2743                 m->m_pkthdr.len = len;
 2744                 break;
 2745         case RSPQ_EOP:
 2746                 ret = 1;
 2747                 /* FALLTHROUGH */
 2748         case RSPQ_NSOP_NEOP:
 2749                 if (mh->mh_tail == NULL) {
 2750                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2751                         m_freem(m);
 2752                         m = NULL;
 2753                         break;
 2754                 }
 2755                 mh->mh_tail->m_next = m;
 2756                 mh->mh_tail = m;
 2757                 mh->mh_head->m_pkthdr.len += len;
 2758                 break;
 2759         }
 2760         if (cxgb_debug && m != NULL)
 2761                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2762 done:
 2763         if (++fl->cidx == fl->size)
 2764                 fl->cidx = 0;
 2765 
 2766         return (ret);
 2767 }
 2768 
 2769 /**
 2770  *      handle_rsp_cntrl_info - handles control information in a response
 2771  *      @qs: the queue set corresponding to the response
 2772  *      @flags: the response control flags
 2773  *
 2774  *      Handles the control information of an SGE response, such as GTS
 2775  *      indications and completion credits for the queue set's Tx queues.
 2776  *      HW coalesces credits, we don't do any extra SW coalescing.
 2777  */
 2778 static __inline void
 2779 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2780 {
 2781         unsigned int credits;
 2782 
 2783 #if USE_GTS
 2784         if (flags & F_RSPD_TXQ0_GTS)
 2785                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2786 #endif
 2787         credits = G_RSPD_TXQ0_CR(flags);
 2788         if (credits) 
 2789                 qs->txq[TXQ_ETH].processed += credits;
 2790 
 2791         credits = G_RSPD_TXQ2_CR(flags);
 2792         if (credits)
 2793                 qs->txq[TXQ_CTRL].processed += credits;
 2794 
 2795 # if USE_GTS
 2796         if (flags & F_RSPD_TXQ1_GTS)
 2797                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2798 # endif
 2799         credits = G_RSPD_TXQ1_CR(flags);
 2800         if (credits)
 2801                 qs->txq[TXQ_OFLD].processed += credits;
 2802 
 2803 }
 2804 
 2805 static void
 2806 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2807     unsigned int sleeping)
 2808 {
 2809         ;
 2810 }
 2811 
 2812 /**
 2813  *      process_responses - process responses from an SGE response queue
 2814  *      @adap: the adapter
 2815  *      @qs: the queue set to which the response queue belongs
 2816  *      @budget: how many responses can be processed in this round
 2817  *
 2818  *      Process responses from an SGE response queue up to the supplied budget.
 2819  *      Responses include received packets as well as credits and other events
 2820  *      for the queues that belong to the response queue's queue set.
 2821  *      A negative budget is effectively unlimited.
 2822  *
 2823  *      Additionally choose the interrupt holdoff time for the next interrupt
 2824  *      on this queue.  If the system is under memory shortage use a fairly
 2825  *      long delay to help recovery.
 2826  */
 2827 static int
 2828 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2829 {
 2830         struct sge_rspq *rspq = &qs->rspq;
 2831         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2832         int budget_left = budget;
 2833         unsigned int sleeping = 0;
 2834 #if defined(INET6) || defined(INET)
 2835         int lro_enabled = qs->lro.enabled;
 2836         int skip_lro;
 2837         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2838 #endif
 2839         struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
 2840 #ifdef DEBUG    
 2841         static int last_holdoff = 0;
 2842         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2843                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2844                 last_holdoff = rspq->holdoff_tmr;
 2845         }
 2846 #endif
 2847         rspq->next_holdoff = rspq->holdoff_tmr;
 2848 
 2849         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2850                 int eth, eop = 0, ethpad = 0;
 2851                 uint32_t flags = ntohl(r->flags);
 2852                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2853                 uint8_t opcode = r->rss_hdr.opcode;
 2854                 
 2855                 eth = (opcode == CPL_RX_PKT);
 2856                 
 2857                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2858                         struct mbuf *m;
 2859 
 2860                         if (cxgb_debug)
 2861                                 printf("async notification\n");
 2862 
 2863                         if (mh->mh_head == NULL) {
 2864                                 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
 2865                                 m = mh->mh_head;
 2866                         } else {
 2867                                 m = m_gethdr(M_NOWAIT, MT_DATA);
 2868                         }
 2869                         if (m == NULL)
 2870                                 goto no_mem;
 2871 
 2872                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 2873                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 2874                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
 2875                         opcode = CPL_ASYNC_NOTIF;
 2876                         eop = 1;
 2877                         rspq->async_notif++;
 2878                         goto skip;
 2879                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2880                         struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
 2881 
 2882                         if (m == NULL) {        
 2883                 no_mem:
 2884                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2885                                 budget_left--;
 2886                                 break;
 2887                         }
 2888                         if (mh->mh_head == NULL)
 2889                                 mh->mh_head = m;
 2890                         else 
 2891                                 mh->mh_tail->m_next = m;
 2892                         mh->mh_tail = m;
 2893 
 2894                         get_imm_packet(adap, r, m);
 2895                         mh->mh_head->m_pkthdr.len += m->m_len;
 2896                         eop = 1;
 2897                         rspq->imm_data++;
 2898                 } else if (r->len_cq) {
 2899                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2900                         
 2901                         eop = get_packet(adap, drop_thresh, qs, mh, r);
 2902                         if (eop) {
 2903                                 if (r->rss_hdr.hash_type && !adap->timestamp) {
 2904                                         M_HASHTYPE_SET(mh->mh_head,
 2905                                             M_HASHTYPE_OPAQUE_HASH);
 2906                                         mh->mh_head->m_pkthdr.flowid = rss_hash;
 2907                                 }
 2908                         }
 2909                         
 2910                         ethpad = 2;
 2911                 } else {
 2912                         rspq->pure_rsps++;
 2913                 }
 2914         skip:
 2915                 if (flags & RSPD_CTRL_MASK) {
 2916                         sleeping |= flags & RSPD_GTS_MASK;
 2917                         handle_rsp_cntrl_info(qs, flags);
 2918                 }
 2919 
 2920                 if (!eth && eop) {
 2921                         rspq->offload_pkts++;
 2922 #ifdef TCP_OFFLOAD
 2923                         adap->cpl_handler[opcode](qs, r, mh->mh_head);
 2924 #else
 2925                         m_freem(mh->mh_head);
 2926 #endif
 2927                         mh->mh_head = NULL;
 2928                 } else if (eth && eop) {
 2929                         struct mbuf *m = mh->mh_head;
 2930 
 2931                         t3_rx_eth(adap, m, ethpad);
 2932 
 2933                         /*
 2934                          * The T304 sends incoming packets on any qset.  If LRO
 2935                          * is also enabled, we could end up sending packet up
 2936                          * lro_ctrl->ifp's input.  That is incorrect.
 2937                          *
 2938                          * The mbuf's rcvif was derived from the cpl header and
 2939                          * is accurate.  Skip LRO and just use that.
 2940                          */
 2941 #if defined(INET6) || defined(INET)
 2942                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 2943 
 2944                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
 2945                             && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
 2946                             ) {
 2947                                 /* successfully queue'd for LRO */
 2948                         } else
 2949 #endif
 2950                         {
 2951                                 /*
 2952                                  * LRO not enabled, packet unsuitable for LRO,
 2953                                  * or unable to queue.  Pass it up right now in
 2954                                  * either case.
 2955                                  */
 2956                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 2957                                 (*ifp->if_input)(ifp, m);
 2958                         }
 2959                         mh->mh_head = NULL;
 2960 
 2961                 }
 2962 
 2963                 r++;
 2964                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2965                         rspq->cidx = 0;
 2966                         rspq->gen ^= 1;
 2967                         r = rspq->desc;
 2968                 }
 2969 
 2970                 if (++rspq->credits >= 64) {
 2971                         refill_rspq(adap, rspq, rspq->credits);
 2972                         rspq->credits = 0;
 2973                 }
 2974                 __refill_fl_lt(adap, &qs->fl[0], 32);
 2975                 __refill_fl_lt(adap, &qs->fl[1], 32);
 2976                 --budget_left;
 2977         }
 2978 
 2979 #if defined(INET6) || defined(INET)
 2980         /* Flush LRO */
 2981         tcp_lro_flush_all(lro_ctrl);
 2982 #endif
 2983 
 2984         if (sleeping)
 2985                 check_ring_db(adap, qs, sleeping);
 2986 
 2987         mb();  /* commit Tx queue processed updates */
 2988         if (__predict_false(qs->txq_stopped > 1))
 2989                 restart_tx(qs);
 2990 
 2991         __refill_fl_lt(adap, &qs->fl[0], 512);
 2992         __refill_fl_lt(adap, &qs->fl[1], 512);
 2993         budget -= budget_left;
 2994         return (budget);
 2995 }
 2996 
 2997 /*
 2998  * A helper function that processes responses and issues GTS.
 2999  */
 3000 static __inline int
 3001 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3002 {
 3003         int work;
 3004         static int last_holdoff = 0;
 3005         
 3006         work = process_responses(adap, rspq_to_qset(rq), -1);
 3007 
 3008         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3009                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3010                 last_holdoff = rq->next_holdoff;
 3011         }
 3012         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3013             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3014         
 3015         return (work);
 3016 }
 3017 
 3018 #ifdef DEBUGNET
 3019 int
 3020 cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs)
 3021 {
 3022 
 3023         return (process_responses_gts(adap, &qs->rspq));
 3024 }
 3025 #endif
 3026 
 3027 /*
 3028  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3029  * Handles data events from SGE response queues as well as error and other
 3030  * async events as they all use the same interrupt pin.  We use one SGE
 3031  * response queue per port in this mode and protect all response queues with
 3032  * queue 0's lock.
 3033  */
 3034 void
 3035 t3b_intr(void *data)
 3036 {
 3037         uint32_t i, map;
 3038         adapter_t *adap = data;
 3039         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3040         
 3041         t3_write_reg(adap, A_PL_CLI, 0);
 3042         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3043 
 3044         if (!map) 
 3045                 return;
 3046 
 3047         if (__predict_false(map & F_ERRINTR)) {
 3048                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3049                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3050                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3051         }
 3052 
 3053         mtx_lock(&q0->lock);
 3054         for_each_port(adap, i)
 3055             if (map & (1 << i))
 3056                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3057         mtx_unlock(&q0->lock);
 3058 }
 3059 
 3060 /*
 3061  * The MSI interrupt handler.  This needs to handle data events from SGE
 3062  * response queues as well as error and other async events as they all use
 3063  * the same MSI vector.  We use one SGE response queue per port in this mode
 3064  * and protect all response queues with queue 0's lock.
 3065  */
 3066 void
 3067 t3_intr_msi(void *data)
 3068 {
 3069         adapter_t *adap = data;
 3070         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3071         int i, new_packets = 0;
 3072 
 3073         mtx_lock(&q0->lock);
 3074 
 3075         for_each_port(adap, i)
 3076             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3077                     new_packets = 1;
 3078         mtx_unlock(&q0->lock);
 3079         if (new_packets == 0) {
 3080                 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
 3081                 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
 3082                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3083         }
 3084 }
 3085 
 3086 void
 3087 t3_intr_msix(void *data)
 3088 {
 3089         struct sge_qset *qs = data;
 3090         adapter_t *adap = qs->port->adapter;
 3091         struct sge_rspq *rspq = &qs->rspq;
 3092 
 3093         if (process_responses_gts(adap, rspq) == 0)
 3094                 rspq->unhandled_irqs++;
 3095 }
 3096 
 3097 #define QDUMP_SBUF_SIZE         32 * 400
 3098 static int
 3099 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3100 {
 3101         struct sge_rspq *rspq;
 3102         struct sge_qset *qs;
 3103         int i, err, dump_end, idx;
 3104         struct sbuf *sb;
 3105         struct rsp_desc *rspd;
 3106         uint32_t data[4];
 3107         
 3108         rspq = arg1;
 3109         qs = rspq_to_qset(rspq);
 3110         if (rspq->rspq_dump_count == 0) 
 3111                 return (0);
 3112         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3113                 log(LOG_WARNING,
 3114                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3115                 rspq->rspq_dump_count = 0;
 3116                 return (EINVAL);
 3117         }
 3118         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3119                 log(LOG_WARNING,
 3120                     "dump start of %d is greater than queue size\n",
 3121                     rspq->rspq_dump_start);
 3122                 rspq->rspq_dump_start = 0;
 3123                 return (EINVAL);
 3124         }
 3125         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3126         if (err)
 3127                 return (err);
 3128         err = sysctl_wire_old_buffer(req, 0);
 3129         if (err)
 3130                 return (err);
 3131         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3132 
 3133         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3134             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3135             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3136         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3137             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3138         
 3139         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3140             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3141         
 3142         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3143         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3144                 idx = i & (RSPQ_Q_SIZE-1);
 3145                 
 3146                 rspd = &rspq->desc[idx];
 3147                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3148                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3149                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3150                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3151                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3152                     be32toh(rspd->len_cq), rspd->intr_gen);
 3153         }
 3154 
 3155         err = sbuf_finish(sb);
 3156         sbuf_delete(sb);
 3157         return (err);
 3158 }       
 3159 
 3160 static int
 3161 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3162 {
 3163         struct sge_txq *txq;
 3164         struct sge_qset *qs;
 3165         int i, j, err, dump_end;
 3166         struct sbuf *sb;
 3167         struct tx_desc *txd;
 3168         uint32_t *WR, wr_hi, wr_lo, gen;
 3169         uint32_t data[4];
 3170         
 3171         txq = arg1;
 3172         qs = txq_to_qset(txq, TXQ_ETH);
 3173         if (txq->txq_dump_count == 0) {
 3174                 return (0);
 3175         }
 3176         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3177                 log(LOG_WARNING,
 3178                     "dump count is too large %d\n", txq->txq_dump_count);
 3179                 txq->txq_dump_count = 1;
 3180                 return (EINVAL);
 3181         }
 3182         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3183                 log(LOG_WARNING,
 3184                     "dump start of %d is greater than queue size\n",
 3185                     txq->txq_dump_start);
 3186                 txq->txq_dump_start = 0;
 3187                 return (EINVAL);
 3188         }
 3189         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3190         if (err)
 3191                 return (err);
 3192         err = sysctl_wire_old_buffer(req, 0);
 3193         if (err)
 3194                 return (err);
 3195         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3196 
 3197         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3198             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3199             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3200         sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
 3201             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3202             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3203         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3204             txq->txq_dump_start,
 3205             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3206 
 3207         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3208         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3209                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3210                 WR = (uint32_t *)txd->flit;
 3211                 wr_hi = ntohl(WR[0]);
 3212                 wr_lo = ntohl(WR[1]);           
 3213                 gen = G_WR_GEN(wr_lo);
 3214                 
 3215                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3216                     wr_hi, wr_lo, gen);
 3217                 for (j = 2; j < 30; j += 4) 
 3218                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3219                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3220 
 3221         }
 3222         err = sbuf_finish(sb);
 3223         sbuf_delete(sb);
 3224         return (err);
 3225 }
 3226 
 3227 static int
 3228 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3229 {
 3230         struct sge_txq *txq;
 3231         struct sge_qset *qs;
 3232         int i, j, err, dump_end;
 3233         struct sbuf *sb;
 3234         struct tx_desc *txd;
 3235         uint32_t *WR, wr_hi, wr_lo, gen;
 3236         
 3237         txq = arg1;
 3238         qs = txq_to_qset(txq, TXQ_CTRL);
 3239         if (txq->txq_dump_count == 0) {
 3240                 return (0);
 3241         }
 3242         if (txq->txq_dump_count > 256) {
 3243                 log(LOG_WARNING,
 3244                     "dump count is too large %d\n", txq->txq_dump_count);
 3245                 txq->txq_dump_count = 1;
 3246                 return (EINVAL);
 3247         }
 3248         if (txq->txq_dump_start > 255) {
 3249                 log(LOG_WARNING,
 3250                     "dump start of %d is greater than queue size\n",
 3251                     txq->txq_dump_start);
 3252                 txq->txq_dump_start = 0;
 3253                 return (EINVAL);
 3254         }
 3255 
 3256         err = sysctl_wire_old_buffer(req, 0);
 3257         if (err != 0)
 3258                 return (err);
 3259         sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
 3260         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3261             txq->txq_dump_start,
 3262             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3263 
 3264         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3265         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3266                 txd = &txq->desc[i & (255)];
 3267                 WR = (uint32_t *)txd->flit;
 3268                 wr_hi = ntohl(WR[0]);
 3269                 wr_lo = ntohl(WR[1]);           
 3270                 gen = G_WR_GEN(wr_lo);
 3271                 
 3272                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3273                     wr_hi, wr_lo, gen);
 3274                 for (j = 2; j < 30; j += 4) 
 3275                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3276                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3277 
 3278         }
 3279         err = sbuf_finish(sb);
 3280         sbuf_delete(sb);
 3281         return (err);
 3282 }
 3283 
 3284 static int
 3285 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3286 {
 3287         adapter_t *sc = arg1;
 3288         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3289         int coalesce_usecs;     
 3290         struct sge_qset *qs;
 3291         int i, j, err, nqsets = 0;
 3292         struct mtx *lock;
 3293 
 3294         if ((sc->flags & FULL_INIT_DONE) == 0)
 3295                 return (ENXIO);
 3296                 
 3297         coalesce_usecs = qsp->coalesce_usecs;
 3298         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3299 
 3300         if (err != 0) {
 3301                 return (err);
 3302         }
 3303         if (coalesce_usecs == qsp->coalesce_usecs)
 3304                 return (0);
 3305 
 3306         for (i = 0; i < sc->params.nports; i++) 
 3307                 for (j = 0; j < sc->port[i].nqsets; j++)
 3308                         nqsets++;
 3309 
 3310         coalesce_usecs = max(1, coalesce_usecs);
 3311 
 3312         for (i = 0; i < nqsets; i++) {
 3313                 qs = &sc->sge.qs[i];
 3314                 qsp = &sc->params.sge.qset[i];
 3315                 qsp->coalesce_usecs = coalesce_usecs;
 3316                 
 3317                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3318                             &sc->sge.qs[0].rspq.lock;
 3319 
 3320                 mtx_lock(lock);
 3321                 t3_update_qset_coalesce(qs, qsp);
 3322                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3323                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3324                 mtx_unlock(lock);
 3325         }
 3326 
 3327         return (0);
 3328 }
 3329 
 3330 static int
 3331 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
 3332 {
 3333         adapter_t *sc = arg1;
 3334         int rc, timestamp;
 3335 
 3336         if ((sc->flags & FULL_INIT_DONE) == 0)
 3337                 return (ENXIO);
 3338 
 3339         timestamp = sc->timestamp;
 3340         rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
 3341 
 3342         if (rc != 0)
 3343                 return (rc);
 3344 
 3345         if (timestamp != sc->timestamp) {
 3346                 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
 3347                     timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
 3348                 sc->timestamp = timestamp;
 3349         }
 3350 
 3351         return (0);
 3352 }
 3353 
 3354 void
 3355 t3_add_attach_sysctls(adapter_t *sc)
 3356 {
 3357         struct sysctl_ctx_list *ctx;
 3358         struct sysctl_oid_list *children;
 3359 
 3360         ctx = device_get_sysctl_ctx(sc->dev);
 3361         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3362 
 3363         /* random information */
 3364         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3365             "firmware_version",
 3366             CTLFLAG_RD, sc->fw_version,
 3367             0, "firmware version");
 3368         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3369             "hw_revision",
 3370             CTLFLAG_RD, &sc->params.rev,
 3371             0, "chip model");
 3372         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3373             "port_types",
 3374             CTLFLAG_RD, sc->port_types,
 3375             0, "type of ports");
 3376         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3377             "enable_debug",
 3378             CTLFLAG_RW, &cxgb_debug,
 3379             0, "enable verbose debugging output");
 3380         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3381             CTLFLAG_RD, &sc->tunq_coalesce,
 3382             "#tunneled packets freed");
 3383         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3384             "txq_overrun",
 3385             CTLFLAG_RD, &txq_fills,
 3386             0, "#times txq overrun");
 3387         SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
 3388             "core_clock",
 3389             CTLFLAG_RD, &sc->params.vpd.cclk,
 3390             0, "core clock frequency (in KHz)");
 3391 }
 3392 
 3393 
 3394 static const char *rspq_name = "rspq";
 3395 static const char *txq_names[] =
 3396 {
 3397         "txq_eth",
 3398         "txq_ofld",
 3399         "txq_ctrl"      
 3400 };
 3401 
 3402 static int
 3403 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3404 {
 3405         struct port_info *p = arg1;
 3406         uint64_t *parg;
 3407 
 3408         if (!p)
 3409                 return (EINVAL);
 3410 
 3411         cxgb_refresh_stats(p);
 3412         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3413 
 3414         return (sysctl_handle_64(oidp, parg, 0, req));
 3415 }
 3416 
 3417 void
 3418 t3_add_configured_sysctls(adapter_t *sc)
 3419 {
 3420         struct sysctl_ctx_list *ctx;
 3421         struct sysctl_oid_list *children;
 3422         int i, j;
 3423         
 3424         ctx = device_get_sysctl_ctx(sc->dev);
 3425         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3426 
 3427         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3428             "intr_coal",
 3429             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
 3430             0, t3_set_coalesce_usecs,
 3431             "I", "interrupt coalescing timer (us)");
 3432 
 3433         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3434             "pkt_timestamp",
 3435             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
 3436             0, t3_pkt_timestamp,
 3437             "I", "provide packet timestamp instead of connection hash");
 3438 
 3439         for (i = 0; i < sc->params.nports; i++) {
 3440                 struct port_info *pi = &sc->port[i];
 3441                 struct sysctl_oid *poid;
 3442                 struct sysctl_oid_list *poidlist;
 3443                 struct mac_stats *mstats = &pi->mac.stats;
 3444                 
 3445                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3446                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3447                     pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3448                     "port statistics");
 3449                 poidlist = SYSCTL_CHILDREN(poid);
 3450                 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
 3451                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3452                     0, "#queue sets");
 3453 
 3454                 for (j = 0; j < pi->nqsets; j++) {
 3455                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3456                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3457                                           *ctrlqpoid, *lropoid;
 3458                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3459                                                *txqpoidlist, *ctrlqpoidlist,
 3460                                                *lropoidlist;
 3461                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3462                         
 3463                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3464                         
 3465                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3466                             qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3467                             "qset statistics");
 3468                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3469 
 3470                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3471                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3472                                         "freelist #0 empty");
 3473                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3474                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3475                                         "freelist #1 empty");
 3476 
 3477                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3478                             rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3479                             "rspq statistics");
 3480                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3481 
 3482                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3483                             txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3484                             "txq statistics");
 3485                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3486 
 3487                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3488                             txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3489                             "ctrlq statistics");
 3490                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3491 
 3492                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3493                             "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
 3494                             "LRO statistics");
 3495                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3496 
 3497                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3498                             CTLFLAG_RD, &qs->rspq.size,
 3499                             0, "#entries in response queue");
 3500                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3501                             CTLFLAG_RD, &qs->rspq.cidx,
 3502                             0, "consumer index");
 3503                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3504                             CTLFLAG_RD, &qs->rspq.credits,
 3505                             0, "#credits");
 3506                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
 3507                             CTLFLAG_RD, &qs->rspq.starved,
 3508                             0, "#times starved");
 3509                         SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3510                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3511                             "physical_address_of the queue");
 3512                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3513                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3514                             0, "start rspq dump entry");
 3515                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3516                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3517                             0, "#rspq entries to dump");
 3518                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3519                             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
 3520                             &qs->rspq, 0, t3_dump_rspq, "A",
 3521                             "dump of the response queue");
 3522 
 3523                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
 3524                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
 3525                             "#tunneled packets dropped");
 3526                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3527                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
 3528                             0, "#tunneled packets waiting to be sent");
 3529 #if 0                   
 3530                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3531                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3532                             0, "#tunneled packets queue producer index");
 3533                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3534                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3535                             0, "#tunneled packets queue consumer index");
 3536 #endif                  
 3537                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
 3538                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3539                             0, "#tunneled packets processed by the card");
 3540                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3541                             CTLFLAG_RD, &txq->cleaned,
 3542                             0, "#tunneled packets cleaned");
 3543                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3544                             CTLFLAG_RD, &txq->in_use,
 3545                             0, "#tunneled packet slots in use");
 3546                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
 3547                             CTLFLAG_RD, &txq->txq_frees,
 3548                             "#tunneled packets freed");
 3549                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3550                             CTLFLAG_RD, &txq->txq_skipped,
 3551                             0, "#tunneled packet descriptors skipped");
 3552                         SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3553                             CTLFLAG_RD, &txq->txq_coalesced,
 3554                             "#tunneled packets coalesced");
 3555                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3556                             CTLFLAG_RD, &txq->txq_enqueued,
 3557                             0, "#tunneled packets enqueued to hardware");
 3558                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3559                             CTLFLAG_RD, &qs->txq_stopped,
 3560                             0, "tx queues stopped");
 3561                         SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3562                             CTLFLAG_RD, &txq->phys_addr,
 3563                             "physical_address_of the queue");
 3564                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3565                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3566                             0, "txq generation");
 3567                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3568                             CTLFLAG_RD, &txq->cidx,
 3569                             0, "hardware queue cidx");                  
 3570                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3571                             CTLFLAG_RD, &txq->pidx,
 3572                             0, "hardware queue pidx");
 3573                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3574                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3575                             0, "txq start idx for dump");
 3576                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3577                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3578                             0, "txq #entries to dump");                 
 3579                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3580                             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
 3581                             &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A",
 3582                             "dump of the transmit queue");
 3583 
 3584                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3585                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3586                             0, "ctrlq start idx for dump");
 3587                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3588                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3589                             0, "ctrl #entries to dump");                        
 3590                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3591                             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
 3592                             &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A",
 3593                             "dump of the transmit queue");
 3594 
 3595                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3596                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3597                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3598                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3599                         SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3600                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3601                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3602                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3603                 }
 3604 
 3605                 /* Now add a node for mac stats. */
 3606                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3607                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics");
 3608                 poidlist = SYSCTL_CHILDREN(poid);
 3609 
 3610                 /*
 3611                  * We (ab)use the length argument (arg2) to pass on the offset
 3612                  * of the data that we are interested in.  This is only required
 3613                  * for the quad counters that are updated from the hardware (we
 3614                  * make sure that we return the latest value).
 3615                  * sysctl_handle_macstat first updates *all* the counters from
 3616                  * the hardware, and then returns the latest value of the
 3617                  * requested counter.  Best would be to update only the
 3618                  * requested counter from hardware, but t3_mac_update_stats()
 3619                  * hides all the register details and we don't want to dive into
 3620                  * all that here.
 3621                  */
 3622 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3623     CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \
 3624     offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0)
 3625                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3626                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3627                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3628                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3629                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3630                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3631                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3632                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3633                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3634                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3635                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3636                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3637                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3638                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3639                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3640                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3641                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3642                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3643                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3644                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3645                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3646                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3647                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3648                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3649                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3650                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3651                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3652                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3653                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3654                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3655                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3656                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3657                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3658                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3659                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3660                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3661                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3662                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3663                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3664                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3665                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3666                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3667                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3668                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3669                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3670                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3671 #undef CXGB_SYSCTL_ADD_QUAD
 3672 
 3673 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3674     CTLFLAG_RD, &mstats->a, 0)
 3675                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3676                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3677                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3678                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3679                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3680                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3681                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3682                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3683                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3684                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3685 #undef CXGB_SYSCTL_ADD_ULONG
 3686         }
 3687 }
 3688         
 3689 /**
 3690  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3691  *      @qs: the queue set
 3692  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3693  *      @idx: the descriptor index in the queue
 3694  *      @data: where to dump the descriptor contents
 3695  *
 3696  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3697  *      size of the descriptor.
 3698  */
 3699 int
 3700 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3701                 unsigned char *data)
 3702 {
 3703         if (qnum >= 6)
 3704                 return (EINVAL);
 3705 
 3706         if (qnum < 3) {
 3707                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3708                         return -EINVAL;
 3709                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3710                 return sizeof(struct tx_desc);
 3711         }
 3712 
 3713         if (qnum == 3) {
 3714                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3715                         return (EINVAL);
 3716                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3717                 return sizeof(struct rsp_desc);
 3718         }
 3719 
 3720         qnum -= 4;
 3721         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3722                 return (EINVAL);
 3723         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3724         return sizeof(struct rx_desc);
 3725 }

Cache object: c62edcb4a7003a1a5c851e29f30286e6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.