FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c


    1 /**************************************************************************
    2 
    3 Copyright (c) 2007-2009, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/8.0/sys/dev/cxgb/cxgb_sge.c 195512 2009-07-09 19:27:58Z np $");
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kernel.h>
   36 #include <sys/module.h>
   37 #include <sys/bus.h>
   38 #include <sys/conf.h>
   39 #include <machine/bus.h>
   40 #include <machine/resource.h>
   41 #include <sys/bus_dma.h>
   42 #include <sys/rman.h>
   43 #include <sys/queue.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/taskqueue.h>
   46 
   47 #include <sys/proc.h>
   48 #include <sys/sbuf.h>
   49 #include <sys/sched.h>
   50 #include <sys/smp.h>
   52 #include <sys/syslog.h>
   53 
   54 #include <net/bpf.h>    
   55 
   56 #include <netinet/in_systm.h>
   57 #include <netinet/in.h>
   58 #include <netinet/ip.h>
   59 #include <netinet/tcp.h>
   60 
   61 #include <dev/pci/pcireg.h>
   62 #include <dev/pci/pcivar.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/pmap.h>
   66 
   67 #include <cxgb_include.h>
   68 #include <sys/mvec.h>
   69 
   70 int     txq_fills = 0;
   71 int     multiq_tx_enable = 1;
   72 
   73 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
   74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
   75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
   76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
   77     "size of per-queue mbuf ring");
   78 
   79 static int cxgb_tx_coalesce_force = 0;
   80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
   81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
   82     &cxgb_tx_coalesce_force, 0,
   83     "coalesce small packets into a single work request regardless of ring state");
   84 
   85 #define COALESCE_START_DEFAULT          (TX_ETH_Q_SIZE>>1)
   86 #define COALESCE_START_MAX              (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
   87 #define COALESCE_STOP_DEFAULT           (TX_ETH_Q_SIZE>>2)
   88 #define COALESCE_STOP_MIN               (TX_ETH_Q_SIZE>>5)
   89 #define TX_RECLAIM_DEFAULT              (TX_ETH_Q_SIZE>>5)
   90 #define TX_RECLAIM_MAX                  (TX_ETH_Q_SIZE>>2)
   91 #define TX_RECLAIM_MIN                  (TX_ETH_Q_SIZE>>6)
   92 
   93 
   94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
   95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
   96     &cxgb_tx_coalesce_enable_start);
   97 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
   98     &cxgb_tx_coalesce_enable_start, 0,
   99     "coalesce enable threshold");
  100 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
  101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
  102 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
  103     &cxgb_tx_coalesce_enable_stop, 0,
  104     "coalesce disable threshold");
  105 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  106 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
  107 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
  108     &cxgb_tx_reclaim_threshold, 0,
  109     "tx cleaning minimum threshold");
  110 
  111 /*
  112  * XXX don't re-enable this until TOE stops assuming
  113  * we have an m_ext
  114  */
  115 static int recycle_enable = 0;
  116 int cxgb_ext_freed = 0;
  117 int cxgb_ext_inited = 0;
  118 int fl_q_size = 0;
  119 int jumbo_q_size = 0;
  120 
  121 extern int cxgb_use_16k_clusters;
  122 extern int nmbjumbo4;
  123 extern int nmbjumbo9;
  124 extern int nmbjumbo16;
  125 
  126 #define USE_GTS 0
  127 
  128 #define SGE_RX_SM_BUF_SIZE      1536
  129 #define SGE_RX_DROP_THRES       16
  130 #define SGE_RX_COPY_THRES       128
  131 
  132 /*
  133  * Period of the Tx buffer reclaim timer.  This timer does not need to run
  134  * frequently as Tx buffers are usually reclaimed by new Tx packets.
  135  */
  136 #define TX_RECLAIM_PERIOD       (hz >> 1)
  137 
  138 /* 
  139  * Values for sge_txq.flags
  140  */
  141 enum {
  142         TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
  143         TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
  144 };
  145 
  146 struct tx_desc {
  147         uint64_t        flit[TX_DESC_FLITS];
  148 } __packed;
  149 
  150 struct rx_desc {
  151         uint32_t        addr_lo;
  152         uint32_t        len_gen;
  153         uint32_t        gen2;
  154         uint32_t        addr_hi;
  155 } __packed;
  156 
  157 struct rsp_desc {               /* response queue descriptor */
  158         struct rss_header       rss_hdr;
  159         uint32_t                flags;
  160         uint32_t                len_cq;
  161         uint8_t                 imm_data[47];
  162         uint8_t                 intr_gen;
  163 } __packed;
  164 
  165 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  166 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  167 #define RX_SW_DESC_INUSE        (1 << 3)
  168 #define TX_SW_DESC_MAPPED       (1 << 4)
  169 
  170 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
  171 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
  172 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
  173 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  174 
  175 struct tx_sw_desc {                /* SW state per Tx descriptor */
  176         struct mbuf     *m;
  177         bus_dmamap_t    map;
  178         int             flags;
  179 };
  180 
  181 struct rx_sw_desc {                /* SW state per Rx descriptor */
  182         caddr_t         rxsd_cl;
  183         struct mbuf     *m;
  184         bus_dmamap_t    map;
  185         int             flags;
  186 };
  187 
  188 struct txq_state {
  189         unsigned int    compl;
  190         unsigned int    gen;
  191         unsigned int    pidx;
  192 };
  193 
  194 struct refill_fl_cb_arg {
  195         int               error;
  196         bus_dma_segment_t seg;
  197         int               nseg;
  198 };
  199 
  200 
  201 /*
  202  * Maps a number of flits to the number of Tx descriptors that can hold them.
  203  * The formula is
  204  *
  205  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
  206  *
  207  * HW allows up to 4 descriptors to be combined into a WR.
  208  */
  209 static uint8_t flit_desc_map[] = {
  210         0,
  211 #if SGE_NUM_GENBITS == 1
  212         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  213         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  214         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  215         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  216 #elif SGE_NUM_GENBITS == 2
  217         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  218         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  219         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  220         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  221 #else
  222 # error "SGE_NUM_GENBITS must be 1 or 2"
  223 #endif
  224 };
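      /*
       * Worked example of the table above (illustrative): with
       * SGE_NUM_GENBITS == 2 the last flit of each descriptor carries the
       * generation value (see wr_gen2() below), so WR_FLITS is presumably 15
       * and a 16-flit request needs 1 + (16 - 2) / (15 - 1) = 2 descriptors,
       * matching flit_desc_map[16] == 2.
       */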
  225 
  226 #define TXQ_LOCK_ASSERT(qs)     mtx_assert(&(qs)->lock, MA_OWNED)
  227 #define TXQ_TRYLOCK(qs)         mtx_trylock(&(qs)->lock)        
  228 #define TXQ_LOCK(qs)            mtx_lock(&(qs)->lock)   
  229 #define TXQ_UNLOCK(qs)          mtx_unlock(&(qs)->lock) 
  230 #define TXQ_RING_EMPTY(qs)      drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  231 #define TXQ_RING_FLUSH(qs)      drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  232 #define TXQ_RING_DEQUEUE_COND(qs, func, arg)                            \
  233         drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
  234 #define TXQ_RING_DEQUEUE(qs) \
  235         drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
  236 
  237 int cxgb_debug = 0;
  238 
  239 static void sge_timer_cb(void *arg);
  240 static void sge_timer_reclaim(void *arg, int ncount);
  241 static void sge_txq_reclaim_handler(void *arg, int ncount);
  242 static void cxgb_start_locked(struct sge_qset *qs);
  243 
  244 /*
  245  * XXX need to cope with bursty scheduling by looking at a wider
  246  * window than we are now for determining the need for coalescing
  247  *
  248  */
  249 static __inline uint64_t
  250 check_pkt_coalesce(struct sge_qset *qs) 
  251 { 
  252         struct adapter *sc; 
  253         struct sge_txq *txq; 
  254         uint8_t *fill;
  255 
  256         if (__predict_false(cxgb_tx_coalesce_force))
  257                 return (1);
  258         txq = &qs->txq[TXQ_ETH]; 
  259         sc = qs->port->adapter; 
  260         fill = &sc->tunq_fill[qs->idx];
  261 
  262         if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
  263                 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
  264         if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
  265                 cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
  266         /*
  267          * Mark the queue for coalescing once cxgb_tx_coalesce_enable_start
  268          * descriptors are in use; drop back out only when usage falls to
  269          * cxgb_tx_coalesce_enable_stop or below, the SW ring is empty and the
  270          * qset is not coalescing.  This provides some degree of hysteresis.
  271          */
  272         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
  273             TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
  274                 *fill = 0; 
  275         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
  276                 *fill = 1; 
  277 
  278         return (sc->tunq_coalesce);
  279 } 
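      /*
       * Illustrative arithmetic for the defaults above, assuming
       * TX_ETH_Q_SIZE is 1024: coalescing kicks in once 512 descriptors
       * (TX_ETH_Q_SIZE >> 1) are in use and is left once usage drops to 256
       * (TX_ETH_Q_SIZE >> 2), with the tunables clamped to at most 896
       * (COALESCE_START_MAX) and at least 32 (COALESCE_STOP_MIN).
       */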
  280 
  281 #ifdef __LP64__
  282 static void
  283 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  284 {
  285         uint64_t wr_hilo;
  286 #if _BYTE_ORDER == _LITTLE_ENDIAN
  287         wr_hilo = wr_hi;
  288         wr_hilo |= (((uint64_t)wr_lo)<<32);
  289 #else
  290         wr_hilo = wr_lo;
  291         wr_hilo |= (((uint64_t)wr_hi)<<32);
  292 #endif  
  293         wrp->wrh_hilo = wr_hilo;
  294 }
  295 #else
  296 static void
  297 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
  298 {
  299 
  300         wrp->wrh_hi = wr_hi;
  301         wmb();
  302         wrp->wrh_lo = wr_lo;
  303 }
  304 #endif
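      /*
       * Note: both set_wr_hdr() variants are ordered so that wrh_lo, which
       * carries the WR generation, is presumably never visible to the SGE
       * before wrh_hi: LP64 builds issue one combined 64-bit store, while
       * 32-bit builds store wrh_hi, then a write barrier, then wrh_lo.
       */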
  305 
  306 struct coalesce_info {
  307         int count;
  308         int nbytes;
  309 };
  310 
  311 static int
  312 coalesce_check(struct mbuf *m, void *arg)
  313 {
  314         struct coalesce_info *ci = arg;
  315         int *count = &ci->count;
  316         int *nbytes = &ci->nbytes;
  317 
  318         if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
  319                 (*count < 7) && (m->m_next == NULL))) {
  320                 *count += 1;
  321                 *nbytes += m->m_len;
  322                 return (1);
  323         }
  324         return (0);
  325 }
  326 
  327 static struct mbuf *
  328 cxgb_dequeue(struct sge_qset *qs)
  329 {
  330         struct mbuf *m, *m_head, *m_tail;
  331         struct coalesce_info ci;
  332 
  333         
  334         if (check_pkt_coalesce(qs) == 0) 
  335                 return TXQ_RING_DEQUEUE(qs);
  336 
  337         m_head = m_tail = NULL;
  338         ci.count = ci.nbytes = 0;
  339         do {
  340                 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
  341                 if (m_head == NULL) {
  342                         m_tail = m_head = m;
  343                 } else if (m != NULL) {
  344                         m_tail->m_nextpkt = m;
  345                         m_tail = m;
  346                 }
  347         } while (m != NULL);
  348         if (ci.count > 7)
  349                 panic("trying to coalesce %d packets in to one WR", ci.count);
  350         return (m_head);
  351 }
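      /*
       * Note: with coalescing off, cxgb_dequeue() simply pops one packet from
       * the buf_ring; otherwise it pulls up to 7 single-mbuf packets totalling
       * at most 10500 bytes (see coalesce_check() above) and chains them via
       * m_nextpkt so that t3_encap() can emit them as one batched work request.
       */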
  352         
  353 /**
  354  *      reclaim_completed_tx - reclaims completed Tx descriptors
  355  *      @adapter: the adapter
  356  *      @q: the Tx queue to reclaim completed descriptors from
  357  *
  358  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  359  *      and frees the associated buffers if possible.  Called with the Tx
  360  *      queue's lock held.
  361  */
  362 static __inline int
  363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
  364 {
  365         struct sge_txq *q = &qs->txq[queue];
  366         int reclaim = desc_reclaimable(q);
  367 
  368         if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
  369             (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
  370                 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
  371 
  372         if (reclaim < reclaim_min)
  373                 return (0);
  374 
  375         mtx_assert(&qs->lock, MA_OWNED);
  376         if (reclaim > 0) {
  377                 t3_free_tx_desc(qs, reclaim, queue);
  378                 q->cleaned += reclaim;
  379                 q->in_use -= reclaim;
  380         }
  381         if (isset(&qs->txq_stopped, TXQ_ETH))
  382                 clrbit(&qs->txq_stopped, TXQ_ETH);
  383 
  384         return (reclaim);
  385 }
  386 
  387 /**
  388  *      should_restart_tx - are there enough resources to restart a Tx queue?
  389  *      @q: the Tx queue
  390  *
  391  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  392  */
  393 static __inline int
  394 should_restart_tx(const struct sge_txq *q)
  395 {
  396         unsigned int r = q->processed - q->cleaned;
  397 
  398         return q->in_use - r < (q->size >> 1);
  399 }
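      /*
       * Note: processed - cleaned is the number of descriptors the hardware
       * has already finished with but that have not yet been reclaimed, so
       * the queue is deemed restartable once fewer than half of its
       * descriptors remain genuinely outstanding.
       */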
  400 
  401 /**
  402  *      t3_sge_init - initialize SGE
  403  *      @adap: the adapter
  404  *      @p: the SGE parameters
  405  *
  406  *      Performs SGE initialization needed every time after a chip reset.
  407  *      We do not initialize any of the queue sets here, instead the driver
  408  *      top-level must request those individually.  We also do not enable DMA
  409  *      here, that should be done after the queues have been set up.
  410  */
  411 void
  412 t3_sge_init(adapter_t *adap, struct sge_params *p)
  413 {
  414         u_int ctrl, ups;
  415 
  416         ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
  417 
  418         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
  419                F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
  420                V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
  421                V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
  422 #if SGE_NUM_GENBITS == 1
  423         ctrl |= F_EGRGENCTRL;
  424 #endif
  425         if (adap->params.rev > 0) {
  426                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
  427                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
  428         }
  429         t3_write_reg(adap, A_SG_CONTROL, ctrl);
  430         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
  431                      V_LORCQDRBTHRSH(512));
  432         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
  433         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
  434                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
  435         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
  436                      adap->params.rev < T3_REV_C ? 1000 : 500);
  437         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
  438         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
  439         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
  440         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
  441         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
  442 }
  443 
  444 
  445 /**
  446  *      sgl_len - calculates the size of an SGL of the given capacity
  447  *      @n: the number of SGL entries
  448  *
  449  *      Calculates the number of flits needed for a scatter/gather list that
  450  *      can hold the given number of entries.
  451  */
  452 static __inline unsigned int
  453 sgl_len(unsigned int n)
  454 {
  455         return ((3 * n) / 2 + (n & 1));
  456 }
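      /*
       * Worked example (illustrative): an SGL entry is an 8-byte address plus
       * a 4-byte length, i.e. 1.5 flits, so n = 5 entries need
       * (3 * 5) / 2 + (5 & 1) = 8 flits and a single entry rounds up to 2.
       */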
  457 
  458 /**
  459  *      get_imm_packet - return the next ingress packet buffer from a response
  460  *      @resp: the response descriptor containing the packet data
  461  *
  462  *      Return a packet containing the immediate data of the given response.
  463  */
  464 static int
  465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
  466 {
  467 
  468         m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
  469         m->m_ext.ext_buf = NULL;
  470         m->m_ext.ext_type = 0;
  471         memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 
  472         return (0);     
  473 }
  474 
  475 static __inline u_int
  476 flits_to_desc(u_int n)
  477 {
  478         return (flit_desc_map[n]);
  479 }
  480 
  481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
  482                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
  483                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
  484                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
  485                     F_HIRCQPARITYERROR)
  486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
  487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
  488                       F_RSPQDISABLED)
  489 
  490 /**
  491  *      t3_sge_err_intr_handler - SGE async event interrupt handler
  492  *      @adapter: the adapter
  493  *
  494  *      Interrupt handler for SGE asynchronous (non-data) events.
  495  */
  496 void
  497 t3_sge_err_intr_handler(adapter_t *adapter)
  498 {
  499         unsigned int v, status;
  500 
  501         status = t3_read_reg(adapter, A_SG_INT_CAUSE);
  502         if (status & SGE_PARERR)
  503                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
  504                          status & SGE_PARERR);
  505         if (status & SGE_FRAMINGERR)
  506                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
  507                          status & SGE_FRAMINGERR);
  508         if (status & F_RSPQCREDITOVERFOW)
  509                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
  510 
  511         if (status & F_RSPQDISABLED) {
  512                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
  513 
  514                 CH_ALERT(adapter,
  515                          "packet delivered to disabled response queue (0x%x)\n",
  516                          (v >> S_RSPQ0DISABLED) & 0xff);
  517         }
  518 
  519         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
  520         if (status & SGE_FATALERR)
  521                 t3_fatal_err(adapter);
  522 }
  523 
  524 void
  525 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  526 {
  527         int i, nqsets;
  528 
  529         nqsets = min(SGE_QSETS, mp_ncpus*4);
  530 
  531         fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
  532 
  533         while (!powerof2(fl_q_size))
  534                 fl_q_size--;
  535 #if __FreeBSD_version >= 700111
  536         if (cxgb_use_16k_clusters) 
  537                 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
  538         else
  539                 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
  540 #else
  541         jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
  542 #endif
  543         while (!powerof2(jumbo_q_size))
  544                 jumbo_q_size--;         
  545         
  546         /* XXX Does ETHER_ALIGN need to be accounted for here? */
  547         p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
  548 
  549         for (i = 0; i < SGE_QSETS; ++i) {
  550                 struct qset_params *q = p->qset + i;
  551 
  552                 if (adap->params.nports > 2) {
  553                         q->coalesce_usecs = 50;
  554                 } else {
  555 #ifdef INVARIANTS                       
  556                         q->coalesce_usecs = 10;
  557 #else
  558                         q->coalesce_usecs = 5;
  559 #endif                  
  560                 }
  561                 q->polling = 0;
  562                 q->rspq_size = RSPQ_Q_SIZE;
  563                 q->fl_size = fl_q_size;
  564                 q->jumbo_size = jumbo_q_size;
  565                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  566                 q->txq_size[TXQ_OFLD] = 1024;
  567                 q->txq_size[TXQ_CTRL] = 256;
  568                 q->cong_thres = 0;
  569         }
  570 }
  571 
  572 int
  573 t3_sge_alloc(adapter_t *sc)
  574 {
  575 
  576         /* The parent tag. */
  577         if (bus_dma_tag_create( NULL,                   /* parent */
  578                                 1, 0,                   /* algnmnt, boundary */
  579                                 BUS_SPACE_MAXADDR,      /* lowaddr */
  580                                 BUS_SPACE_MAXADDR,      /* highaddr */
  581                                 NULL, NULL,             /* filter, filterarg */
  582                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
  583                                 BUS_SPACE_UNRESTRICTED, /* nsegments */
  584                                 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
  585                                 0,                      /* flags */
  586                                 NULL, NULL,             /* lock, lockarg */
  587                                 &sc->parent_dmat)) {
  588                 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
  589                 return (ENOMEM);
  590         }
  591 
  592         /*
  593          * DMA tag for normal sized RX frames
  594          */
  595         if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
  596                 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
  597                 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
  598                 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
  599                 return (ENOMEM);
  600         }
  601 
  602         /* 
  603          * DMA tag for jumbo sized RX frames.
  604          */
  605         if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
  606                 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
  607                 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
  608                 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
  609                 return (ENOMEM);
  610         }
  611 
  612         /* 
  613          * DMA tag for TX frames.
  614          */
  615         if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
  616                 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  617                 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  618                 NULL, NULL, &sc->tx_dmat)) {
  619                 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
  620                 return (ENOMEM);
  621         }
  622 
  623         return (0);
  624 }
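      /*
       * Note: the tags above form a small hierarchy: parent_dmat is the
       * unrestricted parent, rx_dmat maps one MCLBYTES cluster per descriptor,
       * rx_jumbo_dmat maps one MJUM16BYTES cluster, and tx_dmat allows up to
       * TX_MAX_SEGS segments totalling at most TX_MAX_SIZE per transmit
       * mapping.
       */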
  625 
  626 int
  627 t3_sge_free(struct adapter * sc)
  628 {
  629 
  630         if (sc->tx_dmat != NULL)
  631                 bus_dma_tag_destroy(sc->tx_dmat);
  632 
  633         if (sc->rx_jumbo_dmat != NULL)
  634                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  635 
  636         if (sc->rx_dmat != NULL)
  637                 bus_dma_tag_destroy(sc->rx_dmat);
  638 
  639         if (sc->parent_dmat != NULL)
  640                 bus_dma_tag_destroy(sc->parent_dmat);
  641 
  642         return (0);
  643 }
  644 
  645 void
  646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  647 {
  648 
  649         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
  650         qs->rspq.polling = 0 /* p->polling */;
  651 }
  652 
  653 #if !defined(__i386__) && !defined(__amd64__)
  654 static void
  655 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  656 {
  657         struct refill_fl_cb_arg *cb_arg = arg;
  658         
  659         cb_arg->error = error;
  660         cb_arg->seg = segs[0];
  661         cb_arg->nseg = nseg;
  662 
  663 }
  664 #endif
  665 /**
  666  *      refill_fl - refill an SGE free-buffer list
  667  *      @sc: the controller softc
  668  *      @q: the free-list to refill
  669  *      @n: the number of new buffers to allocate
  670  *
  671  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  672  *      The caller must assure that @n does not exceed the queue's capacity.
  673  */
  674 static void
  675 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
  676 {
  677         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  678         struct rx_desc *d = &q->desc[q->pidx];
  679         struct refill_fl_cb_arg cb_arg;
  680         struct mbuf *m;
  681         caddr_t cl;
  682         int err, count = 0;
  683         
  684         cb_arg.error = 0;
  685         while (n--) {
  686                 /*
  687                  * We only allocate a cluster, mbuf allocation happens after rx
  688                  */
  689                 if (q->zone == zone_pack) {
  690                         if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
  691                                 break;
  692                         cl = m->m_ext.ext_buf;                  
  693                 } else {
  694                         if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
  695                                 break;
  696                         if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  697                                 uma_zfree(q->zone, cl);
  698                                 break;
  699                         }
  700                 }
  701                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  702                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  703                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  704                                 uma_zfree(q->zone, cl);
  705                                 goto done;
  706                         }
  707                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  708                 }
  709 #if !defined(__i386__) && !defined(__amd64__)
  710                 err = bus_dmamap_load(q->entry_tag, sd->map,
  711                     cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
  712                 
  713                 if (err != 0 || cb_arg.error) {
  714                         if (q->zone == zone_pack)
  715                                 uma_zfree(q->zone, cl);
  716                         m_free(m);
  717                         goto done;
  718                 }
  719 #else
  720                 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
  721 #endif          
  722                 sd->flags |= RX_SW_DESC_INUSE;
  723                 sd->rxsd_cl = cl;
  724                 sd->m = m;
  725                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  726                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  727                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  728                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  729 
  730                 d++;
  731                 sd++;
  732 
  733                 if (++q->pidx == q->size) {
  734                         q->pidx = 0;
  735                         q->gen ^= 1;
  736                         sd = q->sdesc;
  737                         d = q->desc;
  738                 }
  739                 q->credits++;
  740                 count++;
  741         }
  742 
  743 done:
  744         if (count)
  745                 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  746 }
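      /*
       * Note: each refilled entry is stamped with the free list's current
       * generation (V_FLD_GEN1/V_FLD_GEN2), and the generation flips whenever
       * pidx wraps, which is presumably how the SGE tells fresh entries from
       * stale ones; the A_SG_KDOORBELL write then advertises the new buffers.
       */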
  747 
  748 
  749 /**
  750  *      free_rx_bufs - free the Rx buffers on an SGE free list
  751  *      @sc: the controller softc
  752  *      @q: the SGE free list to clean up
  753  *
  754  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  755  *      this queue should be stopped before calling this function.
  756  */
  757 static void
  758 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  759 {
  760         u_int cidx = q->cidx;
  761 
  762         while (q->credits--) {
  763                 struct rx_sw_desc *d = &q->sdesc[cidx];
  764 
  765                 if (d->flags & RX_SW_DESC_INUSE) {
  766                         bus_dmamap_unload(q->entry_tag, d->map);
  767                         bus_dmamap_destroy(q->entry_tag, d->map);
  768                         if (q->zone == zone_pack) {
  769                                 m_init(d->m, zone_pack, MCLBYTES,
  770                                     M_NOWAIT, MT_DATA, M_EXT);
  771                                 uma_zfree(zone_pack, d->m);
  772                         } else {
  773                                 m_init(d->m, zone_mbuf, MLEN,
  774                                     M_NOWAIT, MT_DATA, 0);
  775                                 uma_zfree(zone_mbuf, d->m);
  776                                 uma_zfree(q->zone, d->rxsd_cl);
  777                         }                       
  778                 }
  779                 
  780                 d->rxsd_cl = NULL;
  781                 d->m = NULL;
  782                 if (++cidx == q->size)
  783                         cidx = 0;
  784         }
  785 }
  786 
  787 static __inline void
  788 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  789 {
  790         refill_fl(adap, fl, min(16U, fl->size - fl->credits));
  791 }
  792 
  793 static __inline void
  794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
  795 {
  796         if ((fl->size - fl->credits) < max)
  797                 refill_fl(adap, fl, min(max, fl->size - fl->credits));
  798 }
  799 
  800 /**
  801  *      recycle_rx_buf - recycle a receive buffer
  802  *      @adapter: the adapter
  803  *      @q: the SGE free list
  804  *      @idx: index of buffer to recycle
  805  *
  806  *      Recycles the specified buffer on the given free list by adding it at
  807  *      the next available slot on the list.
  808  */
  809 static void
  810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
  811 {
  812         struct rx_desc *from = &q->desc[idx];
  813         struct rx_desc *to   = &q->desc[q->pidx];
  814 
  815         q->sdesc[q->pidx] = q->sdesc[idx];
  816         to->addr_lo = from->addr_lo;        // already big endian
  817         to->addr_hi = from->addr_hi;        // likewise
  818         wmb();  /* necessary ? */
  819         to->len_gen = htobe32(V_FLD_GEN1(q->gen));
  820         to->gen2 = htobe32(V_FLD_GEN2(q->gen));
  821         q->credits++;
  822 
  823         if (++q->pidx == q->size) {
  824                 q->pidx = 0;
  825                 q->gen ^= 1;
  826         }
  827         t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  828 }
  829 
  830 static void
  831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  832 {
  833         uint32_t *addr;
  834 
  835         addr = arg;
  836         *addr = segs[0].ds_addr;
  837 }
  838 
  839 static int
  840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
  841     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
  842     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
  843 {
  844         size_t len = nelem * elem_size;
  845         void *s = NULL;
  846         void *p = NULL;
  847         int err;
  848 
  849         if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
  850                                       BUS_SPACE_MAXADDR_32BIT,
  851                                       BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
  852                                       len, 0, NULL, NULL, tag)) != 0) {
  853                 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
  854                 return (ENOMEM);
  855         }
  856 
  857         if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
  858                                     map)) != 0) {
  859                 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
  860                 return (ENOMEM);
  861         }
  862 
  863         bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
  864         bzero(p, len);
  865         *(void **)desc = p;
  866 
  867         if (sw_size) {
  868                 len = nelem * sw_size;
  869                 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
  870                 *(void **)sdesc = s;
  871         }
  872         if (parent_entry_tag == NULL)
  873                 return (0);
  874             
  875         if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
  876                                       BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
  877                                       NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
  878                                       TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
  879                                       NULL, NULL, entry_tag)) != 0) {
  880                 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
  881                 return (ENOMEM);
  882         }
  883         return (0);
  884 }
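      /*
       * Note: alloc_ring() returns three things: zeroed DMA memory for the HW
       * descriptor ring (desc/phys), an optional software descriptor array
       * (sdesc) when sw_size is non-zero, and an optional per-entry DMA tag
       * (entry_tag) derived from parent_entry_tag for mapping individual
       * buffers.
       */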
  885 
  886 static void
  887 sge_slow_intr_handler(void *arg, int ncount)
  888 {
  889         adapter_t *sc = arg;
  890 
  891         t3_slow_intr_handler(sc);
  892 }
  893 
  894 /**
  895  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  896  *      @data: the SGE queue set to maintain
  897  *
  898  *      Runs periodically from a timer to perform maintenance of an SGE queue
  899  *      set.  It performs the following tasks:
  900  *
  901  *      a) Cleans up any completed Tx descriptors that may still be pending.
  902  *      Normal descriptor cleanup happens when new packets are added to a Tx
  903  *      queue so this timer is relatively infrequent and does any cleanup only
  904  *      if the Tx queue has not seen any new packets in a while.  We make a
  905  *      best effort attempt to reclaim descriptors, in that we don't wait
  906  *      around if we cannot get a queue's lock (which most likely is because
  907  *      someone else is queueing new packets and so will also handle the clean
  908  *      up).  Since control queues use immediate data exclusively we don't
  909  *      bother cleaning them up here.
  910  *
  911  *      b) Replenishes Rx queues that have run out due to memory shortage.
  912  *      Normally new Rx buffers are added when existing ones are consumed but
  913  *      when out of memory a queue can become empty.  We try to add only a few
  914  *      buffers here, the queue will be replenished fully as these new buffers
  915  *      are used up if memory shortage has subsided.
  916  *      
  917  *      c) Return coalesced response queue credits in case a response queue is
  918  *      starved.
  919  *
  920  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  921  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  922  */
  923 static void
  924 sge_timer_cb(void *arg)
  925 {
  926         adapter_t *sc = arg;
  927         if ((sc->flags & USING_MSIX) == 0) {
  928                 
  929                 struct port_info *pi;
  930                 struct sge_qset *qs;
  931                 struct sge_txq  *txq;
  932                 int i, j;
  933                 int reclaim_ofl, refill_rx;
  934 
  935                 if (sc->open_device_map == 0) 
  936                         return;
  937 
  938                 for (i = 0; i < sc->params.nports; i++) {
  939                         pi = &sc->port[i];
  940                         for (j = 0; j < pi->nqsets; j++) {
  941                                 qs = &sc->sge.qs[pi->first_qset + j];
  942                                 txq = &qs->txq[0];
  943                                 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  944                                 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  945                                     (qs->fl[1].credits < qs->fl[1].size));
  946                                 if (reclaim_ofl || refill_rx) {
  947                                         taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
  948                                         break;
  949                                 }
  950                         }
  951                 }
  952         }
  953         
  954         if (sc->params.nports > 2) {
  955                 int i;
  956 
  957                 for_each_port(sc, i) {
  958                         struct port_info *pi = &sc->port[i];
  959 
  960                         t3_write_reg(sc, A_SG_KDOORBELL, 
  961                                      F_SELEGRCNTX | 
  962                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  963                 }
  964         }       
  965         if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
  966             sc->open_device_map != 0)
  967                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  968 }
  969 
  970 /*
  971  * This is meant to be a catch-all function to keep sge state private
  972  * to sge.c
  973  *
  974  */
  975 int
  976 t3_sge_init_adapter(adapter_t *sc)
  977 {
  978         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
  979         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  980         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
  981         return (0);
  982 }
  983 
  984 int
  985 t3_sge_reset_adapter(adapter_t *sc)
  986 {
  987         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  988         return (0);
  989 }
  990 
  991 int
  992 t3_sge_init_port(struct port_info *pi)
  993 {
  994         TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
  995         return (0);
  996 }
  997 
  998 /**
  999  *      refill_rspq - replenish an SGE response queue
 1000  *      @adapter: the adapter
 1001  *      @q: the response queue to replenish
 1002  *      @credits: how many new responses to make available
 1003  *
 1004  *      Replenishes a response queue by making the supplied number of responses
 1005  *      available to HW.
 1006  */
 1007 static __inline void
 1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
 1009 {
 1010 
 1011         /* mbufs are allocated on demand when a rspq entry is processed. */
 1012         t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
 1013                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 1014 }
 1015 
 1016 static void
 1017 sge_txq_reclaim_handler(void *arg, int ncount)
 1018 {
 1019         struct sge_qset *qs = arg;
 1020         int i;
 1021 
 1022         for (i = 0; i < 3; i++)
 1023                 reclaim_completed_tx(qs, 16, i);
 1024 }
 1025 
 1026 static void
 1027 sge_timer_reclaim(void *arg, int ncount)
 1028 {
 1029         struct port_info *pi = arg;
 1030         int i, nqsets = pi->nqsets;
 1031         adapter_t *sc = pi->adapter;
 1032         struct sge_qset *qs;
 1033         struct mtx *lock;
 1034         
 1035         KASSERT((sc->flags & USING_MSIX) == 0,
 1036             ("can't call timer reclaim for msi-x"));
 1037 
 1038         for (i = 0; i < nqsets; i++) {
 1039                 qs = &sc->sge.qs[pi->first_qset + i];
 1040 
 1041                 reclaim_completed_tx(qs, 16, TXQ_OFLD);
 1042                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 1043                             &sc->sge.qs[0].rspq.lock;
 1044 
 1045                 if (mtx_trylock(lock)) {
 1046                         /* XXX currently assume that we are *NOT* polling */
 1047                         uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
 1048 
 1049                         if (qs->fl[0].credits < qs->fl[0].size - 16)
 1050                                 __refill_fl(sc, &qs->fl[0]);
 1051                         if (qs->fl[1].credits < qs->fl[1].size - 16)
 1052                                 __refill_fl(sc, &qs->fl[1]);
 1053                         
 1054                         if (status & (1 << qs->rspq.cntxt_id)) {
 1055                                 if (qs->rspq.credits) {
 1056                                         refill_rspq(sc, &qs->rspq, 1);
 1057                                         qs->rspq.credits--;
 1058                                         t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
 1059                                             1 << qs->rspq.cntxt_id);
 1060                                 }
 1061                         }
 1062                         mtx_unlock(lock);
 1063                 }
 1064         }
 1065 }
 1066 
 1067 /**
 1068  *      init_qset_cntxt - initialize an SGE queue set context info
 1069  *      @qs: the queue set
 1070  *      @id: the queue set id
 1071  *
 1072  *      Initializes the TIDs and context ids for the queues of a queue set.
 1073  */
 1074 static void
 1075 init_qset_cntxt(struct sge_qset *qs, u_int id)
 1076 {
 1077 
 1078         qs->rspq.cntxt_id = id;
 1079         qs->fl[0].cntxt_id = 2 * id;
 1080         qs->fl[1].cntxt_id = 2 * id + 1;
 1081         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 1082         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 1083         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 1084         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 1085         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 1086 
 1087         mbufq_init(&qs->txq[TXQ_ETH].sendq);
 1088         mbufq_init(&qs->txq[TXQ_OFLD].sendq);
 1089         mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 1090 }
 1091 
 1092 
 1093 static void
 1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
 1095 {
 1096         txq->in_use += ndesc;
 1097         /*
 1098          * XXX we don't handle stopping of queue
 1099          * presumably start handles this when we bump against the end
 1100          */
 1101         txqs->gen = txq->gen;
 1102         txq->unacked += ndesc;
 1103         txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
 1104         txq->unacked &= 31;
 1105         txqs->pidx = txq->pidx;
 1106         txq->pidx += ndesc;
 1107 #ifdef INVARIANTS
 1108         if (((txqs->pidx > txq->cidx) &&
 1109                 (txq->pidx < txqs->pidx) &&
 1110                 (txq->pidx >= txq->cidx)) ||
 1111             ((txqs->pidx < txq->cidx) &&
 1112                 (txq->pidx >= txq-> cidx)) ||
 1113             ((txqs->pidx < txq->cidx) &&
 1114                 (txq->cidx < txqs->pidx)))
 1115                 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
 1116                     txqs->pidx, txq->pidx, txq->cidx);
 1117 #endif
 1118         if (txq->pidx >= txq->size) {
 1119                 txq->pidx -= txq->size;
 1120                 txq->gen ^= 1;
 1121         }
 1122 
 1123 }
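      /*
       * Illustrative example of the completion-request logic above: unacked
       * accumulates issued descriptors; once bit 5 becomes set the WR asks for
       * a completion (txqs->compl, shifted into S_WR_COMPL) and "unacked &= 31"
       * keeps the remainder, e.g. unacked = 33 requests a completion and
       * leaves 1.
       */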
 1124 
 1125 /**
 1126  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 1127  *      @m: the packet mbufs
 1128  *      @nsegs: the number of segments 
 1129  *
 1130  *      Returns the number of Tx descriptors needed for the given Ethernet
 1131  *      packet.  Ethernet packets require addition of WR and CPL headers.
 1132  */
 1133 static __inline unsigned int
 1134 calc_tx_descs(const struct mbuf *m, int nsegs)
 1135 {
 1136         unsigned int flits;
 1137 
 1138         if (m->m_pkthdr.len <= PIO_LEN)
 1139                 return 1;
 1140 
 1141         flits = sgl_len(nsegs) + 2;
 1142 #ifdef TSO_SUPPORTED
 1143         if (m->m_pkthdr.csum_flags & CSUM_TSO)
 1144                 flits++;
 1145 #endif  
 1146         return flits_to_desc(flits);
 1147 }
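      /*
       * Worked example (illustrative): a packet no larger than PIO_LEN always
       * maps to a single descriptor; otherwise a 5-segment packet needs
       * sgl_len(5) + 2 = 10 flits (SGL plus WR/CPL headers), which
       * flit_desc_map[] resolves to one descriptor.
       */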
 1148 
 1149 static unsigned int
 1150 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
 1151     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
 1152 {
 1153         struct mbuf *m0;
 1154         int err, pktlen, pass = 0;
 1155         bus_dma_tag_t tag = txq->entry_tag;
 1156 
 1157 retry:
 1158         err = 0;
 1159         m0 = *m;
 1160         pktlen = m0->m_pkthdr.len;
 1161 #if defined(__i386__) || defined(__amd64__)
 1162         if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
 1163                 goto done;
 1164         } else
 1165 #endif
 1166                 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
 1167 
 1168         if (err == 0) {
 1169                 goto done;
 1170         }
 1171         if (err == EFBIG && pass == 0) {
 1172                 pass = 1;
 1173                 /* Too many segments, try to defrag */
 1174                 m0 = m_defrag(m0, M_DONTWAIT);
 1175                 if (m0 == NULL) {
 1176                         m_freem(*m);
 1177                         *m = NULL;
 1178                         return (ENOBUFS);
 1179                 }
 1180                 *m = m0;
 1181                 goto retry;
 1182         } else if (err == ENOMEM) {
 1183                 return (err);
 1184         } else if (err) {
 1185                 if (cxgb_debug)
 1186                         printf("map failure err=%d pktlen=%d\n", err, pktlen);
 1187                 m_freem(m0);
 1188                 *m = NULL;
 1189                 return (err);
 1190         }
 1191 done:
 1192 #if !defined(__i386__) && !defined(__amd64__)
 1193         bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
 1194 #endif  
 1195         txsd->flags |= TX_SW_DESC_MAPPED;
 1196 
 1197         return (0);
 1198 }
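      /*
       * Note: on EFBIG (too many segments) the chain is defragmented once with
       * m_defrag() and the mapping retried; ENOMEM is passed back without
       * freeing the mbufs, while any other failure (including a second EFBIG)
       * frees the chain and returns the error.
       */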
 1199 
 1200 /**
 1201  *      make_sgl - populate a scatter/gather list for a packet
 1202  *      @sgp: the SGL to populate
 1203  *      @segs: the packet dma segments
 1204  *      @nsegs: the number of segments
 1205  *
 1206  *      Generates a scatter/gather list for the buffers that make up a packet
 1207  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1208  *      appropriately.
 1209  */
 1210 static __inline void
 1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1212 {
 1213         int i, idx;
 1214         
 1215         for (idx = 0, i = 0; i < nsegs; i++) {
 1216                 /*
 1217                  * firmware doesn't like empty segments
 1218                  */
 1219                 if (segs[i].ds_len == 0)
 1220                         continue;
 1221                 if (i && idx == 0) 
 1222                         ++sgp;
 1223                 
 1224                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1225                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1226                 idx ^= 1;
 1227         }
 1228         
 1229         if (idx) {
 1230                 sgp->len[idx] = 0;
 1231                 sgp->addr[idx] = 0;
 1232         }
 1233 }
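      /*
       * Note: each struct sg_ent packs two address/length pairs, so idx
       * toggles between 0 and 1 and sgp only advances once both slots of the
       * current entry are filled; an odd segment count leaves the trailing
       * slot zeroed.
       */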
 1234         
 1235 /**
 1236  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1237  *      @adap: the adapter
 1238  *      @q: the Tx queue
 1239  *
 1240  *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
 1241  *      where the HW may go to sleep just after we check; in that case the
 1242  *      interrupt handler will detect the outstanding TX packet and ring the
 1243  *      doorbell for us.
 1244  *
 1245  *      When GTS is disabled we unconditionally ring the doorbell.
 1246  */
 1247 static __inline void
 1248 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
 1249 {
 1250 #if USE_GTS
 1251         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
 1252         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
 1253                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
 1254 #ifdef T3_TRACE
 1255                 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
 1256                           q->cntxt_id);
 1257 #endif
 1258                 t3_write_reg(adap, A_SG_KDOORBELL,
 1259                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1260         }
 1261 #else
 1262         wmb();            /* write descriptors before telling HW */
 1263         t3_write_reg(adap, A_SG_KDOORBELL,
 1264                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1265 #endif
 1266 }
 1267 
 1268 static __inline void
 1269 wr_gen2(struct tx_desc *d, unsigned int gen)
 1270 {
 1271 #if SGE_NUM_GENBITS == 2
 1272         d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
 1273 #endif
 1274 }
 1275 
 1276 /**
 1277  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1278  *      @ndesc: number of Tx descriptors spanned by the SGL
 1279  *      @txd: first Tx descriptor to be written
 1280  *      @txqs: txq state (generation and producer index)
 1281  *      @txq: the SGE Tx queue
 1282  *      @sgl: the SGL
 1283  *      @flits: number of flits to the start of the SGL in the first descriptor
 1284  *      @sgl_flits: the SGL size in flits
 1285  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1286  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1287  *
 1288  *      Write a work request header and an associated SGL.  If the SGL is
 1289  *      small enough to fit into one Tx descriptor it has already been written
 1290  *      and we just need to write the WR header.  Otherwise we distribute the
 1291  *      SGL across the number of descriptors it spans.
 1292  */
 1293 static void
 1294 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
 1295     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
 1296     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
 1297 {
 1298 
 1299         struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
 1300         struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
 1301         
 1302         if (__predict_true(ndesc == 1)) {
 1303                 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1304                         V_WR_SGLSFLT(flits)) | wr_hi,
 1305                     htonl(V_WR_LEN(flits + sgl_flits) |
 1306                         V_WR_GEN(txqs->gen)) | wr_lo);
 1307                 /* XXX gen? */
 1308                 wr_gen2(txd, txqs->gen);
 1309                 
 1310         } else {
 1311                 unsigned int ogen = txqs->gen;
 1312                 const uint64_t *fp = (const uint64_t *)sgl;
 1313                 struct work_request_hdr *wp = wrp;
 1314                 
 1315                 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
 1316                     V_WR_SGLSFLT(flits)) | wr_hi;
 1317                 
 1318                 while (sgl_flits) {
 1319                         unsigned int avail = WR_FLITS - flits;
 1320 
 1321                         if (avail > sgl_flits)
 1322                                 avail = sgl_flits;
 1323                         memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
 1324                         sgl_flits -= avail;
 1325                         ndesc--;
 1326                         if (!sgl_flits)
 1327                                 break;
 1328                         
 1329                         fp += avail;
 1330                         txd++;
 1331                         txsd++;
 1332                         if (++txqs->pidx == txq->size) {
 1333                                 txqs->pidx = 0;
 1334                                 txqs->gen ^= 1;
 1335                                 txd = txq->desc;
 1336                                 txsd = txq->sdesc;
 1337                         }
 1338 
 1339                         /*
 1340                          * when the head of the mbuf chain
 1341                          * is freed all clusters will be freed
 1342                          * with it
 1343                          */
 1344                         wrp = (struct work_request_hdr *)txd;
 1345                         wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
 1346                             V_WR_SGLSFLT(1)) | wr_hi;
 1347                         wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
 1348                                     sgl_flits + 1)) |
 1349                             V_WR_GEN(txqs->gen)) | wr_lo;
 1350                         wr_gen2(txd, txqs->gen);
 1351                         flits = 1;
 1352                 }
 1353                 wrp->wrh_hi |= htonl(F_WR_EOP);
 1354                 wmb();
 1355                 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
 1356                 wr_gen2((struct tx_desc *)wp, ogen);
 1357         }
 1358 }
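      /*
       * Note on the multi-descriptor path above: descriptors after the first
       * are completed with the current generation as they are filled, but the
       * first descriptor's wrh_lo (saved in wp, carrying the original
       * generation ogen) is written only after the wmb(), presumably so the
       * SGE does not start parsing the WR before all of it is in memory.
       */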
 1359 
 1360 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
 1361 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
 1362 
 1363 #ifdef VLAN_SUPPORTED
 1364 #define GET_VTAG(cntrl, m) \
 1365 do { \
 1366         if ((m)->m_flags & M_VLANTAG)                                               \
 1367                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
 1368 } while (0)
 1369 
 1370 #else
 1371 #define GET_VTAG(cntrl, m)
 1372 #endif
 1373 
 1374 static int
 1375 t3_encap(struct sge_qset *qs, struct mbuf **m)
 1376 {
 1377         adapter_t *sc;
 1378         struct mbuf *m0;
 1379         struct sge_txq *txq;
 1380         struct txq_state txqs;
 1381         struct port_info *pi;
 1382         unsigned int ndesc, flits, cntrl, mlen;
 1383         int err, nsegs, tso_info = 0;
 1384 
 1385         struct work_request_hdr *wrp;
 1386         struct tx_sw_desc *txsd;
 1387         struct sg_ent *sgp, *sgl;
 1388         uint32_t wr_hi, wr_lo, sgl_flits; 
 1389         bus_dma_segment_t segs[TX_MAX_SEGS];
 1390 
 1391         struct tx_desc *txd;
 1392                 
 1393         pi = qs->port;
 1394         sc = pi->adapter;
 1395         txq = &qs->txq[TXQ_ETH];
 1396         txd = &txq->desc[txq->pidx];
 1397         txsd = &txq->sdesc[txq->pidx];
 1398         sgl = txq->txq_sgl;
 1399 
 1400         prefetch(txd);
 1401         m0 = *m;
 1402         
 1403         DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
 1404         DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
 1405         
 1406         mtx_assert(&qs->lock, MA_OWNED);
 1407         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1408         KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
 1409         
 1410 #ifdef VLAN_SUPPORTED
 1411         if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
 1412             m0->m_pkthdr.csum_flags & (CSUM_TSO))
 1413                 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
 1414 #endif
 1415         if (m0->m_nextpkt != NULL) {
 1416                 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
 1417                 ndesc = 1;
 1418                 mlen = 0;
 1419         } else {
 1420                 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
 1421                     &m0, segs, &nsegs))) {
 1422                         if (cxgb_debug)
 1423                                 printf("failed ... err=%d\n", err);
 1424                         return (err);
 1425                 }
 1426                 mlen = m0->m_pkthdr.len;
 1427                 ndesc = calc_tx_descs(m0, nsegs);
 1428         }
 1429         txq_prod(txq, ndesc, &txqs);
 1430 
 1431         KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
 1432         txsd->m = m0;
 1433 
 1434         if (m0->m_nextpkt != NULL) {
 1435                 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
 1436                 int i, fidx;
 1437 
 1438                 if (nsegs > 7)
 1439                         panic("trying to coalesce %d packets into one WR", nsegs);
 1440                 txq->txq_coalesced += nsegs;
 1441                 wrp = (struct work_request_hdr *)txd;
 1442                 flits = nsegs*2 + 1;
 1443 
 1444                 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
 1445                         struct cpl_tx_pkt_batch_entry *cbe;
 1446                         uint64_t flit;
 1447                         uint32_t *hflit = (uint32_t *)&flit;
 1448                         int cflags = m0->m_pkthdr.csum_flags;
 1449 
 1450                         cntrl = V_TXPKT_INTF(pi->txpkt_intf);
 1451                         GET_VTAG(cntrl, m0);
 1452                         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1453                         if (__predict_false(!(cflags & CSUM_IP)))
 1454                                 cntrl |= F_TXPKT_IPCSUM_DIS;
 1455                         if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
 1456                                 cntrl |= F_TXPKT_L4CSUM_DIS;
 1457 
 1458                         hflit[0] = htonl(cntrl);
 1459                         hflit[1] = htonl(segs[i].ds_len | 0x80000000);
 1460                         flit |= htobe64(1 << 24);
 1461                         cbe = &cpl_batch->pkt_entry[i];
 1462                         cbe->cntrl = hflit[0];
 1463                         cbe->len = hflit[1];
 1464                         cbe->addr = htobe64(segs[i].ds_addr);
 1465                 }
 1466 
 1467                 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
 1468                     V_WR_SGLSFLT(flits)) |
 1469                     htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1470                 wr_lo = htonl(V_WR_LEN(flits) |
 1471                     V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
 1472                 set_wr_hdr(wrp, wr_hi, wr_lo);
 1473                 wmb();
 1474                 wr_gen2(txd, txqs.gen);
 1475                 check_ring_tx_db(sc, txq);
 1476                 return (0);             
 1477         } else if (tso_info) {
 1478                 int min_size = TCPPKTHDRSIZE, eth_type, tagged;
 1479                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 1480                 struct ip *ip;
 1481                 struct tcphdr *tcp;
 1482                 char *pkthdr;
 1483 
 1484                 txd->flit[2] = 0;
 1485                 GET_VTAG(cntrl, m0);
 1486                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 1487                 hdr->cntrl = htonl(cntrl);
 1488                 hdr->len = htonl(mlen | 0x80000000);
 1489 
 1490                 DPRINTF("tso buf len=%d\n", mlen);
 1491 
 1492                 tagged = m0->m_flags & M_VLANTAG;
 1493                 if (!tagged)
 1494                         min_size -= ETHER_VLAN_ENCAP_LEN;
 1495 
 1496                 if (__predict_false(mlen < min_size)) {
 1497                         printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1498                             m0, mlen, m0->m_pkthdr.tso_segsz,
 1499                             m0->m_pkthdr.csum_flags, m0->m_flags);
 1500                         panic("tx tso packet too small");
 1501                 }
 1502 
 1503                 /* Make sure that ether, ip, tcp headers are all in m0 */
 1504                 if (__predict_false(m0->m_len < min_size)) {
 1505                         m0 = m_pullup(m0, min_size);
 1506                         if (__predict_false(m0 == NULL)) {
 1507                                 /* XXX panic probably an overreaction */
 1508                                 panic("couldn't fit header into mbuf");
 1509                         }
 1510                 }
 1511                 pkthdr = m0->m_data;
 1512 
 1513                 if (tagged) {
 1514                         eth_type = CPL_ETH_II_VLAN;
 1515                         ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
 1516                             ETHER_VLAN_ENCAP_LEN);
 1517                 } else {
 1518                         eth_type = CPL_ETH_II;
 1519                         ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
 1520                 }
 1521                 tcp = (struct tcphdr *)((uint8_t *)ip +
 1522                     sizeof(*ip)); 
 1523 
 1524                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
 1525                             V_LSO_IPHDR_WORDS(ip->ip_hl) |
 1526                             V_LSO_TCPHDR_WORDS(tcp->th_off);
 1527                 hdr->lso_info = htonl(tso_info);
 1528 
 1529                 if (__predict_false(mlen <= PIO_LEN)) {
 1530                         /*
 1531                          * Packet is not undersized but still fits in PIO_LEN;
 1532                          * this indicates a TSO bug at the higher levels.
 1533                          */
 1534                         DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
 1535                             m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
 1536                         txsd->m = NULL;
 1537                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
 1538                         flits = (mlen + 7) / 8 + 3;
 1539                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1540                                           V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1541                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1542                         wr_lo = htonl(V_WR_LEN(flits) |
 1543                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1544                         set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
 1545                         wmb();
 1546                         wr_gen2(txd, txqs.gen);
 1547                         check_ring_tx_db(sc, txq);
 1548                         return (0);
 1549                 }
 1550                 flits = 3;      
 1551         } else {
 1552                 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
 1553                 
 1554                 GET_VTAG(cntrl, m0);
 1555                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 1556                 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
 1557                         cntrl |= F_TXPKT_IPCSUM_DIS;
 1558                 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
 1559                         cntrl |= F_TXPKT_L4CSUM_DIS;
 1560                 cpl->cntrl = htonl(cntrl);
 1561                 cpl->len = htonl(mlen | 0x80000000);
 1562 
 1563                 if (mlen <= PIO_LEN) {
 1564                         txsd->m = NULL;
 1565                         m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 1566                         flits = (mlen + 7) / 8 + 2;
 1567                         
 1568                         wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 1569                             V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
 1570                                           F_WR_SOP | F_WR_EOP | txqs.compl);
 1571                         wr_lo = htonl(V_WR_LEN(flits) |
 1572                             V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
 1573                         set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
 1574                         wmb();
 1575                         wr_gen2(txd, txqs.gen);
 1576                         check_ring_tx_db(sc, txq);
 1577                         return (0);
 1578                 }
 1579                 flits = 2;
 1580         }
 1581         wrp = (struct work_request_hdr *)txd;
 1582         sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 1583         make_sgl(sgp, segs, nsegs);
 1584 
 1585         sgl_flits = sgl_len(nsegs);
 1586 
 1587         KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
 1588         wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 1589         wr_lo = htonl(V_WR_TID(txq->token));
 1590         write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
 1591             sgl_flits, wr_hi, wr_lo);
 1592         check_ring_tx_db(pi->adapter, txq);
 1593 
 1594         return (0);
 1595 }
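
      /*
       * Summary of the three transmit paths taken by t3_encap() above:
       * coalesced mbuf chains (m_nextpkt != NULL) are packed into a single
       * CPL_TX_PKT batch work request of up to 7 packets; TSO frames get a
       * CPL_TX_PKT_LSO header carrying the MSS and header-length fields;
       * everything else uses a plain CPL_TX_PKT.  In the latter two cases a
       * frame no larger than PIO_LEN is copied directly into the descriptor
       * as immediate data (txsd->m is cleared so reclaim skips it), which is
       * why the callers below free such mbufs themselves after the call.
       */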
 1596 
 1597 void
 1598 cxgb_tx_watchdog(void *arg)
 1599 {
 1600         struct sge_qset *qs = arg;
 1601         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1602 
 1603         if (qs->coalescing != 0 &&
 1604             (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
 1605             TXQ_RING_EMPTY(qs))
 1606                 qs->coalescing = 0; 
 1607         else if (qs->coalescing == 0 &&
 1608             (txq->in_use >= cxgb_tx_coalesce_enable_start))
 1609                 qs->coalescing = 1;
 1610         if (TXQ_TRYLOCK(qs)) {
 1611                 qs->qs_flags |= QS_FLUSHING;
 1612                 cxgb_start_locked(qs);
 1613                 qs->qs_flags &= ~QS_FLUSHING;
 1614                 TXQ_UNLOCK(qs);
 1615         }
 1616         if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
 1617                 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
 1618                     qs, txq->txq_watchdog.c_cpu);
 1619 }
 1620 
 1621 static void
 1622 cxgb_tx_timeout(void *arg)
 1623 {
 1624         struct sge_qset *qs = arg;
 1625         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1626 
 1627         if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
 1628                 qs->coalescing = 1;     
 1629         if (TXQ_TRYLOCK(qs)) {
 1630                 qs->qs_flags |= QS_TIMEOUT;
 1631                 cxgb_start_locked(qs);
 1632                 qs->qs_flags &= ~QS_TIMEOUT;
 1633                 TXQ_UNLOCK(qs);
 1634         }
 1635 }
 1636 
 1637 static void
 1638 cxgb_start_locked(struct sge_qset *qs)
 1639 {
 1640         struct mbuf *m_head = NULL;
 1641         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1642         int avail, txmax;
 1643         int in_use_init = txq->in_use;
 1644         struct port_info *pi = qs->port;
 1645         struct ifnet *ifp = pi->ifp;
 1646         avail = txq->size - txq->in_use - 4;
 1647         txmax = min(TX_START_MAX_DESC, avail);
 1648 
 1649         if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
 1650                 reclaim_completed_tx(qs, 0, TXQ_ETH);
 1651 
 1652         if (!pi->link_config.link_ok) {
 1653                 TXQ_RING_FLUSH(qs);
 1654                 return;
 1655         }
 1656         TXQ_LOCK_ASSERT(qs);
 1657         while ((txq->in_use - in_use_init < txmax) &&
 1658             !TXQ_RING_EMPTY(qs) &&
 1659             (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
 1660             pi->link_config.link_ok) {
 1661                 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1662 
 1663                 if ((m_head = cxgb_dequeue(qs)) == NULL)
 1664                         break;
 1665                 /*
 1666                  *  Encapsulation can modify our pointer, and/or make it
 1667                  *  NULL on failure.  In that event, we can't requeue.
 1668                  */
 1669                 if (t3_encap(qs, &m_head) || m_head == NULL)
 1670                         break;
 1671                 
 1672                 /* Send a copy of the frame to the BPF listener */
 1673                 ETHER_BPF_MTAP(ifp, m_head);
 1674 
 1675                 /*
 1676                  * We sent via PIO, no longer need a copy
 1677                  */
 1678                 if (m_head->m_nextpkt == NULL &&
 1679                     m_head->m_pkthdr.len <= PIO_LEN)
 1680                         m_freem(m_head);
 1681 
 1682                 m_head = NULL;
 1683         }
 1684         if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
 1685             pi->link_config.link_ok)
 1686                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1687                     qs, txq->txq_timer.c_cpu);
 1688         if (m_head != NULL)
 1689                 m_freem(m_head);
 1690 }
 1691 
 1692 static int
 1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
 1694 {
 1695         struct port_info *pi = qs->port;
 1696         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 1697         struct buf_ring *br = txq->txq_mr;
 1698         int error, avail;
 1699 
 1700         avail = txq->size - txq->in_use;
 1701         TXQ_LOCK_ASSERT(qs);
 1702 
 1703         /*
 1704          * We can only do a direct transmit if the following are true:
 1705          * - we aren't coalescing (ring < 3/4 full)
 1706          * - the link is up -- checked in caller
 1707          * - there are no packets enqueued already
 1708          * - there is space in hardware transmit queue 
 1709          */
 1710         if (check_pkt_coalesce(qs) == 0 &&
 1711             TXQ_RING_EMPTY(qs) && avail > 4) {
 1712                 if (t3_encap(qs, &m)) {
 1713                         if (m != NULL &&
 1714                             (error = drbr_enqueue(ifp, br, m)) != 0) 
 1715                                 return (error);
 1716                 } else {
 1717                         /*
 1718                          * We've bypassed the buf ring so we need to update
 1719                          * the stats directly
 1720                          */
 1721                         txq->txq_direct_packets++;
 1722                         txq->txq_direct_bytes += m->m_pkthdr.len;
 1723                         /*
 1724                          * Send a copy of the frame to the BPF
 1725                          * listener and set the watchdog on.
 1726                          */
 1727                         ETHER_BPF_MTAP(ifp, m);
 1728                         /*
 1729                          * We sent via PIO, no longer need a copy
 1730                          */
 1731                         if (m->m_pkthdr.len <= PIO_LEN)
 1732                                 m_freem(m);
 1733 
 1734                 }
 1735         } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
 1736                 return (error);
 1737 
 1738         reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
 1739         if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
 1740             (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
 1741                 cxgb_start_locked(qs);
 1742         else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
 1743                 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
 1744                     qs, txq->txq_timer.c_cpu);
 1745         return (0);
 1746 }
 1747 
 1748 int
 1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
 1750 {
 1751         struct sge_qset *qs;
 1752         struct port_info *pi = ifp->if_softc;
 1753         int error, qidx = pi->first_qset;
 1754 
 1755         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
 1756             || (!pi->link_config.link_ok)) {
 1757                 m_freem(m);
 1758                 return (0);
 1759         }
 1760         
 1761         if (m->m_flags & M_FLOWID)
 1762                 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
 1763 
 1764         qs = &pi->adapter->sge.qs[qidx];
 1765         
 1766         if (TXQ_TRYLOCK(qs)) {
 1767                 /* XXX running */
 1768                 error = cxgb_transmit_locked(ifp, qs, m);
 1769                 TXQ_UNLOCK(qs);
 1770         } else
 1771                 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
 1772         return (error);
 1773 }
 1774 void
 1775 cxgb_start(struct ifnet *ifp)
 1776 {
 1777         struct port_info *pi = ifp->if_softc;
 1778         struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
 1779         
 1780         if (!pi->link_config.link_ok)
 1781                 return;
 1782 
 1783         TXQ_LOCK(qs);
 1784         cxgb_start_locked(qs);
 1785         TXQ_UNLOCK(qs);
 1786 }
 1787 
 1788 void
 1789 cxgb_qflush(struct ifnet *ifp)
 1790 {
 1791         /*
 1792          * Flush any mbufs enqueued in the buf_rings
 1793          * and in the transmit queues.
 1794          * This is a no-op for now.
 1795          */
 1796         return;
 1797 }
 1798 
 1799 /**
 1800  *      write_imm - write a packet into a Tx descriptor as immediate data
 1801  *      @d: the Tx descriptor to write
 1802  *      @m: the packet
 1803  *      @len: the length of packet data to write as immediate data
 1804  *      @gen: the generation bit value to write
 1805  *
 1806  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1807  *      contains a work request at its beginning.  We must write the packet
 1808  *      carefully so the SGE doesn't accidentally read it before it has been
 1809  *      written in its entirety.
 1810  */
 1811 static __inline void
 1812 write_imm(struct tx_desc *d, struct mbuf *m,
 1813           unsigned int len, unsigned int gen)
 1814 {
 1815         struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
 1816         struct work_request_hdr *to = (struct work_request_hdr *)d;
 1817         uint32_t wr_hi, wr_lo;
 1818 
 1819         if (len > WR_LEN)
 1820                 panic("len too big %d\n", len);
 1821         if (len < sizeof(*from))
 1822                 panic("len too small %d", len);
 1823         
 1824         memcpy(&to[1], &from[1], len - sizeof(*from));
 1825         wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
 1826                                         V_WR_BCNTLFLT(len & 7));
 1827         wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
 1828                                         V_WR_LEN((len + 7) / 8));
 1829         set_wr_hdr(to, wr_hi, wr_lo);
 1830         wmb();
 1831         wr_gen2(d, gen);
 1832 
 1833         /*
 1834          * This check is a hack; we should really fix the logic so
 1835          * that this can't happen.
 1836          */
 1837         if (m->m_type != MT_DONTFREE)
 1838                 m_freem(m);
 1839         
 1840 }
 1841 
 1842 /**
 1843  *      check_desc_avail - check descriptor availability on a send queue
 1844  *      @adap: the adapter
 1845  *      @q: the TX queue
 1846  *      @m: the packet needing the descriptors
 1847  *      @ndesc: the number of Tx descriptors needed
 1848  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1849  *
 1850  *      Checks if the requested number of Tx descriptors is available on an
 1851  *      SGE send queue.  If the queue is already suspended or not enough
 1852  *      descriptors are available, the packet is queued for later transmission.
 1853  *      Must be called with the Tx queue locked.
 1854  *
 1855  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1856  *      enough descriptors and the packet has been queued, and 2 if the caller
 1857  *      needs to retry because there weren't enough descriptors at the
 1858  *      beginning of the call but some were freed up in the meantime.
 1859  */
 1860 static __inline int
 1861 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1862                  struct mbuf *m, unsigned int ndesc,
 1863                  unsigned int qid)
 1864 {
 1865         /* 
 1866          * XXX We currently only use this for checking the control queue;
 1867          * the control queue is only used for binding qsets, which happens
 1868          * at init time, so we are guaranteed enough descriptors.
 1869          */
 1870         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1871 addq_exit:      mbufq_tail(&q->sendq, m);
 1872                 return 1;
 1873         }
 1874         if (__predict_false(q->size - q->in_use < ndesc)) {
 1875 
 1876                 struct sge_qset *qs = txq_to_qset(q, qid);
 1877 
 1878                 setbit(&qs->txq_stopped, qid);
 1879                 if (should_restart_tx(q) &&
 1880                     test_and_clear_bit(qid, &qs->txq_stopped))
 1881                         return 2;
 1882 
 1883                 q->stops++;
 1884                 goto addq_exit;
 1885         }
 1886         return 0;
 1887 }
 1888 
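      /*
       * Callers of check_desc_avail() (see ctrl_xmit() and ofld_xmit()
       * below) follow the same pattern: reclaim completed descriptors at an
       * `again' label, call check_desc_avail(), bail out if it returns 1
       * (the packet is now on the sendq), and jump back to `again' if it
       * returns 2 to retry with the freshly reclaimed descriptors.
       */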
 1889 
 1890 /**
 1891  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1892  *      @q: the SGE control Tx queue
 1893  *
 1894  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1895  *      that send only immediate data (presently just the control queues) and
 1896  *      thus do not have any mbufs.
 1897  */
 1898 static __inline void
 1899 reclaim_completed_tx_imm(struct sge_txq *q)
 1900 {
 1901         unsigned int reclaim = q->processed - q->cleaned;
 1902 
 1903         q->in_use -= reclaim;
 1904         q->cleaned += reclaim;
 1905 }
 1906 
 1907 static __inline int
 1908 immediate(const struct mbuf *m)
 1909 {
 1910         return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
 1911 }
 1912 
 1913 /**
 1914  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1915  *      @adap: the adapter
 1916  *      @q: the control queue
 1917  *      @m: the packet
 1918  *
 1919  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1920  *      a control queue must fit entirely as immediate data in a single Tx
 1921  *      descriptor and have no page fragments.
 1922  */
 1923 static int
 1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 1925 {
 1926         int ret;
 1927         struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
 1928         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1929         
 1930         if (__predict_false(!immediate(m))) {
 1931                 m_freem(m);
 1932                 return 0;
 1933         }
 1934         
 1935         wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
 1936         wrp->wrh_lo = htonl(V_WR_TID(q->token));
 1937 
 1938         TXQ_LOCK(qs);
 1939 again:  reclaim_completed_tx_imm(q);
 1940 
 1941         ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 1942         if (__predict_false(ret)) {
 1943                 if (ret == 1) {
 1944                         TXQ_UNLOCK(qs);
 1945                         log(LOG_ERR, "no desc available\n");
 1946                         return (ENOSPC);
 1947                 }
 1948                 goto again;
 1949         }
 1950         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1951         
 1952         q->in_use++;
 1953         if (++q->pidx >= q->size) {
 1954                 q->pidx = 0;
 1955                 q->gen ^= 1;
 1956         }
 1957         TXQ_UNLOCK(qs);
 1958         t3_write_reg(adap, A_SG_KDOORBELL,
 1959                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 1960         return (0);
 1961 }
 1962 
 1963 
 1964 /**
 1965  *      restart_ctrlq - restart a suspended control queue
 1966  *      @qs: the queue set containing the control queue
 1967  *
 1968  *      Resumes transmission on a suspended Tx control queue.
 1969  */
 1970 static void
 1971 restart_ctrlq(void *data, int npending)
 1972 {
 1973         struct mbuf *m;
 1974         struct sge_qset *qs = (struct sge_qset *)data;
 1975         struct sge_txq *q = &qs->txq[TXQ_CTRL];
 1976         adapter_t *adap = qs->port->adapter;
 1977 
 1978         log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
 1979         
 1980         TXQ_LOCK(qs);
 1981 again:  reclaim_completed_tx_imm(q);
 1982 
 1983         while (q->in_use < q->size &&
 1984                (m = mbufq_dequeue(&q->sendq)) != NULL) {
 1985 
 1986                 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 1987 
 1988                 if (++q->pidx >= q->size) {
 1989                         q->pidx = 0;
 1990                         q->gen ^= 1;
 1991                 }
 1992                 q->in_use++;
 1993         }
 1994         if (!mbufq_empty(&q->sendq)) {
 1995                 setbit(&qs->txq_stopped, TXQ_CTRL);
 1996 
 1997                 if (should_restart_tx(q) &&
 1998                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
 1999                         goto again;
 2000                 q->stops++;
 2001         }
 2002         TXQ_UNLOCK(qs);
 2003         t3_write_reg(adap, A_SG_KDOORBELL,
 2004                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2005 }
 2006 
 2007 
 2008 /*
 2009  * Send a management message through control queue 0
 2010  */
 2011 int
 2012 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 2013 {
 2014         return ctrl_xmit(adap, &adap->sge.qs[0], m);
 2015 }
 2016 
 2017 /**
 2018  *      free_qset - free the resources of an SGE queue set
 2019  *      @sc: the controller owning the queue set
 2020  *      @q: the queue set
 2021  *
 2022  *      Release the HW and SW resources associated with an SGE queue set, such
 2023  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 2024  *      queue set must be quiesced prior to calling this.
 2025  */
 2026 static void
 2027 t3_free_qset(adapter_t *sc, struct sge_qset *q)
 2028 {
 2029         int i;
 2030         
 2031         reclaim_completed_tx(q, 0, TXQ_ETH);
 2032         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2033                 if (q->txq[i].txq_mr != NULL) 
 2034                         buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
 2035                 if (q->txq[i].txq_ifq != NULL) {
 2036                         ifq_delete(q->txq[i].txq_ifq);
 2037                         free(q->txq[i].txq_ifq, M_DEVBUF);
 2038                 }
 2039         }
 2040         
 2041         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2042                 if (q->fl[i].desc) {
 2043                         mtx_lock_spin(&sc->sge.reg_lock);
 2044                         t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
 2045                         mtx_unlock_spin(&sc->sge.reg_lock);
 2046                         bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
 2047                         bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
 2048                                         q->fl[i].desc_map);
 2049                         bus_dma_tag_destroy(q->fl[i].desc_tag);
 2050                         bus_dma_tag_destroy(q->fl[i].entry_tag);
 2051                 }
 2052                 if (q->fl[i].sdesc) {
 2053                         free_rx_bufs(sc, &q->fl[i]);
 2054                         free(q->fl[i].sdesc, M_DEVBUF);
 2055                 }
 2056         }
 2057 
 2058         mtx_unlock(&q->lock);
 2059         MTX_DESTROY(&q->lock);
 2060         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2061                 if (q->txq[i].desc) {
 2062                         mtx_lock_spin(&sc->sge.reg_lock);
 2063                         t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
 2064                         mtx_unlock_spin(&sc->sge.reg_lock);
 2065                         bus_dmamap_unload(q->txq[i].desc_tag,
 2066                                         q->txq[i].desc_map);
 2067                         bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
 2068                                         q->txq[i].desc_map);
 2069                         bus_dma_tag_destroy(q->txq[i].desc_tag);
 2070                         bus_dma_tag_destroy(q->txq[i].entry_tag);
 2071                 }
 2072                 if (q->txq[i].sdesc) {
 2073                         free(q->txq[i].sdesc, M_DEVBUF);
 2074                 }
 2075         }
 2076 
 2077         if (q->rspq.desc) {
 2078                 mtx_lock_spin(&sc->sge.reg_lock);
 2079                 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
 2080                 mtx_unlock_spin(&sc->sge.reg_lock);
 2081                 
 2082                 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
 2083                 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
 2084                                 q->rspq.desc_map);
 2085                 bus_dma_tag_destroy(q->rspq.desc_tag);
 2086                 MTX_DESTROY(&q->rspq.lock);
 2087         }
 2088 
 2089 #ifdef LRO_SUPPORTED
 2090         tcp_lro_free(&q->lro.ctrl);
 2091 #endif
 2092 
 2093         bzero(q, sizeof(*q));
 2094 }
 2095 
 2096 /**
 2097  *      t3_free_sge_resources - free SGE resources
 2098  *      @sc: the adapter softc
 2099  *
 2100  *      Frees resources used by the SGE queue sets.
 2101  */
 2102 void
 2103 t3_free_sge_resources(adapter_t *sc)
 2104 {
 2105         int i, nqsets;
 2106         
 2107         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2108                 nqsets += sc->port[i].nqsets;
 2109 
 2110         for (i = 0; i < nqsets; ++i) {
 2111                 TXQ_LOCK(&sc->sge.qs[i]);
 2112                 t3_free_qset(sc, &sc->sge.qs[i]);
 2113         }
 2114         
 2115 }
 2116 
 2117 /**
 2118  *      t3_sge_start - enable SGE
 2119  *      @sc: the controller softc
 2120  *
 2121  *      Enables the SGE for DMAs.  This is the last step in starting packet
 2122  *      transfers.
 2123  */
 2124 void
 2125 t3_sge_start(adapter_t *sc)
 2126 {
 2127         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 2128 }
 2129 
 2130 /**
 2131  *      t3_sge_stop - disable SGE operation
 2132  *      @sc: the adapter
 2133  *
 2134  *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 2135  *      from error interrupts) or from normal process context.  In the latter
 2136  *      case it also disables any pending queue restart tasklets.  Note that
 2137  *      if it is called in interrupt context it cannot disable the restart
 2138  *      tasklets, as it cannot wait; however, the tasklets will have no effect
 2139  *      since the doorbells are disabled, and the driver will call this again
 2140  *      later from process context, at which time the tasklets will be stopped
 2141  *      if they are still running.
 2142  */
 2143 void
 2144 t3_sge_stop(adapter_t *sc)
 2145 {
 2146         int i, nqsets;
 2147         
 2148         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 2149 
 2150         if (sc->tq == NULL)
 2151                 return;
 2152         
 2153         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 2154                 nqsets += sc->port[i].nqsets;
 2155 #ifdef notyet
 2156         /*
 2157          * 
 2158          * XXX
 2159          */
 2160         for (i = 0; i < nqsets; ++i) {
 2161                 struct sge_qset *qs = &sc->sge.qs[i];
 2162                 
 2163                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2164                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2165         }
 2166 #endif
 2167 }
 2168 
 2169 /**
 2170  *      t3_free_tx_desc - reclaims Tx descriptors and their buffers
 2171  *      @qs: the queue set that owns the Tx queue
 2172  *      @reclaimable: the number of descriptors to reclaim
 2173  *      @queue: the index of the Tx queue within the queue set
 2174  *      (e.g., TXQ_ETH)
 2175  *
 2176  *      Reclaims Tx descriptors from an SGE Tx queue and frees the
 2177  *      associated Tx buffers.  Called with the Tx queue lock held.
 2178  *
 2179  *      The corresponding software descriptors are unmapped and their
 2180  *      mbuf chains freed; nothing is returned.
 2181  */
 2182 void
 2183 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
 2184 {
 2185         struct tx_sw_desc *txsd;
 2186         unsigned int cidx, mask;
 2187         struct sge_txq *q = &qs->txq[queue];
 2188 
 2189 #ifdef T3_TRACE
 2190         T3_TRACE2(sc->tb[q->cntxt_id & 7],
 2191                   "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 2192 #endif
 2193         cidx = q->cidx;
 2194         mask = q->size - 1;
 2195         txsd = &q->sdesc[cidx];
 2196 
 2197         mtx_assert(&qs->lock, MA_OWNED);
 2198         while (reclaimable--) {
 2199                 prefetch(q->sdesc[(cidx + 1) & mask].m);
 2200                 prefetch(q->sdesc[(cidx + 2) & mask].m);
 2201 
 2202                 if (txsd->m != NULL) {
 2203                         if (txsd->flags & TX_SW_DESC_MAPPED) {
 2204                                 bus_dmamap_unload(q->entry_tag, txsd->map);
 2205                                 txsd->flags &= ~TX_SW_DESC_MAPPED;
 2206                         }
 2207                         m_freem_list(txsd->m);
 2208                         txsd->m = NULL;
 2209                 } else
 2210                         q->txq_skipped++;
 2211                 
 2212                 ++txsd;
 2213                 if (++cidx == q->size) {
 2214                         cidx = 0;
 2215                         txsd = q->sdesc;
 2216                 }
 2217         }
 2218         q->cidx = cidx;
 2219 
 2220 }
 2221 
 2222 /**
 2223  *      is_new_response - check if a response is newly written
 2224  *      @r: the response descriptor
 2225  *      @q: the response queue
 2226  *
 2227  *      Returns true if a response descriptor contains a yet unprocessed
 2228  *      response.
 2229  */
 2230 static __inline int
 2231 is_new_response(const struct rsp_desc *r,
 2232     const struct sge_rspq *q)
 2233 {
 2234         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 2235 }
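
      /*
       * The generation-bit scheme above is how the driver tells fresh
       * responses from stale ones without an index exchange: the hardware
       * writes the current generation value into each response descriptor,
       * and the driver flips its software copy (q->gen ^= 1) each time the
       * response queue wraps, in the same way the Tx queues above toggle
       * txqs->gen.  A descriptor whose F_RSPD_GEN2 bit matches q->gen was
       * therefore written after the most recent wrap and is new.
       */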
 2236 
 2237 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
 2238 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
 2239                         V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
 2240                         V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
 2241                         V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
 2242 
 2243 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
 2244 #define NOMEM_INTR_DELAY 2500
 2245 
 2246 /**
 2247  *      write_ofld_wr - write an offload work request
 2248  *      @adap: the adapter
 2249  *      @m: the packet to send
 2250  *      @q: the Tx queue
 2251  *      @pidx: index of the first Tx descriptor to write
 2252  *      @gen: the generation value to use
 2253  *      @ndesc: number of descriptors the packet will occupy
 2254  *
 2255  *      Write an offload work request to send the supplied packet.  The packet
 2256  *      data already carry the work request with most fields populated.
 2257  */
 2258 static void
 2259 write_ofld_wr(adapter_t *adap, struct mbuf *m,
 2260     struct sge_txq *q, unsigned int pidx,
 2261     unsigned int gen, unsigned int ndesc,
 2262     bus_dma_segment_t *segs, unsigned int nsegs)
 2263 {
 2264         unsigned int sgl_flits, flits;
 2265         struct work_request_hdr *from;
 2266         struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 2267         struct tx_desc *d = &q->desc[pidx];
 2268         struct txq_state txqs;
 2269         
 2270         if (immediate(m) && nsegs == 0) {
 2271                 write_imm(d, m, m->m_len, gen);
 2272                 return;
 2273         }
 2274 
 2275         /* Only TX_DATA builds SGLs */
 2276         from = mtod(m, struct work_request_hdr *);
 2277         memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
 2278 
 2279         flits = m->m_len / 8;
 2280         sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
 2281 
 2282         make_sgl(sgp, segs, nsegs);
 2283         sgl_flits = sgl_len(nsegs);
 2284 
 2285         txqs.gen = gen;
 2286         txqs.pidx = pidx;
 2287         txqs.compl = 0;
 2288 
 2289         write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
 2290             from->wrh_hi, from->wrh_lo);
 2291 }
 2292 
 2293 /**
 2294  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 2295  *      @m: the packet
 2296  *
 2297  *      Returns the number of Tx descriptors needed for the given offload
 2298  *      packet.  These packets are already fully constructed.
 2299  */
 2300 static __inline unsigned int
 2301 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 2302 {
 2303         unsigned int flits, cnt = 0;
 2304         int ndescs;
 2305 
 2306         if (m->m_len <= WR_LEN && nsegs == 0)
 2307                 return (1);                 /* packet fits as immediate data */
 2308 
 2309         /*
 2310          * This needs to be re-visited for TOE
 2311          */
 2312 
 2313         cnt = nsegs;
 2314                 
 2315         /* headers */
 2316         flits = m->m_len / 8;
 2317 
 2318         ndescs = flits_to_desc(flits + sgl_len(cnt));
 2319 
 2320         return (ndescs);
 2321 }
 2322 
 2323 /**
 2324  *      ofld_xmit - send a packet through an offload queue
 2325  *      @adap: the adapter
 2326  *      @q: the Tx offload queue
 2327  *      @m: the packet
 2328  *
 2329  *      Send an offload packet through an SGE offload queue.
 2330  */
 2331 static int
 2332 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
 2333 {
 2334         int ret, nsegs;
 2335         unsigned int ndesc;
 2336         unsigned int pidx, gen;
 2337         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2338         bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
 2339         struct tx_sw_desc *stx;
 2340 
 2341         nsegs = m_get_sgllen(m);
 2342         vsegs = m_get_sgl(m);
 2343         ndesc = calc_tx_descs_ofld(m, nsegs);
 2344         busdma_map_sgl(vsegs, segs, nsegs);
 2345 
 2346         stx = &q->sdesc[q->pidx];
 2347         
 2348         TXQ_LOCK(qs);
 2349 again:  reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2350         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 2351         if (__predict_false(ret)) {
 2352                 if (ret == 1) {
 2353                         printf("no ofld desc avail\n");
 2354                         
 2355                         m_set_priority(m, ndesc);     /* save for restart */
 2356                         TXQ_UNLOCK(qs);
 2357                         return (EINTR);
 2358                 }
 2359                 goto again;
 2360         }
 2361 
 2362         gen = q->gen;
 2363         q->in_use += ndesc;
 2364         pidx = q->pidx;
 2365         q->pidx += ndesc;
 2366         if (q->pidx >= q->size) {
 2367                 q->pidx -= q->size;
 2368                 q->gen ^= 1;
 2369         }
 2370 #ifdef T3_TRACE
 2371         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 2372                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 2373                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 2374                   skb_shinfo(skb)->nr_frags);
 2375 #endif
 2376         TXQ_UNLOCK(qs);
 2377 
 2378         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2379         check_ring_tx_db(adap, q);
 2380         return (0);
 2381 }
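
      /*
       * When ofld_xmit() cannot get descriptors, check_desc_avail() leaves
       * the packet on the sendq and the computed descriptor count is stashed
       * in the mbuf's priority (m_set_priority(m, ndesc)); restart_offloadq()
       * below reads it back with m_get_priority() so the count does not have
       * to be recomputed when transmission resumes.
       */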
 2382 
 2383 /**
 2384  *      restart_offloadq - restart a suspended offload queue
 2385  *      @qs: the queue set containing the offload queue
 2386  *
 2387  *      Resumes transmission on a suspended Tx offload queue.
 2388  */
 2389 static void
 2390 restart_offloadq(void *data, int npending)
 2391 {
 2392         struct mbuf *m;
 2393         struct sge_qset *qs = data;
 2394         struct sge_txq *q = &qs->txq[TXQ_OFLD];
 2395         adapter_t *adap = qs->port->adapter;
 2396         bus_dma_segment_t segs[TX_MAX_SEGS];
 2397         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 2398         int nsegs, cleaned;
 2399                 
 2400         TXQ_LOCK(qs);
 2401 again:  cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
 2402 
 2403         while ((m = mbufq_peek(&q->sendq)) != NULL) {
 2404                 unsigned int gen, pidx;
 2405                 unsigned int ndesc = m_get_priority(m);
 2406 
 2407                 if (__predict_false(q->size - q->in_use < ndesc)) {
 2408                         setbit(&qs->txq_stopped, TXQ_OFLD);
 2409                         if (should_restart_tx(q) &&
 2410                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 2411                                 goto again;
 2412                         q->stops++;
 2413                         break;
 2414                 }
 2415 
 2416                 gen = q->gen;
 2417                 q->in_use += ndesc;
 2418                 pidx = q->pidx;
 2419                 q->pidx += ndesc;
 2420                 if (q->pidx >= q->size) {
 2421                         q->pidx -= q->size;
 2422                         q->gen ^= 1;
 2423                 }
 2424                 
 2425                 (void)mbufq_dequeue(&q->sendq);
 2426                 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
 2427                 TXQ_UNLOCK(qs);
 2428                 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 2429                 TXQ_LOCK(qs);
 2430         }
 2431 #if USE_GTS
 2432         set_bit(TXQ_RUNNING, &q->flags);
 2433         set_bit(TXQ_LAST_PKT_DB, &q->flags);
 2434 #endif
 2435         TXQ_UNLOCK(qs);
 2436         wmb();
 2437         t3_write_reg(adap, A_SG_KDOORBELL,
 2438                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 2439 }
 2440 
 2441 /**
 2442  *      queue_set - return the queue set a packet should use
 2443  *      @m: the packet
 2444  *
 2445  *      Maps a packet to the SGE queue set it should use.  The desired queue
 2446  *      set is carried in bits 1-3 in the packet's priority.
 2447  */
 2448 static __inline int
 2449 queue_set(const struct mbuf *m)
 2450 {
 2451         return m_get_priority(m) >> 1;
 2452 }
 2453 
 2454 /**
 2455  *      is_ctrl_pkt - return whether an offload packet is a control packet
 2456  *      @m: the packet
 2457  *
 2458  *      Determines whether an offload packet should use an OFLD or a CTRL
 2459  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 2460  */
 2461 static __inline int
 2462 is_ctrl_pkt(const struct mbuf *m)
 2463 {
 2464         return m_get_priority(m) & 1;
 2465 }
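
      /*
       * Taken together, queue_set() and is_ctrl_pkt() imply that an offload
       * packet's priority is encoded as (qset_idx << 1) | is_ctrl: for
       * example, a priority of 5 (binary 101) selects queue set 2 and the
       * control queue, while a priority of 4 selects queue set 2 and the
       * offload queue.
       */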
 2466 
 2467 /**
 2468  *      t3_offload_tx - send an offload packet
 2469  *      @tdev: the offload device to send to
 2470  *      @m: the packet
 2471  *
 2472  *      Sends an offload packet.  We use the packet priority to select the
 2473  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 2474  *      should be sent as regular or control; bits 1-3 select the queue set.
 2475  */
 2476 int
 2477 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
 2478 {
 2479         adapter_t *adap = tdev2adap(tdev);
 2480         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2481 
 2482         if (__predict_false(is_ctrl_pkt(m))) 
 2483                 return ctrl_xmit(adap, qs, m);
 2484 
 2485         return ofld_xmit(adap, qs, m);
 2486 }
 2487 
 2488 /**
 2489  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2490  *      @tdev: the offload device that will be receiving the packets
 2491  *      @q: the SGE response queue that assembled the bundle
 2492  *      @m: the partial bundle
 2493  *      @n: the number of packets in the bundle
 2494  *
 2495  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2496  */
 2497 static __inline void
 2498 deliver_partial_bundle(struct t3cdev *tdev,
 2499                         struct sge_rspq *q,
 2500                         struct mbuf *mbufs[], int n)
 2501 {
 2502         if (n) {
 2503                 q->offload_bundles++;
 2504                 cxgb_ofld_recv(tdev, mbufs, n);
 2505         }
 2506 }
 2507 
 2508 static __inline int
 2509 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 2510     struct mbuf *m, struct mbuf *rx_gather[],
 2511     unsigned int gather_idx)
 2512 {
 2513         
 2514         rq->offload_pkts++;
 2515         m->m_pkthdr.header = mtod(m, void *);
 2516         rx_gather[gather_idx++] = m;
 2517         if (gather_idx == RX_BUNDLE_SIZE) {
 2518                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2519                 gather_idx = 0;
 2520                 rq->offload_bundles++;
 2521         }
 2522         return (gather_idx);
 2523 }
 2524 
 2525 static void
 2526 restart_tx(struct sge_qset *qs)
 2527 {
 2528         struct adapter *sc = qs->port->adapter;
 2529         
 2530         
 2531         if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 2532             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 2533             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 2534                 qs->txq[TXQ_OFLD].restarts++;
 2535                 DPRINTF("restarting TXQ_OFLD\n");
 2536                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 2537         }
 2538         DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
 2539             qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
 2540             qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
 2541             qs->txq[TXQ_CTRL].in_use);
 2542         
 2543         if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 2544             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 2545             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 2546                 qs->txq[TXQ_CTRL].restarts++;
 2547                 DPRINTF("restarting TXQ_CTRL\n");
 2548                 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 2549         }
 2550 }
 2551 
 2552 /**
 2553  *      t3_sge_alloc_qset - initialize an SGE queue set
 2554  *      @sc: the controller softc
 2555  *      @id: the queue set id
 2556  *      @nports: how many Ethernet ports will be using this queue set
 2557  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2558  *      @p: configuration parameters for this queue set
 2559  *      @ntxq: number of Tx queues for the queue set
 2560  *      @pi: port info for queue set
 2561  *
 2562  *      Allocate resources and initialize an SGE queue set.  A queue set
 2563  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2564  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2565  *      queue, offload queue, and control queue.
 2566  */
 2567 int
 2568 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 2569                   const struct qset_params *p, int ntxq, struct port_info *pi)
 2570 {
 2571         struct sge_qset *q = &sc->sge.qs[id];
 2572         int i, ret = 0;
 2573 
 2574         MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
 2575         q->port = pi;
 2576 
 2577         for (i = 0; i < SGE_TXQ_PER_SET; i++) {
 2578                 
 2579                 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
 2580                             M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
 2581                         device_printf(sc->dev, "failed to allocate mbuf ring\n");
 2582                         goto err;
 2583                 }
 2584                 if ((q->txq[i].txq_ifq =
 2585                         malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
 2586                     == NULL) {
 2587                         device_printf(sc->dev, "failed to allocate ifq\n");
 2588                         goto err;
 2589                 }
 2590                 ifq_init(q->txq[i].txq_ifq, pi->ifp);   
 2591                 callout_init(&q->txq[i].txq_timer, 1);
 2592                 callout_init(&q->txq[i].txq_watchdog, 1);
 2593                 q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
 2594                 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
 2595         }
 2596         init_qset_cntxt(q, id);
 2597         q->idx = id;
 2598         if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 2599                     sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
 2600                     &q->fl[0].desc, &q->fl[0].sdesc,
 2601                     &q->fl[0].desc_tag, &q->fl[0].desc_map,
 2602                     sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 2603                 printf("error %d from alloc ring fl0\n", ret);
 2604                 goto err;
 2605         }
 2606 
 2607         if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 2608                     sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
 2609                     &q->fl[1].desc, &q->fl[1].sdesc,
 2610                     &q->fl[1].desc_tag, &q->fl[1].desc_map,
 2611                     sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 2612                 printf("error %d from alloc ring fl1\n", ret);
 2613                 goto err;
 2614         }
 2615 
 2616         if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
 2617                     &q->rspq.phys_addr, &q->rspq.desc, NULL,
 2618                     &q->rspq.desc_tag, &q->rspq.desc_map,
 2619                     NULL, NULL)) != 0) {
 2620                 printf("error %d from alloc ring rspq\n", ret);
 2621                 goto err;
 2622         }
 2623 
 2624         for (i = 0; i < ntxq; ++i) {
 2625                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
 2626 
 2627                 if ((ret = alloc_ring(sc, p->txq_size[i],
 2628                             sizeof(struct tx_desc), sz,
 2629                             &q->txq[i].phys_addr, &q->txq[i].desc,
 2630                             &q->txq[i].sdesc, &q->txq[i].desc_tag,
 2631                             &q->txq[i].desc_map,
 2632                             sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 2633                         printf("error %d from alloc ring tx %i\n", ret, i);
 2634                         goto err;
 2635                 }
 2636                 mbufq_init(&q->txq[i].sendq);
 2637                 q->txq[i].gen = 1;
 2638                 q->txq[i].size = p->txq_size[i];
 2639         }
 2640         
 2641         TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
 2642         TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
 2643         TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2644         TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
 2645 
 2646         q->fl[0].gen = q->fl[1].gen = 1;
 2647         q->fl[0].size = p->fl_size;
 2648         q->fl[1].size = p->jumbo_size;
 2649 
 2650         q->rspq.gen = 1;
 2651         q->rspq.cidx = 0;
 2652         q->rspq.size = p->rspq_size;
 2653 
 2654         q->txq[TXQ_ETH].stop_thres = nports *
 2655             flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 2656 
 2657         q->fl[0].buf_size = MCLBYTES;
 2658         q->fl[0].zone = zone_pack;
 2659         q->fl[0].type = EXT_PACKET;
 2660 #if __FreeBSD_version > 800000
 2661         if (cxgb_use_16k_clusters) {            
 2662                 q->fl[1].buf_size = MJUM16BYTES;
 2663                 q->fl[1].zone = zone_jumbo16;
 2664                 q->fl[1].type = EXT_JUMBO16;
 2665         } else {
 2666                 q->fl[1].buf_size = MJUM9BYTES;
 2667                 q->fl[1].zone = zone_jumbo9;
 2668                 q->fl[1].type = EXT_JUMBO9;             
 2669         }
 2670 #else
 2671         q->fl[1].buf_size = MJUMPAGESIZE;
 2672         q->fl[1].zone = zone_jumbop;
 2673         q->fl[1].type = EXT_JUMBOP;
 2674 #endif
 2675 
 2676 #ifdef LRO_SUPPORTED
 2677         /* Allocate and setup the lro_ctrl structure */
 2678         q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
 2679         ret = tcp_lro_init(&q->lro.ctrl);
 2680         if (ret) {
 2681                 printf("error %d from tcp_lro_init\n", ret);
 2682                 goto err;
 2683         }
 2684         q->lro.ctrl.ifp = pi->ifp;
 2685 #endif
 2686 
 2687         mtx_lock_spin(&sc->sge.reg_lock);
 2688         ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 2689                                    q->rspq.phys_addr, q->rspq.size,
 2690                                    q->fl[0].buf_size, 1, 0);
 2691         if (ret) {
 2692                 printf("error %d from t3_sge_init_rspcntxt\n", ret);
 2693                 goto err_unlock;
 2694         }
 2695 
 2696         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 2697                 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
 2698                                           q->fl[i].phys_addr, q->fl[i].size,
 2699                                           q->fl[i].buf_size, p->cong_thres, 1,
 2700                                           0);
 2701                 if (ret) {
 2702                         printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
 2703                         goto err_unlock;
 2704                 }
 2705         }
 2706 
 2707         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
 2708                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
 2709                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
 2710                                  1, 0);
 2711         if (ret) {
 2712                 printf("error %d from t3_sge_init_ecntxt\n", ret);
 2713                 goto err_unlock;
 2714         }
 2715 
 2716         if (ntxq > 1) {
 2717                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
 2718                                          USE_GTS, SGE_CNTXT_OFLD, id,
 2719                                          q->txq[TXQ_OFLD].phys_addr,
 2720                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
 2721                 if (ret) {
 2722                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2723                         goto err_unlock;
 2724                 }
 2725         }
 2726 
 2727         if (ntxq > 2) {
 2728                 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
 2729                                          SGE_CNTXT_CTRL, id,
 2730                                          q->txq[TXQ_CTRL].phys_addr,
 2731                                          q->txq[TXQ_CTRL].size,
 2732                                          q->txq[TXQ_CTRL].token, 1, 0);
 2733                 if (ret) {
 2734                         printf("error %d from t3_sge_init_ecntxt\n", ret);
 2735                         goto err_unlock;
 2736                 }
 2737         }
 2738         
 2739         snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
 2740             device_get_unit(sc->dev), irq_vec_idx);
 2741         MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
 2742         
 2743         mtx_unlock_spin(&sc->sge.reg_lock);
 2744         t3_update_qset_coalesce(q, p);
 2745         q->port = pi;
 2746         
 2747         refill_fl(sc, &q->fl[0], q->fl[0].size);
 2748         refill_fl(sc, &q->fl[1], q->fl[1].size);
 2749         refill_rspq(sc, &q->rspq, q->rspq.size - 1);
 2750 
 2751         t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
 2752                      V_NEWTIMER(q->rspq.holdoff_tmr));
 2753 
 2754         return (0);
 2755 
 2756 err_unlock:
 2757         mtx_unlock_spin(&sc->sge.reg_lock);
 2758 err:    
 2759         TXQ_LOCK(q);
 2760         t3_free_qset(sc, q);
 2761 
 2762         return (ret);
 2763 }
 2764 
 2765 /*
 2766  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
 2767  * ethernet data.  Hardware assistance with various checksums and any vlan tag
 2768  * will also be taken into account here.
 2769  */
 2770 void
 2771 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2772 {
 2773         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2774         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2775         struct ifnet *ifp = pi->ifp;
 2776         
 2777         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2778 
 2779         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2780             cpl->csum_valid && cpl->csum == 0xffff) {
 2781                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2782                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2783                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2784                 m->m_pkthdr.csum_data = 0xffff;
 2785         }
 2786         /* 
 2787          * XXX need to add VLAN support for 6.x
 2788          */
 2789 #ifdef VLAN_SUPPORTED
 2790         if (__predict_false(cpl->vlan_valid)) {
 2791                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2792                 m->m_flags |= M_VLANTAG;
 2793         } 
 2794 #endif
 2795         
 2796         m->m_pkthdr.rcvif = ifp;
 2797         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2798         /*
 2799          * adjust after conversion to mbuf chain
 2800          */
 2801         m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
 2802         m->m_len -= (sizeof(*cpl) + ethpad);
 2803         m->m_data += (sizeof(*cpl) + ethpad);
 2804 }
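
/*
 * Editor's note (illustrative sketch, not part of the driver): the tail of
 * t3_rx_eth() strips the CPL header plus pad by advancing the data pointer
 * and shrinking both length fields by the same amount.  The self-contained
 * fragment below shows the same arithmetic on a plain buffer; "struct
 * fake_buf" and "strip_prefix" are made-up names used only here.
 */
#if 0
struct fake_buf {
        unsigned char   *data;          /* start of valid bytes */
        unsigned int     len;           /* number of valid bytes */
};

static void
strip_prefix(struct fake_buf *b, unsigned int hdrlen, unsigned int pad)
{
        /* mirrors m->m_data += sizeof(*cpl) + ethpad and the length updates */
        b->data += hdrlen + pad;
        b->len  -= hdrlen + pad;
}
#endif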
 2805 
 2806 /**
 2807  *      get_packet - return the next ingress packet buffer from a free list
 2808  *      @adap: the adapter that received the packet
 2809  *      @drop_thres: # of remaining buffers before we start dropping packets
 2810  *      @qs: the qset that the SGE free list holding the packet belongs to
 2811  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2812  *      @r: response descriptor 
 2813  *
 2814  *      Get the next packet from a free list and complete setup of the
 2815  *      sk_buff.  If the packet is small we make a copy and recycle the
 2816  *      mbuf.  If the packet is small we make a copy and recycle the
 2817  *      positive drop threshold is supplied packets are dropped and their
 2818  *      buffers recycled if (a) the number of remaining buffers is under the
 2819  *      threshold and the packet is too big to copy, or (b) the packet should
 2820  *      be copied but there is no memory for the copy.
 2821  */
 2822 static int
 2823 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
 2824     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
 2825 {
 2826 
 2827         unsigned int len_cq =  ntohl(r->len_cq);
 2828         struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 2829         int mask, cidx = fl->cidx;
 2830         struct rx_sw_desc *sd = &fl->sdesc[cidx];
 2831         uint32_t len = G_RSPD_LEN(len_cq);
 2832         uint32_t flags = M_EXT;
 2833         uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
 2834         caddr_t cl;
 2835         struct mbuf *m;
 2836         int ret = 0;
 2837 
 2838         mask = fl->size - 1;
 2839         prefetch(fl->sdesc[(cidx + 1) & mask].m);
 2840         prefetch(fl->sdesc[(cidx + 2) & mask].m);
 2841         prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
 2842         prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 
 2843 
 2844         fl->credits--;
 2845         bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 2846         
 2847         if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 2848             sopeop == RSPQ_SOP_EOP) {
 2849                 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 2850                         goto skip_recycle;
 2851                 cl = mtod(m, void *);
 2852                 memcpy(cl, sd->rxsd_cl, len);
 2853                 recycle_rx_buf(adap, fl, fl->cidx);
 2854                 m->m_pkthdr.len = m->m_len = len;
 2855                 m->m_flags = 0;
 2856                 mh->mh_head = mh->mh_tail = m;
 2857                 ret = 1;
 2858                 goto done;
 2859         } else {
 2860         skip_recycle:
 2861                 bus_dmamap_unload(fl->entry_tag, sd->map);
 2862                 cl = sd->rxsd_cl;
 2863                 m = sd->m;
 2864 
 2865                 if ((sopeop == RSPQ_SOP_EOP) ||
 2866                     (sopeop == RSPQ_SOP))
 2867                         flags |= M_PKTHDR;
 2868                 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
 2869                 if (fl->zone == zone_pack) {
 2870                         /*
 2871                          * restore clobbered data pointer
 2872                          */
 2873                         m->m_data = m->m_ext.ext_buf;
 2874                 } else {
 2875                         m_cljset(m, cl, fl->type);
 2876                 }
 2877                 m->m_len = len;
 2878         }               
 2879         switch(sopeop) {
 2880         case RSPQ_SOP_EOP:
 2881                 ret = 1;
 2882                 /* FALLTHROUGH */
 2883         case RSPQ_SOP:
 2884                 mh->mh_head = mh->mh_tail = m;
 2885                 m->m_pkthdr.len = len;
 2886                 break;
 2887         case RSPQ_EOP:
 2888                 ret = 1;
 2889                 /* FALLTHROUGH */
 2890         case RSPQ_NSOP_NEOP:
 2891                 if (mh->mh_tail == NULL) {
 2892                         log(LOG_ERR, "discarding intermediate descriptor entry\n");
 2893                         m_freem(m);
 2894                         break;
 2895                 }
 2896                 mh->mh_tail->m_next = m;
 2897                 mh->mh_tail = m;
 2898                 mh->mh_head->m_pkthdr.len += len;
 2899                 break;
 2900         }
 2901         if (cxgb_debug)
 2902                 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
 2903 done:
 2904         if (++fl->cidx == fl->size)
 2905                 fl->cidx = 0;
 2906 
 2907         return (ret);
 2908 }
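
/*
 * Editor's note (illustrative sketch, not part of the driver): get_packet()
 * assembles multi-descriptor packets from the SOP/EOP marks: SOP starts a
 * new chain, middle fragments are appended at the tail, and EOP completes
 * the packet.  The fragment below reproduces that state machine with
 * made-up types ("struct frag", "struct chain") and returns 1 when a full
 * packet is available, as get_packet() does via "ret".
 */
#if 0
struct frag {
        struct frag     *next;
        unsigned int     len;
};

struct chain {
        struct frag     *head;
        struct frag     *tail;
        unsigned int     pktlen;
};

enum frag_mark { FRAG_SOP_EOP, FRAG_SOP, FRAG_EOP, FRAG_MIDDLE };

static int
append_frag(struct chain *c, struct frag *f, enum frag_mark mk)
{
        f->next = NULL;
        switch (mk) {
        case FRAG_SOP_EOP:
        case FRAG_SOP:
                c->head = c->tail = f;          /* start a new packet */
                c->pktlen = f->len;
                return (mk == FRAG_SOP_EOP);
        case FRAG_EOP:
        case FRAG_MIDDLE:
                if (c->tail == NULL)            /* missing SOP: caller drops */
                        return (0);
                c->tail->next = f;
                c->tail = f;
                c->pktlen += f->len;
                return (mk == FRAG_EOP);
        }
        return (0);
}
#endif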
 2909 
 2910 /**
 2911  *      handle_rsp_cntrl_info - handles control information in a response
 2912  *      @qs: the queue set corresponding to the response
 2913  *      @flags: the response control flags
 2914  *
 2915  *      Handles the control information of an SGE response, such as GTS
 2916  *      indications and completion credits for the queue set's Tx queues.
 2917  *      HW coalesces credits; we don't do any extra SW coalescing.
 2918  */
 2919 static __inline void
 2920 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2921 {
 2922         unsigned int credits;
 2923 
 2924 #if USE_GTS
 2925         if (flags & F_RSPD_TXQ0_GTS)
 2926                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2927 #endif
 2928         credits = G_RSPD_TXQ0_CR(flags);
 2929         if (credits) 
 2930                 qs->txq[TXQ_ETH].processed += credits;
 2931         
 2932         credits = G_RSPD_TXQ2_CR(flags);
 2933         if (credits) 
 2934                 qs->txq[TXQ_CTRL].processed += credits;
 2935 
 2936 # if USE_GTS
 2937         if (flags & F_RSPD_TXQ1_GTS)
 2938                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2939 # endif
 2940         credits = G_RSPD_TXQ1_CR(flags);
 2941         if (credits)
 2942                 qs->txq[TXQ_OFLD].processed += credits;
 2943 
 2944 }
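
/*
 * Editor's note (illustrative sketch, not part of the driver): the
 * G_RSPD_TXQn_CR() accessors used above are shift-and-mask extractors over
 * the 32-bit response flags word.  The helper below shows the pattern; the
 * shift/width values in the usage comment are invented for the example and
 * are not the real register layout.
 */
#if 0
#define EXTRACT_FIELD(word, shift, width) \
        (((word) >> (shift)) & ((1U << (width)) - 1U))

/* e.g. EXTRACT_FIELD(flags, 0, 7) would pull a 7-bit credit count that the
 * handler then adds to the matching txq's "processed" counter. */
#endif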
 2945 
 2946 static void
 2947 check_ring_db(adapter_t *adap, struct sge_qset *qs,
 2948     unsigned int sleeping)
 2949 {
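        /* No-op: GTS sleep indications from the response queue are ignored. */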
 2950         ;
 2951 }
 2952 
 2953 /**
 2954  *      process_responses - process responses from an SGE response queue
 2955  *      @adap: the adapter
 2956  *      @qs: the queue set to which the response queue belongs
 2957  *      @budget: how many responses can be processed in this round
 2958  *
 2959  *      Process responses from an SGE response queue up to the supplied budget.
 2960  *      Responses include received packets as well as credits and other events
 2961  *      for the queues that belong to the response queue's queue set.
 2962  *      A negative budget is effectively unlimited.
 2963  *
 2964  *      Additionally choose the interrupt holdoff time for the next interrupt
 2965  *      on this queue.  If the system is under memory pressure, use a fairly
 2966  *      long delay to help recovery.
 2967  */
 2968 static int
 2969 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2970 {
 2971         struct sge_rspq *rspq = &qs->rspq;
 2972         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2973         int budget_left = budget;
 2974         unsigned int sleeping = 0;
 2975 #ifdef LRO_SUPPORTED
 2976         int lro_enabled = qs->lro.enabled;
 2977         int skip_lro;
 2978         struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
 2979 #endif
 2980         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2981         int ngathered = 0;
 2982 #ifdef DEBUG    
 2983         static int last_holdoff = 0;
 2984         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2985                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2986                 last_holdoff = rspq->holdoff_tmr;
 2987         }
 2988 #endif
 2989         rspq->next_holdoff = rspq->holdoff_tmr;
 2990 
 2991         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2992                 int eth, eop = 0, ethpad = 0;
 2993                 uint32_t flags = ntohl(r->flags);
 2994                 uint32_t rss_csum = *(const uint32_t *)r;
 2995                 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 2996                 
 2997                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2998                 
 2999                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 3000                         struct mbuf *m;
 3001 
 3002                         if (cxgb_debug)
 3003                                 printf("async notification\n");
 3004 
 3005                         if (rspq->rspq_mh.mh_head == NULL) {
 3006                                 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 3007                                 m = rspq->rspq_mh.mh_head;
 3008                         } else {
 3009                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 3010                         }
 3011                         if (m == NULL)
 3012                                 goto no_mem;
 3013 
 3014                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
 3015                         m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
 3016                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
 3017                         rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
 3018                         eop = 1;
 3019                         rspq->async_notif++;
 3020                         goto skip;
 3021                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 3022                         struct mbuf *m = NULL;
 3023 
 3024                         DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
 3025                             r->rss_hdr.opcode, rspq->cidx);
 3026                         if (rspq->rspq_mh.mh_head == NULL)
 3027                                 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
 3028                         else 
 3029                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 3030 
 3031                         if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {      
 3032                 no_mem:
 3033                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 3034                                 budget_left--;
 3035                                 break;
 3036                         }
 3037                         get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
 3038                         eop = 1;
 3039                         rspq->imm_data++;
 3040                 } else if (r->len_cq) {
 3041                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 3042                         
 3043                         eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
 3044                         if (eop) {
 3045                                 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
 3046                                 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
 3047                         }
 3048                         
 3049                         ethpad = 2;
 3050                 } else {
 3051                         rspq->pure_rsps++;
 3052                 }
 3053         skip:
 3054                 if (flags & RSPD_CTRL_MASK) {
 3055                         sleeping |= flags & RSPD_GTS_MASK;
 3056                         handle_rsp_cntrl_info(qs, flags);
 3057                 }
 3058 
 3059                 r++;
 3060                 if (__predict_false(++rspq->cidx == rspq->size)) {
 3061                         rspq->cidx = 0;
 3062                         rspq->gen ^= 1;
 3063                         r = rspq->desc;
 3064                 }
 3065 
 3066                 if (++rspq->credits >= (rspq->size / 4)) {
 3067                         refill_rspq(adap, rspq, rspq->credits);
 3068                         rspq->credits = 0;
 3069                 }
 3070                 if (!eth && eop) {
 3071                         rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
 3072                         /*
 3073                          * XXX size mismatch
 3074                          */
 3075                         m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
 3076 
 3077                         
 3078                         ngathered = rx_offload(&adap->tdev, rspq,
 3079                             rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
 3080                         rspq->rspq_mh.mh_head = NULL;
 3081                         DPRINTF("received offload packet\n");
 3082                         
 3083                 } else if (eth && eop) {
 3084                         struct mbuf *m = rspq->rspq_mh.mh_head;
 3085 
 3086                         t3_rx_eth(adap, rspq, m, ethpad);
 3087 
 3088 #ifdef LRO_SUPPORTED
 3089                         /*
 3090                          * The T304 sends incoming packets on any qset.  If LRO
 3091  * is also enabled, we could end up sending the packet up
 3092                          * lro_ctrl->ifp's input.  That is incorrect.
 3093                          *
 3094                          * The mbuf's rcvif was derived from the cpl header and
 3095                          * is accurate.  Skip LRO and just use that.
 3096                          */
 3097                         skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
 3098 
 3099                         if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
 3100                             (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
 3101                                 /* successfully queued for LRO */
 3102                         } else
 3103 #endif
 3104                         {
 3105                                 /*
 3106                                  * LRO not enabled, packet unsuitable for LRO,
 3107                                  * or unable to queue.  Pass it up right now in
 3108                                  * either case.
 3109                                  */
 3110                                 struct ifnet *ifp = m->m_pkthdr.rcvif;
 3111                                 (*ifp->if_input)(ifp, m);
 3112                         }
 3113                         rspq->rspq_mh.mh_head = NULL;
 3114 
 3115                 }
 3116                 __refill_fl_lt(adap, &qs->fl[0], 32);
 3117                 __refill_fl_lt(adap, &qs->fl[1], 32);
 3118                 --budget_left;
 3119         }
 3120 
 3121         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 3122 
 3123 #ifdef LRO_SUPPORTED
 3124         /* Flush LRO */
 3125         while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
 3126                 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
 3127                 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
 3128                 tcp_lro_flush(lro_ctrl, queued);
 3129         }
 3130 #endif
 3131 
 3132         if (sleeping)
 3133                 check_ring_db(adap, qs, sleeping);
 3134 
 3135         mb();  /* commit Tx queue processed updates */
 3136         if (__predict_false(qs->txq_stopped > 1)) {
 3137                 printf("restarting tx on %p\n", qs);
 3138                 
 3139                 restart_tx(qs);
 3140         }
 3141         
 3142         __refill_fl_lt(adap, &qs->fl[0], 512);
 3143         __refill_fl_lt(adap, &qs->fl[1], 512);
 3144         budget -= budget_left;
 3145         return (budget);
 3146 }
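
/*
 * Editor's note (illustrative sketch, not part of the driver): the core of
 * process_responses() is a budget-limited walk of a descriptor ring that
 * uses a generation bit to distinguish new entries from stale ones; the
 * consumer's expected generation flips each time the index wraps.  The
 * fragment below shows the pattern with made-up types ("struct ring_desc",
 * "struct ring").  As in the driver, a negative budget is effectively
 * unlimited.
 */
#if 0
struct ring_desc {
        unsigned int    gen;            /* written by the producer */
        unsigned int    payload;
};

struct ring {
        struct ring_desc *desc;
        unsigned int      size;         /* number of entries */
        unsigned int      cidx;         /* consumer index */
        unsigned int      gen;          /* generation expected at cidx */
};

static int
ring_consume(struct ring *r, int budget)
{
        int budget_left = budget;

        while (budget_left && r->desc[r->cidx].gen == r->gen) {
                /* ... process r->desc[r->cidx] here ... */
                if (++r->cidx == r->size) {
                        r->cidx = 0;
                        r->gen ^= 1;    /* expect the opposite gen after wrap */
                }
                --budget_left;
        }
        return (budget - budget_left);  /* number of entries consumed */
}
#endif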
 3147 
 3148 /*
 3149  * A helper function that processes responses and issues GTS.
 3150  */
 3151 static __inline int
 3152 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 3153 {
 3154         int work;
 3155         static int last_holdoff = 0;
 3156         
 3157         work = process_responses(adap, rspq_to_qset(rq), -1);
 3158 
 3159         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 3160                 printf("next_holdoff=%d\n", rq->next_holdoff);
 3161                 last_holdoff = rq->next_holdoff;
 3162         }
 3163         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 3164             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 3165         
 3166         return (work);
 3167 }
 3168 
 3169 
 3170 /*
 3171  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 3172  * Handles data events from SGE response queues as well as error and other
 3173  * async events as they all use the same interrupt pin.  We use one SGE
 3174  * response queue per port in this mode and protect all response queues with
 3175  * queue 0's lock.
 3176  */
 3177 void
 3178 t3b_intr(void *data)
 3179 {
 3180         uint32_t i, map;
 3181         adapter_t *adap = data;
 3182         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3183         
 3184         t3_write_reg(adap, A_PL_CLI, 0);
 3185         map = t3_read_reg(adap, A_SG_DATA_INTR);
 3186 
 3187         if (!map) 
 3188                 return;
 3189 
 3190         if (__predict_false(map & F_ERRINTR))
 3191                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3192 
 3193         mtx_lock(&q0->lock);
 3194         for_each_port(adap, i)
 3195             if (map & (1 << i))
 3196                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 3197         mtx_unlock(&q0->lock);
 3198 }
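
/*
 * Editor's note (illustrative sketch, not part of the driver): t3b_intr()
 * reads a cause bitmap (A_SG_DATA_INTR) and services one response queue per
 * set bit.  The helper below shows the generic walk-the-set-bits pattern
 * with a made-up callback signature.
 */
#if 0
static void
for_each_set_bit(unsigned int map, void (*service)(int idx, void *arg),
    void *arg)
{
        int i;

        for (i = 0; map != 0; i++, map >>= 1)
                if (map & 1)
                        service(i, arg);
}
#endif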
 3199 
 3200 /*
 3201  * The MSI interrupt handler.  This needs to handle data events from SGE
 3202  * response queues as well as error and other async events as they all use
 3203  * the same MSI vector.  We use one SGE response queue per port in this mode
 3204  * and protect all response queues with queue 0's lock.
 3205  */
 3206 void
 3207 t3_intr_msi(void *data)
 3208 {
 3209         adapter_t *adap = data;
 3210         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 3211         int i, new_packets = 0;
 3212 
 3213         mtx_lock(&q0->lock);
 3214 
 3215         for_each_port(adap, i)
 3216             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 3217                     new_packets = 1;
 3218         mtx_unlock(&q0->lock);
 3219         if (new_packets == 0)
 3220                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 3221 }
 3222 
 3223 void
 3224 t3_intr_msix(void *data)
 3225 {
 3226         struct sge_qset *qs = data;
 3227         adapter_t *adap = qs->port->adapter;
 3228         struct sge_rspq *rspq = &qs->rspq;
 3229 
 3230         if (process_responses_gts(adap, rspq) == 0)
 3231                 rspq->unhandled_irqs++;
 3232 }
 3233 
 3234 #define QDUMP_SBUF_SIZE         (32 * 400)
 3235 static int
 3236 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
 3237 {
 3238         struct sge_rspq *rspq;
 3239         struct sge_qset *qs;
 3240         int i, err, dump_end, idx;
 3241         static int multiplier = 1;
 3242         struct sbuf *sb;
 3243         struct rsp_desc *rspd;
 3244         uint32_t data[4];
 3245         
 3246         rspq = arg1;
 3247         qs = rspq_to_qset(rspq);
 3248         if (rspq->rspq_dump_count == 0) 
 3249                 return (0);
 3250         if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
 3251                 log(LOG_WARNING,
 3252                     "dump count is too large %d\n", rspq->rspq_dump_count);
 3253                 rspq->rspq_dump_count = 0;
 3254                 return (EINVAL);
 3255         }
 3256         if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
 3257                 log(LOG_WARNING,
 3258                     "dump start of %d is greater than queue size\n",
 3259                     rspq->rspq_dump_start);
 3260                 rspq->rspq_dump_start = 0;
 3261                 return (EINVAL);
 3262         }
 3263         err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
 3264         if (err)
 3265                 return (err);
 3266 retry_sbufops:
 3267         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3268 
 3269         sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
 3270             (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
 3271             ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
 3272         sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
 3273             ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
 3274         
 3275         sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
 3276             (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
 3277         
 3278         dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
 3279         for (i = rspq->rspq_dump_start; i < dump_end; i++) {
 3280                 idx = i & (RSPQ_Q_SIZE-1);
 3281                 
 3282                 rspd = &rspq->desc[idx];
 3283                 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
 3284                     idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
 3285                     rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
 3286                 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
 3287                     rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
 3288                     be32toh(rspd->len_cq), rspd->intr_gen);
 3289         }
 3290         if (sbuf_overflowed(sb)) {
 3291                 sbuf_delete(sb);
 3292                 multiplier++;
 3293                 goto retry_sbufops;
 3294         }
 3295         sbuf_finish(sb);
 3296         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3297         sbuf_delete(sb);
 3298         return (err);
 3299 }       
 3300 
 3301 static int
 3302 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 3303 {
 3304         struct sge_txq *txq;
 3305         struct sge_qset *qs;
 3306         int i, j, err, dump_end;
 3307         static int multiplier = 1;
 3308         struct sbuf *sb;
 3309         struct tx_desc *txd;
 3310         uint32_t *WR, wr_hi, wr_lo, gen;
 3311         uint32_t data[4];
 3312         
 3313         txq = arg1;
 3314         qs = txq_to_qset(txq, TXQ_ETH);
 3315         if (txq->txq_dump_count == 0) {
 3316                 return (0);
 3317         }
 3318         if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
 3319                 log(LOG_WARNING,
 3320                     "dump count is too large %d\n", txq->txq_dump_count);
 3321                 txq->txq_dump_count = 1;
 3322                 return (EINVAL);
 3323         }
 3324         if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
 3325                 log(LOG_WARNING,
 3326                     "dump start of %d is greater than queue size\n",
 3327                     txq->txq_dump_start);
 3328                 txq->txq_dump_start = 0;
 3329                 return (EINVAL);
 3330         }
 3331         err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 3332         if (err)
 3333                 return (err);
 3334         
 3335             
 3336 retry_sbufops:
 3337         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3338 
 3339         sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
 3340             (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 
 3341             (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
 3342         sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
 3343             ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
 3344             ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
 3345         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3346             txq->txq_dump_start,
 3347             (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
 3348 
 3349         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3350         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3351                 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
 3352                 WR = (uint32_t *)txd->flit;
 3353                 wr_hi = ntohl(WR[0]);
 3354                 wr_lo = ntohl(WR[1]);           
 3355                 gen = G_WR_GEN(wr_lo);
 3356                 
 3357                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3358                     wr_hi, wr_lo, gen);
 3359                 for (j = 2; j < 30; j += 4) 
 3360                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3361                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3362 
 3363         }
 3364         if (sbuf_overflowed(sb)) {
 3365                 sbuf_delete(sb);
 3366                 multiplier++;
 3367                 goto retry_sbufops;
 3368         }
 3369         sbuf_finish(sb);
 3370         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3371         sbuf_delete(sb);
 3372         return (err);
 3373 }
 3374 
 3375 static int
 3376 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
 3377 {
 3378         struct sge_txq *txq;
 3379         struct sge_qset *qs;
 3380         int i, j, err, dump_end;
 3381         static int multiplier = 1;
 3382         struct sbuf *sb;
 3383         struct tx_desc *txd;
 3384         uint32_t *WR, wr_hi, wr_lo, gen;
 3385         
 3386         txq = arg1;
 3387         qs = txq_to_qset(txq, TXQ_CTRL);
 3388         if (txq->txq_dump_count == 0) {
 3389                 return (0);
 3390         }
 3391         if (txq->txq_dump_count > 256) {
 3392                 log(LOG_WARNING,
 3393                     "dump count is too large %d\n", txq->txq_dump_count);
 3394                 txq->txq_dump_count = 1;
 3395                 return (EINVAL);
 3396         }
 3397         if (txq->txq_dump_start > 255) {
 3398                 log(LOG_WARNING,
 3399                     "dump start of %d is greater than queue size\n",
 3400                     txq->txq_dump_start);
 3401                 txq->txq_dump_start = 0;
 3402                 return (EINVAL);
 3403         }
 3404 
 3405 retry_sbufops:
 3406         sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
 3407         sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
 3408             txq->txq_dump_start,
 3409             (txq->txq_dump_start + txq->txq_dump_count) & 255);
 3410 
 3411         dump_end = txq->txq_dump_start + txq->txq_dump_count;
 3412         for (i = txq->txq_dump_start; i < dump_end; i++) {
 3413                 txd = &txq->desc[i & (255)];
 3414                 WR = (uint32_t *)txd->flit;
 3415                 wr_hi = ntohl(WR[0]);
 3416                 wr_lo = ntohl(WR[1]);           
 3417                 gen = G_WR_GEN(wr_lo);
 3418                 
 3419                 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
 3420                     wr_hi, wr_lo, gen);
 3421                 for (j = 2; j < 30; j += 4) 
 3422                         sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
 3423                             WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
 3424 
 3425         }
 3426         if (sbuf_overflowed(sb)) {
 3427                 sbuf_delete(sb);
 3428                 multiplier++;
 3429                 goto retry_sbufops;
 3430         }
 3431         sbuf_finish(sb);
 3432         err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 3433         sbuf_delete(sb);
 3434         return (err);
 3435 }
 3436 
 3437 static int
 3438 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
 3439 {
 3440         adapter_t *sc = arg1;
 3441         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 3442         int coalesce_usecs;     
 3443         struct sge_qset *qs;
 3444         int i, j, err, nqsets = 0;
 3445         struct mtx *lock;
 3446 
 3447         if ((sc->flags & FULL_INIT_DONE) == 0)
 3448                 return (ENXIO);
 3449                 
 3450         coalesce_usecs = qsp->coalesce_usecs;
 3451         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
 3452 
 3453         if (err != 0) {
 3454                 return (err);
 3455         }
 3456         if (coalesce_usecs == qsp->coalesce_usecs)
 3457                 return (0);
 3458 
 3459         for (i = 0; i < sc->params.nports; i++) 
 3460                 for (j = 0; j < sc->port[i].nqsets; j++)
 3461                         nqsets++;
 3462 
 3463         coalesce_usecs = max(1, coalesce_usecs);
 3464 
 3465         for (i = 0; i < nqsets; i++) {
 3466                 qs = &sc->sge.qs[i];
 3467                 qsp = &sc->params.sge.qset[i];
 3468                 qsp->coalesce_usecs = coalesce_usecs;
 3469                 
 3470                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 3471                             &sc->sge.qs[0].rspq.lock;
 3472 
 3473                 mtx_lock(lock);
 3474                 t3_update_qset_coalesce(qs, qsp);
 3475                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 3476                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 3477                 mtx_unlock(lock);
 3478         }
 3479 
 3480         return (0);
 3481 }
 3482 
 3483 
 3484 void
 3485 t3_add_attach_sysctls(adapter_t *sc)
 3486 {
 3487         struct sysctl_ctx_list *ctx;
 3488         struct sysctl_oid_list *children;
 3489 
 3490         ctx = device_get_sysctl_ctx(sc->dev);
 3491         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3492 
 3493         /* random information */
 3494         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3495             "firmware_version",
 3496             CTLFLAG_RD, &sc->fw_version,
 3497             0, "firmware version");
 3498         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3499             "hw_revision",
 3500             CTLFLAG_RD, &sc->params.rev,
 3501             0, "chip hardware revision");
 3502         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
 3503             "port_types",
 3504             CTLFLAG_RD, &sc->port_types,
 3505             0, "type of ports");
 3506         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3507             "enable_debug",
 3508             CTLFLAG_RW, &cxgb_debug,
 3509             0, "enable verbose debugging output");
 3510         SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
 3511             CTLFLAG_RD, &sc->tunq_coalesce,
 3512             "#tunneled packets freed");
 3513         SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 3514             "txq_overrun",
 3515             CTLFLAG_RD, &txq_fills,
 3516             0, "#times txq overrun");
 3517 }
 3518 
 3519 
 3520 static const char *rspq_name = "rspq";
 3521 static const char *txq_names[] =
 3522 {
 3523         "txq_eth",
 3524         "txq_ofld",
 3525         "txq_ctrl"      
 3526 };
 3527 
 3528 static int
 3529 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
 3530 {
 3531         struct port_info *p = arg1;
 3532         uint64_t *parg;
 3533 
 3534         if (!p)
 3535                 return (EINVAL);
 3536 
 3537         parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
 3538         PORT_LOCK(p);
 3539         t3_mac_update_stats(&p->mac);
 3540         PORT_UNLOCK(p);
 3541 
 3542         return (sysctl_handle_quad(oidp, parg, 0, req));
 3543 }
 3544 
 3545 void
 3546 t3_add_configured_sysctls(adapter_t *sc)
 3547 {
 3548         struct sysctl_ctx_list *ctx;
 3549         struct sysctl_oid_list *children;
 3550         int i, j;
 3551         
 3552         ctx = device_get_sysctl_ctx(sc->dev);
 3553         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
 3554 
 3555         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
 3556             "intr_coal",
 3557             CTLTYPE_INT|CTLFLAG_RW, sc,
 3558             0, t3_set_coalesce_usecs,
 3559             "I", "interrupt coalescing timer (us)");
 3560 
 3561         for (i = 0; i < sc->params.nports; i++) {
 3562                 struct port_info *pi = &sc->port[i];
 3563                 struct sysctl_oid *poid;
 3564                 struct sysctl_oid_list *poidlist;
 3565                 struct mac_stats *mstats = &pi->mac.stats;
 3566                 
 3567                 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
 3568                 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 
 3569                     pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
 3570                 poidlist = SYSCTL_CHILDREN(poid);
 3571                 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 
 3572                     "nqsets", CTLFLAG_RD, &pi->nqsets,
 3573                     0, "#queue sets");
 3574 
 3575                 for (j = 0; j < pi->nqsets; j++) {
 3576                         struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
 3577                         struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
 3578                                           *ctrlqpoid, *lropoid;
 3579                         struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
 3580                                                *txqpoidlist, *ctrlqpoidlist,
 3581                                                *lropoidlist;
 3582                         struct sge_txq *txq = &qs->txq[TXQ_ETH];
 3583                         
 3584                         snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
 3585                         
 3586                         qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 
 3587                             qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
 3588                         qspoidlist = SYSCTL_CHILDREN(qspoid);
 3589 
 3590                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
 3591                                         CTLFLAG_RD, &qs->fl[0].empty, 0,
 3592                                         "freelist #0 empty");
 3593                         SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
 3594                                         CTLFLAG_RD, &qs->fl[1].empty, 0,
 3595                                         "freelist #1 empty");
 3596 
 3597                         rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3598                             rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
 3599                         rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
 3600 
 3601                         txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3602                             txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 3603                         txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 3604 
 3605                         ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3606                             txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
 3607                         ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
 3608 
 3609                         lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
 3610                             "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
 3611                         lropoidlist = SYSCTL_CHILDREN(lropoid);
 3612 
 3613                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 3614                             CTLFLAG_RD, &qs->rspq.size,
 3615                             0, "#entries in response queue");
 3616                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
 3617                             CTLFLAG_RD, &qs->rspq.cidx,
 3618                             0, "consumer index");
 3619                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
 3620                             CTLFLAG_RD, &qs->rspq.credits,
 3621                             0, "#credits");
 3622                         SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
 3623                             CTLFLAG_RD, &qs->rspq.phys_addr,
 3624             "physical address of the queue");
 3625                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
 3626                             CTLFLAG_RW, &qs->rspq.rspq_dump_start,
 3627                             0, "start rspq dump entry");
 3628                         SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
 3629                             CTLFLAG_RW, &qs->rspq.rspq_dump_count,
 3630                             0, "#rspq entries to dump");
 3631                         SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
 3632                             CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 3633                             0, t3_dump_rspq, "A", "dump of the response queue");
 3634 
 3635 
 3636                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
 3637                             CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
 3638                             0, "#tunneled packets dropped");
 3639                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
 3640                             CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
 3641                             0, "#tunneled packets waiting to be sent");
 3642 #if 0                   
 3643                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
 3644                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
 3645                             0, "#tunneled packets queue producer index");
 3646                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
 3647                             CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
 3648                             0, "#tunneled packets queue consumer index");
 3649 #endif                  
 3650                         SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
 3651                             CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
 3652                             0, "#tunneled packets processed by the card");
 3653                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
 3654                             CTLFLAG_RD, &txq->cleaned,
 3655                             0, "#tunneled packets cleaned");
 3656                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
 3657                             CTLFLAG_RD, &txq->in_use,
 3658                             0, "#tunneled packet slots in use");
 3659                         SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
 3660                             CTLFLAG_RD, &txq->txq_frees,
 3661                             "#tunneled packets freed");
 3662                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
 3663                             CTLFLAG_RD, &txq->txq_skipped,
 3664                             0, "#tunneled packet descriptors skipped");
 3665                         SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
 3666                             CTLFLAG_RD, &txq->txq_coalesced,
 3667                             "#tunneled packets coalesced");
 3668                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
 3669                             CTLFLAG_RD, &txq->txq_enqueued,
 3670                             0, "#tunneled packets enqueued to hardware");
 3671                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
 3672                             CTLFLAG_RD, &qs->txq_stopped,
 3673                             0, "tx queues stopped");
 3674                         SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
 3675                             CTLFLAG_RD, &txq->phys_addr,
 3676             "physical address of the queue");
 3677                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
 3678                             CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
 3679                             0, "txq generation");
 3680                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
 3681                             CTLFLAG_RD, &txq->cidx,
 3682                             0, "hardware queue cidx");                  
 3683                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
 3684                             CTLFLAG_RD, &txq->pidx,
 3685                             0, "hardware queue pidx");
 3686                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
 3687                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
 3688                             0, "txq start idx for dump");
 3689                         SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
 3690                             CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
 3691                             0, "txq #entries to dump");                 
 3692                         SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 3693                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
 3694                             0, t3_dump_txq_eth, "A", "dump of the transmit queue");
 3695 
 3696                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
 3697                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
 3698                             0, "ctrlq start idx for dump");
 3699                         SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
 3700                             CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
 3701                             0, "ctrl #entries to dump");                        
 3702                         SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
 3703                             CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
 3704                             0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
 3705 
 3706 #ifdef LRO_SUPPORTED
 3707                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
 3708                             CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
 3709                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
 3710                             CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
 3711                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
 3712                             CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
 3713                         SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
 3714                             CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
 3715 #endif
 3716                 }
 3717 
 3718                 /* Now add a node for mac stats. */
 3719                 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
 3720                     CTLFLAG_RD, NULL, "MAC statistics");
 3721                 poidlist = SYSCTL_CHILDREN(poid);
 3722 
 3723                 /*
 3724                  * We (ab)use the length argument (arg2) to pass on the offset
 3725                  * of the data that we are interested in.  This is only required
 3726                  * for the quad counters that are updated from the hardware (we
 3727                  * make sure that we return the latest value).
 3728                  * sysctl_handle_macstat first updates *all* the counters from
 3729                  * the hardware, and then returns the latest value of the
 3730                  * requested counter.  Best would be to update only the
 3731                  * requested counter from hardware, but t3_mac_update_stats()
 3732                  * hides all the register details and we don't want to dive into
 3733                  * all that here.
 3734                  */
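#if 0
                /*
                 * Editor's sketch (never compiled): what the arg2/offsetof
                 * pairing resolves to for one counter, e.g. tx_octets.
                 * sysctl_handle_macstat() receives "off" in arg2 and
                 * recomputes this pointer before returning the value.
                 */
                {
                        size_t off = offsetof(struct mac_stats, tx_octets);
                        uint64_t *ctr;

                        ctr = (uint64_t *)((uint8_t *)&pi->mac.stats + off);
                        (void)ctr;
                }
#endif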
 3735 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
 3736     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
 3737     sysctl_handle_macstat, "QU", 0)
 3738                 CXGB_SYSCTL_ADD_QUAD(tx_octets);
 3739                 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
 3740                 CXGB_SYSCTL_ADD_QUAD(tx_frames);
 3741                 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
 3742                 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
 3743                 CXGB_SYSCTL_ADD_QUAD(tx_pause);
 3744                 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
 3745                 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
 3746                 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
 3747                 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
 3748                 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
 3749                 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
 3750                 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
 3751                 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
 3752                 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
 3753                 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
 3754                 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
 3755                 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
 3756                 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
 3757                 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
 3758                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
 3759                 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
 3760                 CXGB_SYSCTL_ADD_QUAD(rx_octets);
 3761                 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
 3762                 CXGB_SYSCTL_ADD_QUAD(rx_frames);
 3763                 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
 3764                 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
 3765                 CXGB_SYSCTL_ADD_QUAD(rx_pause);
 3766                 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
 3767                 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
 3768                 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
 3769                 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
 3770                 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
 3771                 CXGB_SYSCTL_ADD_QUAD(rx_runt);
 3772                 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
 3773                 CXGB_SYSCTL_ADD_QUAD(rx_short);
 3774                 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
 3775                 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
 3776                 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
 3777                 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
 3778                 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
 3779                 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
 3780                 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
 3781                 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
 3782                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
 3783                 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
 3784 #undef CXGB_SYSCTL_ADD_QUAD
 3785 
 3786 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
 3787     CTLFLAG_RD, &mstats->a, 0)
 3788                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
 3789                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
 3790                 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
 3791                 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
 3792                 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
 3793                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
 3794                 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
 3795                 CXGB_SYSCTL_ADD_ULONG(num_toggled);
 3796                 CXGB_SYSCTL_ADD_ULONG(num_resets);
 3797                 CXGB_SYSCTL_ADD_ULONG(link_faults);
 3798 #undef CXGB_SYSCTL_ADD_ULONG
 3799         }
 3800 }
 3801         
 3802 /**
 3803  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 3804  *      @qs: the queue set
 3805  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 3806  *      @idx: the descriptor index in the queue
 3807  *      @data: where to dump the descriptor contents
 3808  *
 3809  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 3810  *      size of the descriptor.
 3811  */
 3812 int
 3813 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 3814                 unsigned char *data)
 3815 {
 3816         if (qnum >= 6)
 3817                 return (EINVAL);
 3818 
 3819         if (qnum < 3) {
 3820                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 3821                         return -EINVAL;
 3822                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 3823                 return sizeof(struct tx_desc);
 3824         }
 3825 
 3826         if (qnum == 3) {
 3827                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 3828                         return (EINVAL);
 3829                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 3830                 return sizeof(struct rsp_desc);
 3831         }
 3832 
 3833         qnum -= 4;
 3834         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 3835                 return (EINVAL);
 3836         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 3837         return sizeof(struct rx_desc);
 3838 }
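
/*
 * Editor's note (illustrative sketch, not part of the driver): a
 * hypothetical caller of t3_get_desc().  qnum 0-2 selects a Tx queue, 3 the
 * response queue, and 4-5 a free list; the destination buffer is assumed to
 * be at least as large as the biggest of the three descriptor types.
 */
#if 0
static void
dump_one_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx)
{
        unsigned char buf[256];         /* assumption: holds any descriptor */
        int len;

        len = t3_get_desc(qs, qnum, idx, buf);
        /* on success, "len" is the size in bytes of the copied descriptor */
        (void)len;
}
#endif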
