The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/cxgb/cxgb_sge.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /**************************************************************************
    2 
    3 Copyright (c) 2007, Chelsio Inc.
    4 All rights reserved.
    5 
    6 Redistribution and use in source and binary forms, with or without
    7 modification, are permitted provided that the following conditions are met:
    8 
    9  1. Redistributions of source code must retain the above copyright notice,
   10     this list of conditions and the following disclaimer.
   11 
   12  2. Neither the name of the Chelsio Corporation nor the names of its
   13     contributors may be used to endorse or promote products derived from
   14     this software without specific prior written permission.
   15  
   16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26 POSSIBILITY OF SUCH DAMAGE.
   27 
   28 ***************************************************************************/
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/6.4/sys/dev/cxgb/cxgb_sge.c 174319 2007-12-05 22:05:49Z kmacy $");
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kernel.h>
   36 #include <sys/module.h>
   37 #include <sys/bus.h>
   38 #include <sys/conf.h>
   39 #include <machine/bus.h>
   40 #include <machine/resource.h>
   41 #include <sys/bus_dma.h>
   42 #include <sys/rman.h>
   43 #include <sys/queue.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/taskqueue.h>
   46 
   47 #include <sys/proc.h>
   48 #include <sys/sched.h>
   49 #include <sys/smp.h>
   50 #include <sys/systm.h>
   51 
   52 #include <netinet/in_systm.h>
   53 #include <netinet/in.h>
   54 #include <netinet/ip.h>
   55 #include <netinet/tcp.h>
   56 
   57 #include <dev/pci/pcireg.h>
   58 #include <dev/pci/pcivar.h>
   59 
   60 #ifdef CONFIG_DEFINED
   61 #include <cxgb_include.h>
   62 #else
   63 #include <dev/cxgb/cxgb_include.h>
   64 #endif
   65 
/*
 * Debug counters and tunables.  Several are updated by code outside this
 * chunk's view; bogus_imm counts malformed immediate-data responses seen
 * by get_imm_packet().
 */
uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
int      txq_fills = 0;
int      collapse_mbufs = 0;
static int bogus_imm = 0;       /* bumped on unexpected SOP/EOP combinations */
#ifndef DISABLE_MBUF_IOVEC
static int recycle_enable = 1;  /* NOTE(review): presumably gates the
                                 * recycle_rx_buf() path — confirm at use site */
#endif

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE      1536
#define SGE_RX_DROP_THRES       16
#define SGE_RX_COPY_THRES       128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/* 
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/* 
 * Values for sge_txq.flags
 */
enum {
        TXQ_RUNNING     = 1 << 0,  /* fetch engine is running */
        TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

/* Hardware Tx descriptor: TX_DESC_FLITS 64-bit flits; layout fixed by HW. */
struct tx_desc {
        uint64_t        flit[TX_DESC_FLITS];
} __packed;
  103 
  104 struct rx_desc {
  105         uint32_t        addr_lo;
  106         uint32_t        len_gen;
  107         uint32_t        gen2;
  108         uint32_t        addr_hi;
  109 } __packed;;
  110 
struct rsp_desc {               /* response queue descriptor */
        struct rss_header       rss_hdr;
        uint32_t                flags;
        uint32_t                len_cq;
        uint8_t                 imm_data[47];   /* immediate packet data, copied
                                                 * out by get_imm_packet() */
        uint8_t                 intr_gen;
} __packed;

/* Flag bits for rx_sw_desc/tx_sw_desc .flags */
#define RX_SW_DESC_MAP_CREATED  (1 << 0)        /* DMA map exists for slot */
#define TX_SW_DESC_MAP_CREATED  (1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)        /* slot holds a loaded cluster */
#define TX_SW_DESC_MAPPED       (1 << 4)

/* SOP/EOP combinations as extracted from rsp_desc flags */
#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
  128 
struct tx_sw_desc {                /* SW state per Tx descriptor */
        struct mbuf     *m;        /* mbuf backing this descriptor */
        bus_dmamap_t    map;
        int             flags;     /* TX_SW_DESC_* */
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
        void            *cl;       /* receive buffer cluster (uma zone alloc) */
        bus_dmamap_t    map;
        int             flags;     /* RX_SW_DESC_* */
};

/* Snapshot of Tx queue producer state. */
struct txq_state {
        unsigned int compl;
        unsigned int gen;
        unsigned int pidx;
};

/* Results handed back by refill_fl_cb() from bus_dmamap_load(). */
struct refill_fl_cb_arg {
        int               error;
        bus_dma_segment_t seg;
        int               nseg;
};
  152 
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 * Index the table with the flit count; entry 0 is a placeholder.
 */
static uint8_t flit_desc_map[] = {
        0,
#if SGE_NUM_GENBITS == 1
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
  177 
  178 
static int lro_default = 0;
int cxgb_debug = 0;     /* global debug knob, checked e.g. in get_imm_packet() */

/* Forward declarations for routines used before their definitions. */
static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
  187 
  188 /**
  189  *      reclaim_completed_tx - reclaims completed Tx descriptors
  190  *      @adapter: the adapter
  191  *      @q: the Tx queue to reclaim completed descriptors from
  192  *
  193  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
  194  *      and frees the associated buffers if possible.  Called with the Tx
  195  *      queue's lock held.
  196  */
  197 static __inline int
  198 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
  199 {
  200         int reclaimed, reclaim = desc_reclaimable(q);
  201         int n = 0;
  202 
  203         mtx_assert(&q->lock, MA_OWNED);
  204         if (reclaim > 0) {
  205                 n = free_tx_desc(q, min(reclaim, nbufs), mvec);
  206                 reclaimed = min(reclaim, nbufs);
  207                 q->cleaned += reclaimed;
  208                 q->in_use -= reclaimed;
  209         } 
  210         return (n);
  211 }
  212 
  213 /**
  214  *      should_restart_tx - are there enough resources to restart a Tx queue?
  215  *      @q: the Tx queue
  216  *
  217  *      Checks if there are enough descriptors to restart a suspended Tx queue.
  218  */
  219 static __inline int
  220 should_restart_tx(const struct sge_txq *q)
  221 {
  222         unsigned int r = q->processed - q->cleaned;
  223 
  224         return q->in_use - r < (q->size >> 1);
  225 }
  226 
  227 /**
  228  *      t3_sge_init - initialize SGE
  229  *      @adap: the adapter
  230  *      @p: the SGE parameters
  231  *
  232  *      Performs SGE initialization needed every time after a chip reset.
  233  *      We do not initialize any of the queue sets here, instead the driver
  234  *      top-level must request those individually.  We also do not enable DMA
  235  *      here, that should be done after the queues have been set up.
  236  */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
        u_int ctrl, ups;

        ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

        /* Build the global SGE control word. */
        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
               F_CQCRDTCTRL |
               V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
               V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
        ctrl |= F_EGRGENCTRL;
#endif
        if (adap->params.rev > 0) {
                /* Only set the one-interrupt-multi-queue bits when neither
                 * MSI nor MSI-X is in use. */
                if (!(adap->flags & (USING_MSIX | USING_MSI)))
                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
                ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
        }
        t3_write_reg(adap, A_SG_CONTROL, ctrl);
        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
                     V_LORCQDRBTHRSH(512));
        /* SGE timer tick: one tenth of a core microsecond. */
        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
        /* Doorbell high/low watermarks and priority threshold. */
        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}
  269 
  270 
  271 /**
  272  *      sgl_len - calculates the size of an SGL of the given capacity
  273  *      @n: the number of SGL entries
  274  *
  275  *      Calculates the number of flits needed for a scatter/gather list that
  276  *      can hold the given number of entries.
  277  */
  278 static __inline unsigned int
  279 sgl_len(unsigned int n)
  280 {
  281         return ((3 * n) / 2 + (n & 1));
  282 }
  283 
  284 /**
  285  *      get_imm_packet - return the next ingress packet buffer from a response
  286  *      @resp: the response descriptor containing the packet data
  287  *
  288  *      Return a packet containing the immediate data of the given response.
  289  */
  290 #ifdef DISABLE_MBUF_IOVEC
static __inline int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
{
        struct mbuf *m;
        int len;
        uint32_t flags = ntohl(resp->flags);            
        uint8_t sopeop = G_RSPD_SOP_EOP(flags);

        /*
         * Immediate data with no SOP/EOP, or a lone SOP,
         * would be a firmware bug.
         */
        if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
                return (0);
        
        m = m_gethdr(M_NOWAIT, MT_DATA);
        len = G_RSPD_LEN(ntohl(resp->len_cq));
        
        if (m) {
                MH_ALIGN(m, IMMED_PKT_SIZE);
                /* Copy the full immediate area; the valid length is set below. */
                memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
                m->m_len = len;
                
                switch (sopeop) {
                case RSPQ_SOP_EOP:
                        /* Complete packet contained in a single response. */
                        mh->mh_head = mh->mh_tail = m;
                        m->m_pkthdr.len = len;
                        m->m_flags |= M_PKTHDR;
                        break;
                case RSPQ_EOP:  
                        /* Trailing fragment: append to the existing chain.
                         * NOTE(review): assumes mh->mh_head/mh_tail were set
                         * by an earlier SOP — confirm callers guarantee it. */
                        m->m_flags &= ~M_PKTHDR;
                        mh->mh_head->m_pkthdr.len += len;
                        mh->mh_tail->m_next = m;
                        mh->mh_tail = m;
                        break;
                }
        }
        return (m != NULL);
}
  329 
  330 #else
  331 static int
  332 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
  333 {
  334         int len, error;
  335         uint8_t sopeop = G_RSPD_SOP_EOP(flags);
  336         
  337         /*
  338          * would be a firmware bug
  339          */
  340         len = G_RSPD_LEN(ntohl(resp->len_cq));  
  341         if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
  342                 if (cxgb_debug)
  343                         device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len);
  344                 bogus_imm++;
  345                 return (EINVAL);
  346         }
  347         error = 0;
  348         switch (sopeop) {
  349         case RSPQ_SOP_EOP:
  350                 m->m_len = m->m_pkthdr.len = len; 
  351                 memcpy(mtod(m, uint8_t *), resp->imm_data, len); 
  352                 break;
  353         case RSPQ_EOP:
  354                 memcpy(cl, resp->imm_data, len); 
  355                 m_iovappend(m, cl, MSIZE, len, 0); 
  356                 break;
  357         default:
  358                 bogus_imm++;
  359                 error = EINVAL;
  360         }
  361 
  362         return (error);
  363 }
  364 #endif
  365 
  366 static __inline u_int
  367 flits_to_desc(u_int n)
  368 {
  369         return (flit_desc_map[n]);
  370 }
  371 
/*
 * t3_sge_err_intr_handler - handle SGE error interrupts
 * @adapter: the adapter
 *
 * Reports SGE error conditions (response queue credit overflow, packet
 * delivered to a disabled response queue), acknowledges the interrupt
 * cause, and escalates to a fatal error for the serious causes.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
        unsigned int v, status;

        
        status = t3_read_reg(adapter, A_SG_INT_CAUSE);
        
        if (status & F_RSPQCREDITOVERFOW)
                CH_ALERT(adapter, "SGE response queue credit overflow\n");

        if (status & F_RSPQDISABLED) {
                v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

                CH_ALERT(adapter,
                         "packet delivered to disabled response queue (0x%x)\n",
                         (v >> S_RSPQ0DISABLED) & 0xff);
        }

        /* Acknowledge everything we saw, then escalate if necessary. */
        t3_write_reg(adapter, A_SG_INT_CAUSE, status);
        if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
                t3_fatal_err(adapter);
}
  395 
  396 void
  397 t3_sge_prep(adapter_t *adap, struct sge_params *p)
  398 {
  399         int i;
  400 
  401         /* XXX Does ETHER_ALIGN need to be accounted for here? */
  402         p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
  403 
  404         for (i = 0; i < SGE_QSETS; ++i) {
  405                 struct qset_params *q = p->qset + i;
  406 
  407                 q->polling = adap->params.rev > 0;
  408 
  409                 if (adap->params.nports > 2)
  410                         q->coalesce_nsecs = 50000;
  411                 else
  412                         q->coalesce_nsecs = 5000;
  413 
  414                 q->rspq_size = RSPQ_Q_SIZE;
  415                 q->fl_size = FL_Q_SIZE;
  416                 q->jumbo_size = JUMBO_Q_SIZE;
  417                 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
  418                 q->txq_size[TXQ_OFLD] = 1024;
  419                 q->txq_size[TXQ_CTRL] = 256;
  420                 q->cong_thres = 0;
  421         }
  422 }
  423 
/*
 * t3_sge_alloc - create the adapter-wide DMA tags
 * @sc: the controller softc
 *
 * Creates the parent DMA tag and the child tags used for normal-sized Rx
 * frames, jumbo Rx frames, and Tx frames.  Returns 0 on success or ENOMEM
 * if any tag cannot be created (already-created tags are released later
 * by t3_sge_free()).
 */
int
t3_sge_alloc(adapter_t *sc)
{

        /* The parent tag. */
        if (bus_dma_tag_create( NULL,                   /* parent */
                                1, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR,      /* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
                                BUS_SPACE_UNRESTRICTED, /* nsegments */
                                BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lock, lockarg */
                                &sc->parent_dmat)) {
                device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
                return (ENOMEM);
        }

        /*
         * DMA tag for normal sized RX frames (single MCLBYTES segment)
         */
        if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
                BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
                MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
                device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
                return (ENOMEM);
        }

        /* 
         * DMA tag for jumbo sized RX frames (single MJUMPAGESIZE segment).
         */
        if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
                BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
                BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
                device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
                return (ENOMEM);
        }

        /* 
         * DMA tag for TX frames (up to TX_MAX_SEGS scatter segments).
         */
        if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
                BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
                TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
                NULL, NULL, &sc->tx_dmat)) {
                device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
                return (ENOMEM);
        }

        return (0);
}
  477 
  478 int
  479 t3_sge_free(struct adapter * sc)
  480 {
  481 
  482         if (sc->tx_dmat != NULL)
  483                 bus_dma_tag_destroy(sc->tx_dmat);
  484 
  485         if (sc->rx_jumbo_dmat != NULL)
  486                 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
  487 
  488         if (sc->rx_dmat != NULL)
  489                 bus_dma_tag_destroy(sc->rx_dmat);
  490 
  491         if (sc->parent_dmat != NULL)
  492                 bus_dma_tag_destroy(sc->parent_dmat);
  493 
  494         return (0);
  495 }
  496 
  497 void
  498 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  499 {
  500 
  501         qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
  502         qs->rspq.polling = 0 /* p->polling */;
  503 }
  504 
  505 static void
  506 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  507 {
  508         struct refill_fl_cb_arg *cb_arg = arg;
  509         
  510         cb_arg->error = error;
  511         cb_arg->seg = segs[0];
  512         cb_arg->nseg = nseg;
  513 
  514 }
  515 
  516 /**
  517  *      refill_fl - refill an SGE free-buffer list
  518  *      @sc: the controller softc
  519  *      @q: the free-list to refill
  520  *      @n: the number of new buffers to allocate
  521  *
  522  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers.
  523  *      The caller must assure that @n does not exceed the queue's capacity.
  524  */
  525 static void
  526 refill_fl(adapter_t *sc, struct sge_fl *q, int n, int blocking)
  527 {
  528         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
  529         struct rx_desc *d = &q->desc[q->pidx];
  530         struct refill_fl_cb_arg cb_arg;
  531         void *cl;
  532         int err;
  533 #ifdef notyet   
  534         int flags = blocking ? M_TRYWAIT : M_DONTWAIT;
  535 #else
  536         int flags = M_DONTWAIT;
  537 #endif
  538         
  539         cb_arg.error = 0;
  540         while (n--) {
  541                 /*
  542                  * We only allocate a cluster, mbuf allocation happens after rx
  543                  */
  544                 if ((cl = m_cljget(NULL, flags, q->buf_size)) == NULL) {
  545                         log(LOG_WARNING, "Failed to allocate cluster\n");
  546                         goto done;
  547                 }
  548                 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
  549                         if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
  550                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
  551                                 uma_zfree(q->zone, cl);
  552                                 goto done;
  553                         }
  554                         sd->flags |= RX_SW_DESC_MAP_CREATED;
  555                 }
  556                 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
  557                     refill_fl_cb, &cb_arg, 0);
  558                 
  559                 if (err != 0 || cb_arg.error) {
  560                         log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
  561                         /*
  562                          * XXX free cluster
  563                          */
  564                         return;
  565                 }
  566                 
  567                 sd->flags |= RX_SW_DESC_INUSE;
  568                 sd->cl = cl;
  569                 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
  570                 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
  571                 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
  572                 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
  573 
  574                 d++;
  575                 sd++;
  576 
  577                 if (++q->pidx == q->size) {
  578                         q->pidx = 0;
  579                         q->gen ^= 1;
  580                         sd = q->sdesc;
  581                         d = q->desc;
  582                 }
  583                 q->credits++;
  584         }
  585 
  586 done:
  587         t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
  588 }
  589 
  590 
  591 /**
  592  *      free_rx_bufs - free the Rx buffers on an SGE free list
  593  *      @sc: the controle softc
  594  *      @q: the SGE free list to clean up
  595  *
  596  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
  597  *      this queue should be stopped before calling this function.
  598  */
  599 static void
  600 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
  601 {
  602         u_int cidx = q->cidx;
  603 
  604         while (q->credits--) {
  605                 struct rx_sw_desc *d = &q->sdesc[cidx];
  606 
  607                 if (d->flags & RX_SW_DESC_INUSE) {
  608                         bus_dmamap_unload(q->entry_tag, d->map);
  609                         bus_dmamap_destroy(q->entry_tag, d->map);
  610                         uma_zfree(q->zone, d->cl);
  611                 }
  612                 d->cl = NULL;
  613                 if (++cidx == q->size)
  614                         cidx = 0;
  615         }
  616 }
  617 
  618 static __inline void
  619 __refill_fl(adapter_t *adap, struct sge_fl *fl)
  620 {
  621         refill_fl(adap, fl, min(16U, fl->size - fl->credits), 0);
  622 }
  623 
  624 #ifndef DISABLE_MBUF_IOVEC
  625 /**
  626  *      recycle_rx_buf - recycle a receive buffer
  627  *      @adapter: the adapter
  628  *      @q: the SGE free list
  629  *      @idx: index of buffer to recycle
  630  *
  631  *      Recycles the specified buffer on the given free list by adding it at
  632  *      the next available slot on the list.
  633  */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
        struct rx_desc *from = &q->desc[idx];
        struct rx_desc *to   = &q->desc[q->pidx];

        /* Reuse the SW state and DMA address of the consumed buffer. */
        q->sdesc[q->pidx] = q->sdesc[idx];
        to->addr_lo = from->addr_lo;        // already big endian
        to->addr_hi = from->addr_hi;        // likewise
        /* Address words must be visible before the generation bits mark
         * the descriptor valid to the hardware. */
        wmb();
        to->len_gen = htobe32(V_FLD_GEN1(q->gen));
        to->gen2 = htobe32(V_FLD_GEN2(q->gen));
        q->credits++;

        if (++q->pidx == q->size) {
                q->pidx = 0;
                q->gen ^= 1;    /* ring wrapped: flip generation */
        }
        t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
  654 #endif
  655 
  656 static void
  657 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
  658 {
  659         uint32_t *addr;
  660 
  661         addr = arg;
  662         *addr = segs[0].ds_addr;
  663 }
  664 
/*
 * alloc_ring - allocate the DMA memory and software state for a ring
 * @sc: the controller softc
 * @nelem: number of descriptors
 * @elem_size: size of one hardware descriptor
 * @sw_size: size of one software descriptor, or 0 for none
 * @phys: receives the ring's bus address
 * @desc: receives the KVA of the hardware descriptor array
 * @sdesc: receives the KVA of the software descriptor array (if sw_size)
 * @tag, @map: receive the ring's DMA tag and map
 * @parent_entry_tag: parent for the per-entry tag, or NULL to skip it
 * @entry_tag: receives the per-entry DMA tag
 *
 * Returns 0 on success or ENOMEM on failure.
 * NOTE(review): the error paths do not release resources acquired earlier
 * in this function (the descriptor tag if bus_dmamem_alloc fails; tag and
 * memory if the entry-tag creation fails) — confirm callers clean up.
 */
static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
        size_t len = nelem * elem_size;
        void *s = NULL;
        void *p = NULL;
        int err;

        if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
                                      BUS_SPACE_MAXADDR_32BIT,
                                      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
                                      len, 0, NULL, NULL, tag)) != 0) {
                device_printf(sc->dev, "Cannot allocate descriptor tag\n");
                return (ENOMEM);
        }

        if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
                                    map)) != 0) {
                device_printf(sc->dev, "Cannot allocate descriptor memory\n");
                return (ENOMEM);
        }

        /* Obtain the bus address via the callback, then zero the ring. */
        bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
        bzero(p, len);
        *(void **)desc = p;

        if (sw_size) {
                len = nelem * sw_size;
                s = malloc(len, M_DEVBUF, M_WAITOK);
                bzero(s, len);
                *(void **)sdesc = s;
        }
        if (parent_entry_tag == NULL)
                return (0);
            
        if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
                                      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
                                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
                                      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
                                      NULL, NULL, entry_tag)) != 0) {
                device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
                return (ENOMEM);
        }
        return (0);
}
  712 
  713 static void
  714 sge_slow_intr_handler(void *arg, int ncount)
  715 {
  716         adapter_t *sc = arg;
  717 
  718         t3_slow_intr_handler(sc);
  719 }
  720 
  721 /**
  722  *      sge_timer_cb - perform periodic maintenance of an SGE qset
  723  *      @data: the SGE queue set to maintain
  724  *
  725  *      Runs periodically from a timer to perform maintenance of an SGE queue
  726  *      set.  It performs two tasks:
  727  *
  728  *      a) Cleans up any completed Tx descriptors that may still be pending.
  729  *      Normal descriptor cleanup happens when new packets are added to a Tx
  730  *      queue so this timer is relatively infrequent and does any cleanup only
  731  *      if the Tx queue has not seen any new packets in a while.  We make a
  732  *      best effort attempt to reclaim descriptors, in that we don't wait
  733  *      around if we cannot get a queue's lock (which most likely is because
  734  *      someone else is queueing new packets and so will also handle the clean
  735  *      up).  Since control queues use immediate data exclusively we don't
  736  *      bother cleaning them up here.
  737  *
  738  *      b) Replenishes Rx queues that have run out due to memory shortage.
  739  *      Normally new Rx buffers are added when existing ones are consumed but
  740  *      when out of memory a queue can become empty.  We try to add only a few
  741  *      buffers here, the queue will be replenished fully as these new buffers
  742  *      are used up if memory shortage has subsided.
  743  *      
  744  *      c) Return coalesced response queue credits in case a response queue is
  745  *      starved.
  746  *
  747  *      d) Ring doorbells for T304 tunnel queues since we have seen doorbell 
  748  *      fifo overflows and the FW doesn't implement any recovery scheme yet.
  749  */
  750 static void
  751 sge_timer_cb(void *arg)
  752 {
  753         adapter_t *sc = arg;
  754         struct port_info *p;
  755         struct sge_qset *qs;
  756         struct sge_txq  *txq;
  757         int i, j;
  758         int reclaim_eth, reclaim_ofl, refill_rx;
  759         
  760         for (i = 0; i < sc->params.nports; i++) 
  761                 for (j = 0; j < sc->port[i].nqsets; j++) {
  762                         qs = &sc->sge.qs[i + j];
  763                         txq = &qs->txq[0];
  764                         reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
  765                         reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
  766                         refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 
  767                             (qs->fl[1].credits < qs->fl[1].size));
  768                         if (reclaim_eth || reclaim_ofl || refill_rx) {
  769                                 p = &sc->port[i];
  770                                 taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
  771                                 break;
  772                         }
  773                 }
  774         if (sc->params.nports > 2) {
  775                 int i;
  776 
  777                 for_each_port(sc, i) {
  778                         struct port_info *pi = &sc->port[i];
  779 
  780                         t3_write_reg(sc, A_SG_KDOORBELL, 
  781                                      F_SELEGRCNTX | 
  782                                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
  783                 }
  784         }       
  785         if (sc->open_device_map != 0) 
  786                 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  787 }
  788 
  789 /*
  790  * This is meant to be a catch-all function to keep sge state private
  791  * to sge.c
  792  *
  793  */
  794 int
  795 t3_sge_init_adapter(adapter_t *sc)
  796 {
  797         callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
  798         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
  799         TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
  800         return (0);
  801 }
  802 
int
t3_sge_init_port(struct port_info *p)
{
        /* Deferred per-port reclaim work, scheduled by sge_timer_cb(). */
        TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
        return (0);
}
  809 
  810 void
  811 t3_sge_deinit_sw(adapter_t *sc)
  812 {
  813         int i;
  814         
  815         callout_drain(&sc->sge_timer_ch);
  816         if (sc->tq) 
  817                 taskqueue_drain(sc->tq, &sc->slow_intr_task);
  818         for (i = 0; i < sc->params.nports; i++) 
  819                 if (sc->port[i].tq != NULL)
  820                         taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
  821 }
  822 
  823 /**
  824  *      refill_rspq - replenish an SGE response queue
  825  *      @adapter: the adapter
  826  *      @q: the response queue to replenish
  827  *      @credits: how many new responses to make available
  828  *
  829  *      Replenishes a response queue by making the supplied number of responses
  830  *      available to HW.
  831  */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

        /* mbufs are allocated on demand when a rspq entry is processed. */
        /* Hand the credits back to HW in a single register write. */
        t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}
  840 
  841 static __inline void
  842 sge_txq_reclaim_(struct sge_txq *txq)
  843 {
  844         int reclaimable, i, n;
  845         struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
  846         struct port_info *p;
  847 
  848         p = txq->port;
  849 reclaim_more:
  850         n = 0;
  851         reclaimable = desc_reclaimable(txq);
  852         if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
  853                 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
  854                 mtx_unlock(&txq->lock);
  855         }
  856         if (n == 0)
  857                 return;
  858         
  859         for (i = 0; i < n; i++) {
  860                 m_freem_vec(m_vec[i]);
  861         }
  862         if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
  863             txq->size - txq->in_use >= TX_START_MAX_DESC) {
  864                 txq_fills++;
  865                 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
  866                 taskqueue_enqueue(p->tq, &p->start_task);
  867         }
  868 
  869         if (n)
  870                 goto reclaim_more;
  871 }
  872 
  873 static void
  874 sge_txq_reclaim_handler(void *arg, int ncount)
  875 {
  876         struct sge_txq *q = arg;
  877 
  878         sge_txq_reclaim_(q);
  879 }
  880 
/*
 * Deferred (taskqueue) reclaim for one port: harvest completed tx
 * descriptors, top up the free lists, and return coalesced response
 * queue credits.
 */
static void
sge_timer_reclaim(void *arg, int ncount)
{
        struct port_info *p = arg;
        int i, nqsets = p->nqsets;
        adapter_t *sc = p->adapter;
        struct sge_qset *qs;
        struct sge_txq *txq;
        struct mtx *lock;

        for (i = 0; i < nqsets; i++) {
                /*
                 * NOTE(review): indexes sc->sge.qs[i] directly, which
                 * assumes this port's qsets start at 0 — other code here
                 * uses p->first_qset for that; confirm.
                 */
                qs = &sc->sge.qs[i];
                txq = &qs->txq[TXQ_ETH];
                sge_txq_reclaim_(txq);

                txq = &qs->txq[TXQ_OFLD];
                sge_txq_reclaim_(txq);
                
                /*
                 * With MSI-X each qset has its own rspq lock; otherwise
                 * everything is serialized on qset 0's rspq lock.
                 */
                lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
                            &sc->sge.qs[0].rspq.lock;
                
        retry:
                if (mtx_trylock(lock)) {
                        /* XXX currently assume that we are *NOT* polling */
                        uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

                        /* Replenish free lists that have drained low. */
                        if (qs->fl[0].credits < qs->fl[0].size - 16)
                                __refill_fl(sc, &qs->fl[0]);
                        if (qs->fl[1].credits < qs->fl[1].size - 16)
                                __refill_fl(sc, &qs->fl[1]);
                        
                        /*
                         * If HW flagged this rspq as starved, return one
                         * credit and clear the starvation bit.
                         */
                        if (status & (1 << qs->rspq.cntxt_id)) {
                                if (qs->rspq.credits) {
                                        refill_rspq(sc, &qs->rspq, 1);
                                        qs->rspq.credits--;
                                        t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 
                                            1 << qs->rspq.cntxt_id);
                                }
                        }
                        mtx_unlock(lock);
                }
                /*
                 * NOTE(review): this retries even when mtx_trylock failed,
                 * so a persistently-held lock with low fl credits busy
                 * spins here — verify that is intended.
                 */
                if ((qs->fl[0].credits < qs->fl[0].size - 16) ||
                    (qs->fl[1].credits < qs->fl[1].size - 16))
                        goto retry;
        }
}
  927 
  928 /**
  929  *      init_qset_cntxt - initialize an SGE queue set context info
  930  *      @qs: the queue set
  931  *      @id: the queue set id
  932  *
  933  *      Initializes the TIDs and context ids for the queues of a queue set.
  934  */
  935 static void
  936 init_qset_cntxt(struct sge_qset *qs, u_int id)
  937 {
  938 
  939         qs->rspq.cntxt_id = id;
  940         qs->fl[0].cntxt_id = 2 * id;
  941         qs->fl[1].cntxt_id = 2 * id + 1;
  942         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
  943         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
  944         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
  945         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
  946         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
  947 }
  948 
  949 
/*
 * Advance a tx queue's producer state by ndesc descriptors, capturing the
 * pre-advance generation/pidx and the WR completion flag in *txqs.
 */
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
        txq->in_use += ndesc;
        /*
         * XXX we don't handle stopping of queue
         * presumably start handles this when we bump against the end
         */
        txqs->gen = txq->gen;
        txq->unacked += ndesc;
        /*
         * Request a completion roughly every 8 descriptors: once the
         * accumulated count reaches 8, bit 3 is set and shifted into the
         * WR_COMPL position, then the counter is reduced modulo 8.
         */
        txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
        txq->unacked &= 7;
        txqs->pidx = txq->pidx;
        txq->pidx += ndesc;
        
        /* Wrap the producer index and flip the generation bit. */
        if (txq->pidx >= txq->size) {
                txq->pidx -= txq->size;
                txq->gen ^= 1;
        }

}
  971 
  972 /**
  973  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
  974  *      @m: the packet mbufs
  975  *      @nsegs: the number of segments 
  976  *
  977  *      Returns the number of Tx descriptors needed for the given Ethernet
  978  *      packet.  Ethernet packets require addition of WR and CPL headers.
  979  */
  980 static __inline unsigned int
  981 calc_tx_descs(const struct mbuf *m, int nsegs)
  982 {
  983         unsigned int flits;
  984 
  985         if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
  986                 return 1;
  987 
  988         flits = sgl_len(nsegs) + 2;
  989 #ifdef TSO_SUPPORTED
  990         if  (m->m_pkthdr.csum_flags & (CSUM_TSO))
  991                 flits++;
  992 #endif  
  993         return flits_to_desc(flits);
  994 }
  995 
/*
 * DMA-map an outgoing mbuf chain into segs[]/nsegs for SGL construction.
 * Returns 0 on success or an errno; on any failure other than ENOMEM the
 * chain is freed and *m is set to NULL so the caller must not touch it.
 */
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
{
        struct mbuf *m0;
        int err, pktlen;
        
        m0 = *m;
        pktlen = m0->m_pkthdr.len;

        /* First attempt: map the mbuf vector as-is. */
        err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
#ifdef DEBUG            
        if (err) {
                int n = 0;
                struct mbuf *mtmp = m0;
                while(mtmp) {
                        n++;
                        mtmp = mtmp->m_next;
                }
                printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
                    err, m0->m_pkthdr.len, n);
        }
#endif
        if (err == EFBIG) {
                /*
                 * Too many segments, try to defrag.  On defrag failure the
                 * original chain is still intact and is freed here.
                 * NOTE(review): the retry uses bus_dmamap_load_mbuf_sg, not
                 * the mvec variant — presumably because m_defrag returns a
                 * plain chain; confirm.
                 */
                m0 = m_defrag(m0, M_DONTWAIT);
                if (m0 == NULL) {
                        m_freem(*m);
                        *m = NULL;
                        return (ENOBUFS);
                }
                *m = m0;
                err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
        }

        /* ENOMEM is transient: leave the chain alone so the caller can retry. */
        if (err == ENOMEM) {
                return (err);
        }

        if (err) {
                if (cxgb_debug)
                        printf("map failure err=%d pktlen=%d\n", err, pktlen);
                /* NOTE(review): frees with m_freem_vec here but m_freem above. */
                m_freem_vec(m0);
                *m = NULL;
                return (err);
        }

        /* Flush CPU writes to the buffers before handing them to the NIC. */
        bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
        stx->flags |= TX_SW_DESC_MAPPED;

        return (0);
}
 1048 
 1049 /**
 1050  *      make_sgl - populate a scatter/gather list for a packet
 1051  *      @sgp: the SGL to populate
 1052  *      @segs: the packet dma segments
 1053  *      @nsegs: the number of segments
 1054  *
 1055  *      Generates a scatter/gather list for the buffers that make up a packet
 1056  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 1057  *      appropriately.
 1058  */
 1059 static __inline void
 1060 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
 1061 {
 1062         int i, idx;
 1063         
 1064         for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
 1065                 if (i && idx == 0) 
 1066                         ++sgp;
 1067 
 1068                 sgp->len[idx] = htobe32(segs[i].ds_len);
 1069                 sgp->addr[idx] = htobe64(segs[i].ds_addr);
 1070         }
 1071         
 1072         if (idx)
 1073                 sgp->len[idx] = 0;
 1074 }
 1075         
 1076 /**
 1077  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 1078  *      @adap: the adapter
 1079  *      @q: the Tx queue
 1080  *
 *      Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 1082  *      where the HW is going to sleep just after we checked, however,
 1083  *      then the interrupt handler will detect the outstanding TX packet
 1084  *      and ring the doorbell for us.
 1085  *
 1086  *      When GTS is disabled we unconditionally ring the doorbell.
 1087  */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
        /*
         * GTS mode: only ring the doorbell when the queue was not already
         * running; the flag dance avoids redundant doorbells.
         */
        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
                set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
                T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
                          q->cntxt_id);
#endif
                t3_write_reg(adap, A_SG_KDOORBELL,
                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        }
#else
        /* GTS disabled: always ring, after the descriptors are visible. */
        wmb();            /* write descriptors before telling HW */
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}
 1108 
/*
 * Write the generation bit into a descriptor's last flit (only needed
 * when the SGE uses two generation bits per descriptor).
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
        d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
 1116 
 1117 
 1118 
 1119 /**
 1120  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
 1121  *      @ndesc: number of Tx descriptors spanned by the SGL
 1122  *      @txd: first Tx descriptor to be written
 1123  *      @txqs: txq state (generation and producer index)
 1124  *      @txq: the SGE Tx queue
 1125  *      @sgl: the SGL
 1126  *      @flits: number of flits to the start of the SGL in the first descriptor
 1127  *      @sgl_flits: the SGL size in flits
 1128  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
 1129  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
 1130  *
 1131  *      Write a work request header and an associated SGL.  If the SGL is
 1132  *      small enough to fit into one Tx descriptor it has already been written
 1133  *      and we just need to write the WR header.  Otherwise we distribute the
 1134  *      SGL across the number of descriptors it spans.
 1135  */
 1136 
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

        struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
        struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
        
        if (__predict_true(ndesc == 1)) {
                /*
                 * Fast path: SGL already sits in this descriptor; just fill
                 * in the header.  wr_lo (carrying the generation bit) is
                 * written after wmb() so HW can't see a valid-looking WR
                 * before the rest of it is in memory.
                 */
                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
                    V_WR_SGLSFLT(flits)) | wr_hi;
                wmb();
                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
                    V_WR_GEN(txqs->gen)) | wr_lo;
                /* XXX gen? */
                wr_gen2(txd, txqs->gen);
        } else {
                /*
                 * Slow path: copy the SGL piecewise across ndesc
                 * descriptors.  The first descriptor's wr_lo is withheld
                 * (written last, with the original generation ogen) so the
                 * whole multi-descriptor WR becomes visible atomically.
                 */
                unsigned int ogen = txqs->gen;
                const uint64_t *fp = (const uint64_t *)sgl;
                struct work_request_hdr *wp = wrp;
                
                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
                    V_WR_SGLSFLT(flits)) | wr_hi;
                
                while (sgl_flits) {
                        unsigned int avail = WR_FLITS - flits;

                        if (avail > sgl_flits)
                                avail = sgl_flits;
                        memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
                        sgl_flits -= avail;
                        ndesc--;
                        if (!sgl_flits)
                                break;
                        
                        /* Advance to the next descriptor, wrapping the ring. */
                        fp += avail;
                        txd++;
                        txsd++;
                        if (++txqs->pidx == txq->size) {
                                txqs->pidx = 0;
                                txqs->gen ^= 1;
                                txd = txq->desc;
                                txsd = txq->sdesc;
                        }
                        
                        /*
                         * when the head of the mbuf chain
                         * is freed all clusters will be freed
                         * with it
                         */
                        txsd->m = NULL;
                        wrp = (struct work_request_hdr *)txd;
                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
                            V_WR_SGLSFLT(1)) | wr_hi;
                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
                                    sgl_flits + 1)) |
                            V_WR_GEN(txqs->gen)) | wr_lo;
                        wr_gen2(txd, txqs->gen);
                        flits = 1;
                }
                /* Mark end-of-packet, then publish the first descriptor. */
                wrp->wr_hi |= htonl(F_WR_EOP);
                wmb();
                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
                wr_gen2((struct tx_desc *)wp, ogen);
        }
}
 1204 
 1205         
 1206 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
 1207 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
 1208 
/*
 * Encapsulate an outgoing Ethernet packet into tx descriptors on the
 * port's TXQ_ETH queue and ring the doorbell.  Returns 0 on success or
 * an errno from DMA mapping.  *free is set to 1 only on the
 * immediate-data path; NOTE(review): presumably the caller initializes
 * *free to 0 and uses it to decide who frees the mbuf — confirm.
 */
int
t3_encap(struct port_info *p, struct mbuf **m, int *free)
{
        adapter_t *sc;
        struct mbuf *m0;
        struct sge_qset *qs;
        struct sge_txq *txq;
        struct tx_sw_desc *stx;
        struct txq_state txqs;
        unsigned int ndesc, flits, cntrl, mlen;
        int err, nsegs, tso_info = 0;

        struct work_request_hdr *wrp;
        struct tx_sw_desc *txsd;
        struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
        bus_dma_segment_t segs[TX_MAX_SEGS];
        uint32_t wr_hi, wr_lo, sgl_flits; 

        struct tx_desc *txd;
        struct cpl_tx_pkt *cpl;
       
        m0 = *m;        
        sc = p->adapter;
        
        DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);

        /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */

        qs = &sc->sge.qs[p->first_qset];

        /* Current producer slot on the Ethernet tx queue. */
        txq = &qs->txq[TXQ_ETH];
        stx = &txq->sdesc[txq->pidx];
        txd = &txq->desc[txq->pidx];
        cpl = (struct cpl_tx_pkt *)txd;
        mlen = m0->m_pkthdr.len;
        cpl->len = htonl(mlen | 0x80000000);
        
        DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
        /*
         * XXX handle checksum, TSO, and VLAN here
         *       
         */
        cntrl = V_TXPKT_INTF(p->txpkt_intf);

        /*
         * XXX need to add VLAN support for 6.x
         */
#ifdef VLAN_SUPPORTED
        if (m0->m_flags & M_VLANTAG) 
                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
        if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
                tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif          
        if (tso_info) {
                /*
                 * TSO: build the larger LSO CPL header; the IP and TCP
                 * header lengths are extracted from the packet itself.
                 */
                int eth_type;
                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
                struct ip *ip;
                struct tcphdr *tcp;
                char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
                
                txd->flit[2] = 0;
                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
                hdr->cntrl = htonl(cntrl);
                
                /* Copy out the headers if they span mbufs. */
                if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
                        pkthdr = &tmp[0];
                        m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
                } else {
                        pkthdr = mtod(m0, char *);
                }

                if (__predict_false(m0->m_flags & M_VLANTAG)) {
                        eth_type = CPL_ETH_II_VLAN;
                        ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
                            ETHER_VLAN_ENCAP_LEN);
                } else {
                        eth_type = CPL_ETH_II;
                        ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
                }
                tcp = (struct tcphdr *)((uint8_t *)ip +
                    sizeof(*ip)); 

                tso_info |= V_LSO_ETH_TYPE(eth_type) |
                            V_LSO_IPHDR_WORDS(ip->ip_hl) |
                            V_LSO_TCPHDR_WORDS(tcp->th_off);
                hdr->lso_info = htonl(tso_info);
                flits = 3;      
        } else {
                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
                cpl->cntrl = htonl(cntrl);
                
                /*
                 * Small packets are copied straight into the descriptor as
                 * immediate data — no DMA mapping, one descriptor, done.
                 */
                if (mlen <= WR_LEN - sizeof(*cpl)) {
                        txq_prod(txq, 1, &txqs);
                        txq->sdesc[txqs.pidx].m = NULL;
                        
                        if (m0->m_len == m0->m_pkthdr.len)
                                memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
                        else
                                m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);

                        /* Data was copied; the caller may free the mbuf. */
                        *free = 1;
                        flits = (mlen + 7) / 8 + 2;
                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
                                          V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
                                          F_WR_SOP | F_WR_EOP | txqs.compl);
                        wmb();
                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
                            V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

                        wr_gen2(txd, txqs.gen);
                        check_ring_tx_db(sc, txq);
                        return (0);
                }
                flits = 2;
        }

        /* Scatter/gather path: DMA-map the chain and emit an SGL WR. */
        wrp = (struct work_request_hdr *)txd;
        
        if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
                return (err);
        }
        m0 = *m;
        ndesc = calc_tx_descs(m0, nsegs);
        
        /* Single-descriptor SGLs are built in place; larger ones on-stack. */
        sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
        make_sgl(sgp, segs, nsegs);

        sgl_flits = sgl_len(nsegs);

        DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
        txq_prod(txq, ndesc, &txqs);
        txsd = &txq->sdesc[txqs.pidx];
        wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
        wr_lo = htonl(V_WR_TID(txq->token));
        /* The sw descriptor owns the chain until reclaim frees it. */
        txsd->m = m0;
        m_set_priority(m0, txqs.pidx); 

        write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
        check_ring_tx_db(p->adapter, txq);

        return (0);
}
 1351 
 1352 
 1353 /**
 1354  *      write_imm - write a packet into a Tx descriptor as immediate data
 1355  *      @d: the Tx descriptor to write
 1356  *      @m: the packet
 1357  *      @len: the length of packet data to write as immediate data
 1358  *      @gen: the generation bit value to write
 1359  *
 1360  *      Writes a packet as immediate data into a Tx descriptor.  The packet
 1361  *      contains a work request at its beginning.  We must write the packet
 1362  *      carefully so the SGE doesn't read accidentally before it's written in
 1363  *      its entirety.
 1364  */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
          unsigned int len, unsigned int gen)
{
        struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
        struct work_request_hdr *to = (struct work_request_hdr *)d;

        /* Copy the payload that follows the WR header, then the header
         * itself; wr_lo (with the generation bit) goes last, after wmb(),
         * so the SGE never reads a half-written descriptor. */
        memcpy(&to[1], &from[1], len - sizeof(*from));
        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
                                        V_WR_BCNTLFLT(len & 7));
        wmb();
        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
                                        V_WR_LEN((len + 7) / 8));
        wr_gen2(d, gen);
        /* The packet is fully copied into the descriptor; free it. */
        m_freem(m);
}
 1381 
 1382 /**
 1383  *      check_desc_avail - check descriptor availability on a send queue
 1384  *      @adap: the adapter
 1385  *      @q: the TX queue
 1386  *      @m: the packet needing the descriptors
 1387  *      @ndesc: the number of Tx descriptors needed
 1388  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 1389  *
 1390  *      Checks if the requested number of Tx descriptors is available on an
 1391  *      SGE send queue.  If the queue is already suspended or not enough
 1392  *      descriptors are available the packet is queued for later transmission.
 1393  *      Must be called with the Tx queue locked.
 1394  *
 1395  *      Returns 0 if enough descriptors are available, 1 if there aren't
 1396  *      enough descriptors and the packet has been queued, and 2 if the caller
 1397  *      needs to retry because there weren't enough descriptors at the
 1398  *      beginning of the call but some freed up in the mean time.
 1399  */
 1400 static __inline int
 1401 check_desc_avail(adapter_t *adap, struct sge_txq *q,
 1402                  struct mbuf *m, unsigned int ndesc,
 1403                  unsigned int qid)
 1404 {
 1405         /* 
 1406          * XXX We currently only use this for checking the control queue
 1407          * the control queue is only used for binding qsets which happens
 1408          * at init time so we are guaranteed enough descriptors
 1409          */
 1410         if (__predict_false(!mbufq_empty(&q->sendq))) {
 1411 addq_exit:      mbufq_tail(&q->sendq, m);
 1412                 return 1;
 1413         }
 1414         if (__predict_false(q->size - q->in_use < ndesc)) {
 1415 
 1416                 struct sge_qset *qs = txq_to_qset(q, qid);
 1417 
 1418                 setbit(&qs->txq_stopped, qid);
 1419                 smp_mb();
 1420 
 1421                 if (should_restart_tx(q) &&
 1422                     test_and_clear_bit(qid, &qs->txq_stopped))
 1423                         return 2;
 1424 
 1425                 q->stops++;
 1426                 goto addq_exit;
 1427         }
 1428         return 0;
 1429 }
 1430 
 1431 
 1432 /**
 1433  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 1434  *      @q: the SGE control Tx queue
 1435  *
 1436  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 1437  *      that send only immediate data (presently just the control queues) and
 1438  *      thus do not have any mbufs
 1439  */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
        unsigned int reclaim = q->processed - q->cleaned;

        mtx_assert(&q->lock, MA_OWNED);
        
        /* Immediate-data queues have no mbufs to free: just credit back. */
        q->in_use -= reclaim;
        q->cleaned += reclaim;
}
 1450 
 1451 static __inline int
 1452 immediate(const struct mbuf *m)
 1453 {
 1454         return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
 1455 }
 1456 
 1457 /**
 1458  *      ctrl_xmit - send a packet through an SGE control Tx queue
 1459  *      @adap: the adapter
 1460  *      @q: the control queue
 1461  *      @m: the packet
 1462  *
 1463  *      Send a packet through an SGE control Tx queue.  Packets sent through
 1464  *      a control queue must fit entirely as immediate data in a single Tx
 1465  *      descriptor and have no page fragments.
 1466  */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
        int ret;
        struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

        /*
         * Control WRs must fit as immediate data.  NOTE(review): an
         * oversized packet is dropped but 0 (success) is returned —
         * confirm callers expect that.
         */
        if (__predict_false(!immediate(m))) {
                m_freem(m);
                return 0;
        }

        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
        wrp->wr_lo = htonl(V_WR_TID(q->token));

        mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

        /* 2 means descriptors freed up during the check: retry. */
        ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
        if (__predict_false(ret)) {
                if (ret == 1) {
                        /* Packet was queued for later transmission. */
                        mtx_unlock(&q->lock);
                        return (-1);
                }
                goto again;
        }

        write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

        /* Advance the producer, wrapping and flipping the generation bit. */
        q->in_use++;
        if (++q->pidx >= q->size) {
                q->pidx = 0;
                q->gen ^= 1;
        }
        mtx_unlock(&q->lock);
        wmb();
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        return (0);
}
 1506 
 1507 
 1508 /**
 1509  *      restart_ctrlq - restart a suspended control queue
 *      @qs: the queue set containing the control queue
 1511  *
 1512  *      Resumes transmission on a suspended Tx control queue.
 1513  */
static void
restart_ctrlq(void *data, int npending)
{
        struct mbuf *m;
        struct sge_qset *qs = (struct sge_qset *)data;
        struct sge_txq *q = &qs->txq[TXQ_CTRL];
        adapter_t *adap = qs->port->adapter;

        mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

        /* Drain the backlog while descriptors remain. */
        while (q->in_use < q->size &&
               (m = mbufq_dequeue(&q->sendq)) != NULL) {

                write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

                if (++q->pidx >= q->size) {
                        q->pidx = 0;
                        q->gen ^= 1;
                }
                q->in_use++;
        }
        /*
         * Backlog remains: re-stop the queue, but retry if descriptors
         * were freed between the check and setting the stop bit.
         */
        if (!mbufq_empty(&q->sendq)) {
                setbit(&qs->txq_stopped, TXQ_CTRL);
                smp_mb();

                if (should_restart_tx(q) &&
                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
                        goto again;
                q->stops++;
        }
        mtx_unlock(&q->lock);
        /* Tell the SGE about the newly written descriptors. */
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
 1549 
 1550 
 1551 /*
 1552  * Send a management message through control queue 0
 1553  */
 1554 int
 1555 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
 1556 {
 1557         return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
 1558 }
 1559 
 1560 /**
 1561  *      free_qset - free the resources of an SGE queue set
 1562  *      @sc: the controller owning the queue set
 1563  *      @q: the queue set
 1564  *
 1565  *      Release the HW and SW resources associated with an SGE queue set, such
 1566  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 1567  *      queue set must be quiesced prior to calling this.
 1568  */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
        int i;

        /*
         * Free lists: disable the HW context under reg_lock, then release
         * the DMA resources and finally the sw descriptors/buffers.
         */
        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
                if (q->fl[i].desc) {
                        mtx_lock(&sc->sge.reg_lock);
                        t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
                        mtx_unlock(&sc->sge.reg_lock);
                        bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
                        bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
                                        q->fl[i].desc_map);
                        bus_dma_tag_destroy(q->fl[i].desc_tag);
                        bus_dma_tag_destroy(q->fl[i].entry_tag);
                }
                if (q->fl[i].sdesc) {
                        free_rx_bufs(sc, &q->fl[i]);
                        free(q->fl[i].sdesc, M_DEVBUF);
                }
        }

        /* Tx queues: disable each egress context, then tear down DMA state. */
        for (i = 0; i < SGE_TXQ_PER_SET; i++) {
                if (q->txq[i].desc) {
                        mtx_lock(&sc->sge.reg_lock);
                        t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
                        mtx_unlock(&sc->sge.reg_lock);
                        bus_dmamap_unload(q->txq[i].desc_tag,
                                        q->txq[i].desc_map);
                        bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
                                        q->txq[i].desc_map);
                        bus_dma_tag_destroy(q->txq[i].desc_tag);
                        bus_dma_tag_destroy(q->txq[i].entry_tag);
                        MTX_DESTROY(&q->txq[i].lock);
                }
                if (q->txq[i].sdesc) {
                        free(q->txq[i].sdesc, M_DEVBUF);
                }
        }

        /* Response queue last. */
        if (q->rspq.desc) {
                mtx_lock(&sc->sge.reg_lock);
                t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
                mtx_unlock(&sc->sge.reg_lock);
                
                bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
                bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
                                q->rspq.desc_map);
                bus_dma_tag_destroy(q->rspq.desc_tag);
                MTX_DESTROY(&q->rspq.lock);
        }

        /* Clear the qset so a later re-init starts from a known state. */
        bzero(q, sizeof(*q));
}
 1623 
 1624 /**
 1625  *      t3_free_sge_resources - free SGE resources
 1626  *      @sc: the adapter softc
 1627  *
 1628  *      Frees resources used by the SGE queue sets.
 1629  */
 1630 void
 1631 t3_free_sge_resources(adapter_t *sc)
 1632 {
 1633         int i, nqsets;
 1634 
 1635         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 1636                 nqsets += sc->port[i].nqsets;
 1637         
 1638         for (i = 0; i < nqsets; ++i)
 1639                 t3_free_qset(sc, &sc->sge.qs[i]);
 1640 }
 1641 
 1642 /**
 1643  *      t3_sge_start - enable SGE
 1644  *      @sc: the controller softc
 1645  *
 1646  *      Enables the SGE for DMAs.  This is the last step in starting packet
 1647  *      transfers.
 1648  */
void
t3_sge_start(adapter_t *sc)
{
        /* Setting the global-enable bit lets the SGE begin DMA. */
        t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
 1654 
 1655 /**
 1656  *      t3_sge_stop - disable SGE operation
 1657  *      @sc: the adapter
 1658  *
 *      Disables the DMA engine.  This can be called in emergencies (e.g.,
 1660  *      from error interrupts) or from normal process context.  In the latter
 1661  *      case it also disables any pending queue restart tasklets.  Note that
 1662  *      if it is called in interrupt context it cannot disable the restart
 1663  *      tasklets as it cannot wait, however the tasklets will have no effect
 1664  *      since the doorbells are disabled and the driver will call this again
 1665  *      later from process context, at which time the tasklets will be stopped
 1666  *      if they are still running.
 1667  */
 1668 void
 1669 t3_sge_stop(adapter_t *sc)
 1670 {
 1671         int i, nqsets;
 1672         
 1673         t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 1674 
 1675         if (sc->tq == NULL)
 1676                 return;
 1677         
 1678         for (nqsets = i = 0; i < (sc)->params.nports; i++) 
 1679                 nqsets += sc->port[i].nqsets;
 1680         
 1681         for (i = 0; i < nqsets; ++i) {
 1682                 struct sge_qset *qs = &sc->sge.qs[i];
 1683                 
 1684                 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 1685                 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 1686         }
 1687 }
 1688 
 1689 
 1690 /**
 1691  *      free_tx_desc - reclaims Tx descriptors and their buffers
 *      @q: the Tx queue to reclaim descriptors from
 *      @n: the number of descriptors to reclaim
 *      @m_vec: array that receives the reclaimed mbufs for the caller to free
 1695  *
 1696  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 1697  *      Tx buffers.  Called with the Tx queue lock held.
 1698  */
int
free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
{
        struct tx_sw_desc *d;
        unsigned int cidx = q->cidx;
        int nbufs = 0;          /* mbufs collected into m_vec */
        
#ifdef T3_TRACE
        T3_TRACE2(sc->tb[q->cntxt_id & 7],
                  "reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
        d = &q->sdesc[cidx];
        
        /* Walk n software descriptors starting at cidx, wrapping at q->size. */
        while (n-- > 0) {
                DPRINTF("cidx=%d d=%p\n", cidx, d);
                if (d->m) {
                        if (d->flags & TX_SW_DESC_MAPPED) {
                                bus_dmamap_unload(q->entry_tag, d->map);
                                bus_dmamap_destroy(q->entry_tag, d->map);
                                d->flags &= ~TX_SW_DESC_MAPPED;
                        }
                        /*
                         * The mbuf's priority records the descriptor index it
                         * was queued at; collect it only when we reach that
                         * index so a multi-descriptor packet is returned once.
                         */
                        if (m_get_priority(d->m) == cidx) {
                                m_vec[nbufs] = d->m;
                                d->m = NULL;
                                nbufs++;
                        } else {
                                printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
                        }
                }
                ++d;
                if (++cidx == q->size) {
                        cidx = 0;
                        d = q->sdesc;
                }
        }
        q->cidx = cidx;

        /* Caller is responsible for freeing the nbufs mbufs in m_vec. */
        return (nbufs);
}
 1738 
 1739 /**
 1740  *      is_new_response - check if a response is newly written
 1741  *      @r: the response descriptor
 1742  *      @q: the response queue
 1743  *
 1744  *      Returns true if a response descriptor contains a yet unprocessed
 1745  *      response.
 1746  */
 1747 static __inline int
 1748 is_new_response(const struct rsp_desc *r,
 1749     const struct sge_rspq *q)
 1750 {
 1751         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
 1752 }
 1753 
/* Response-descriptor flag bits that carry Tx queue GTS indications. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/* All Tx control bits: GTS flags plus the per-queue credit-return fields. */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
 1762 
 1763 /**
 1764  *      write_ofld_wr - write an offload work request
 1765  *      @adap: the adapter
 1766  *      @m: the packet to send
 1767  *      @q: the Tx queue
 1768  *      @pidx: index of the first Tx descriptor to write
 1769  *      @gen: the generation value to use
 1770  *      @ndesc: number of descriptors the packet will occupy
 1771  *
 1772  *      Write an offload work request to send the supplied packet.  The packet
 1773  *      data already carry the work request with most fields populated.
 1774  */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
        unsigned int sgl_flits, flits;
        struct work_request_hdr *from;
        struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
        struct tx_desc *d = &q->desc[pidx];
        struct txq_state txqs;
        
        /* Small packets are copied straight into the descriptor. */
        if (immediate(m)) {
                q->sdesc[pidx].m = NULL;
                write_imm(d, m, m->m_len, gen);
                return;
        }

        /* Only TX_DATA builds SGLs */

        /*
         * Copy the WR header flits into the descriptor; m_pkthdr.header
         * marks where the pre-built WR ends and payload begins.
         */
        from = mtod(m, struct work_request_hdr *);
        memcpy(&d->flit[1], &from[1],
            (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));

        flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
        /* Build the SGL in-place when the WR fits in a single descriptor. */
        sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

        make_sgl(sgp, segs, nsegs);
        sgl_flits = sgl_len(nsegs);

        txqs.gen = q->gen;
        txqs.pidx = q->pidx;
        /* Completion-request bit derived from the unacked WR count. */
        txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
        write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
            from->wr_hi, from->wr_lo);
}
 1811 
 1812 /**
 1813  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 1814  *      @m: the packet
 1815  *
 1816  *      Returns the number of Tx descriptors needed for the given offload
 1817  *      packet.  These packets are already fully constructed.
 1818  */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
        /* NOTE(review): nsegs is currently unused here — verify intent. */
        unsigned int flits, cnt = 0;


        if (m->m_len <= WR_LEN)
                return 1;                 /* packet fits as immediate data */

        /* mbuf iovecs need one SGL entry per vector element. */
        if (m->m_flags & M_IOVEC)
                cnt = mtomv(m)->mv_count;

        flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;   /* headers */

        return flits_to_desc(flits + sgl_len(cnt));
}
 1835 
 1836 /**
 1837  *      ofld_xmit - send a packet through an offload queue
 1838  *      @adap: the adapter
 1839  *      @q: the Tx offload queue
 1840  *      @m: the packet
 1841  *
 1842  *      Send an offload packet through an SGE offload queue.
 1843  */
 1844 static int
 1845 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
 1846 {
 1847         int ret, nsegs;
 1848         unsigned int ndesc;
 1849         unsigned int pidx, gen;
 1850         struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
 1851         bus_dma_segment_t segs[TX_MAX_SEGS];
 1852         int i, cleaned;
 1853         struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 1854 
 1855         mtx_lock(&q->lock);
 1856         if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
 1857                 mtx_unlock(&q->lock);
 1858                 return (ret);
 1859         }
 1860         ndesc = calc_tx_descs_ofld(m, nsegs);
 1861 again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
 1862 
 1863         ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 1864         if (__predict_false(ret)) {
 1865                 if (ret == 1) {
 1866                         m_set_priority(m, ndesc);     /* save for restart */
 1867                         mtx_unlock(&q->lock);
 1868                         return EINTR;
 1869                 }
 1870                 goto again;
 1871         }
 1872 
 1873         gen = q->gen;
 1874         q->in_use += ndesc;
 1875         pidx = q->pidx;
 1876         q->pidx += ndesc;
 1877         if (q->pidx >= q->size) {
 1878                 q->pidx -= q->size;
 1879                 q->gen ^= 1;
 1880         }
 1881 #ifdef T3_TRACE
 1882         T3_TRACE5(adap->tb[q->cntxt_id & 7],
 1883                   "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
 1884                   ndesc, pidx, skb->len, skb->len - skb->data_len,
 1885                   skb_shinfo(skb)->nr_frags);
 1886 #endif
 1887         mtx_unlock(&q->lock);
 1888 
 1889         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 1890         check_ring_tx_db(adap, q);
 1891         
 1892         for (i = 0; i < cleaned; i++) {
 1893                 m_freem_vec(m_vec[i]);
 1894         }
 1895         return (0);
 1896 }
 1897 
 1898 /**
 1899  *      restart_offloadq - restart a suspended offload queue
 *      @qs: the queue set containing the offload queue
 1901  *
 1902  *      Resumes transmission on a suspended Tx offload queue.
 1903  */
static void
restart_offloadq(void *data, int npending)
{

        struct mbuf *m;
        struct sge_qset *qs = data;
        struct sge_txq *q = &qs->txq[TXQ_OFLD];
        adapter_t *adap = qs->port->adapter;
        struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
        bus_dma_segment_t segs[TX_MAX_SEGS];
        int nsegs, i, cleaned;
        struct tx_sw_desc *stx = &q->sdesc[q->pidx];
                
        mtx_lock(&q->lock);
again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

        /* Drain packets deferred while descriptors were unavailable. */
        while ((m = mbufq_peek(&q->sendq)) != NULL) {
                unsigned int gen, pidx;
                /* ofld_xmit stashed the descriptor count in the priority. */
                unsigned int ndesc = m_get_priority(m);

                if (__predict_false(q->size - q->in_use < ndesc)) {
                        /*
                         * Still no room: mark the queue stopped, then
                         * re-check in case descriptors were freed between
                         * the test and the setbit (avoids a lost wakeup).
                         */
                        setbit(&qs->txq_stopped, TXQ_OFLD);
                        smp_mb();

                        if (should_restart_tx(q) &&
                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
                                goto again;
                        q->stops++;
                        break;
                }

                /* Claim ndesc descriptors; wrap pidx, toggle gen on wrap. */
                gen = q->gen;
                q->in_use += ndesc;
                pidx = q->pidx;
                q->pidx += ndesc;
                if (q->pidx >= q->size) {
                        q->pidx -= q->size;
                        q->gen ^= 1;
                }
                
                (void)mbufq_dequeue(&q->sendq);
                busdma_map_mbufs(&m, q, stx, segs, &nsegs);
                /* Drop the queue lock while building the work request. */
                mtx_unlock(&q->lock);
                write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
                mtx_lock(&q->lock);
        }
        mtx_unlock(&q->lock);
        
#if USE_GTS
        set_bit(TXQ_RUNNING, &q->flags);
        set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
        /* Ring the doorbell so the SGE fetches the new descriptors. */
        t3_write_reg(adap, A_SG_KDOORBELL,
                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        
        /* Free the mbufs reclaimed from completed descriptors. */
        for (i = 0; i < cleaned; i++) {
                m_freem_vec(m_vec[i]);
        }
}
 1963 
 1964 /**
 1965  *      queue_set - return the queue set a packet should use
 1966  *      @m: the packet
 1967  *
 1968  *      Maps a packet to the SGE queue set it should use.  The desired queue
 1969  *      set is carried in bits 1-3 in the packet's priority.
 1970  */
 1971 static __inline int
 1972 queue_set(const struct mbuf *m)
 1973 {
 1974         return m_get_priority(m) >> 1;
 1975 }
 1976 
 1977 /**
 1978  *      is_ctrl_pkt - return whether an offload packet is a control packet
 1979  *      @m: the packet
 1980  *
 1981  *      Determines whether an offload packet should use an OFLD or a CTRL
 1982  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
 1983  */
 1984 static __inline int
 1985 is_ctrl_pkt(const struct mbuf *m)
 1986 {
 1987         return m_get_priority(m) & 1;
 1988 }
 1989 
 1990 /**
 1991  *      t3_offload_tx - send an offload packet
 1992  *      @tdev: the offload device to send to
 1993  *      @m: the packet
 1994  *
 1995  *      Sends an offload packet.  We use the packet priority to select the
 1996  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 1997  *      should be sent as regular or control, bits 1-3 select the queue set.
 1998  */
 1999 int
 2000 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
 2001 {
 2002         adapter_t *adap = tdev2adap(tdev);
 2003         struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
 2004 
 2005         if (__predict_false(is_ctrl_pkt(m)))
 2006                 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
 2007 
 2008         return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
 2009 }
 2010 
 2011 /**
 2012  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 2013  *      @tdev: the offload device that will be receiving the packets
 2014  *      @q: the SGE response queue that assembled the bundle
 2015  *      @m: the partial bundle
 2016  *      @n: the number of packets in the bundle
 2017  *
 2018  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
 2019  */
 2020 static __inline void
 2021 deliver_partial_bundle(struct toedev *tdev,
 2022                         struct sge_rspq *q,
 2023                         struct mbuf *mbufs[], int n)
 2024 {
 2025         if (n) {
 2026                 q->offload_bundles++;
 2027                 cxgb_ofld_recv(tdev, mbufs, n);
 2028         }
 2029 }
 2030 
 2031 static __inline int
 2032 rx_offload(struct toedev *tdev, struct sge_rspq *rq,
 2033     struct mbuf *m, struct mbuf *rx_gather[],
 2034     unsigned int gather_idx)
 2035 {
 2036         rq->offload_pkts++;
 2037         m->m_pkthdr.header = mtod(m, void *);
 2038             
 2039         rx_gather[gather_idx++] = m;
 2040         if (gather_idx == RX_BUNDLE_SIZE) {
 2041                 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
 2042                 gather_idx = 0;
 2043                 rq->offload_bundles++;
 2044         }
 2045         return (gather_idx);
 2046 }
 2047 
static void
restart_tx(struct sge_qset *qs)
{
        struct adapter *sc = qs->port->adapter;
        
        /*
         * For each stopped Tx queue that now has room, clear its stopped
         * bit (test_and_clear_bit ensures only one context schedules the
         * restart) and enqueue its resume task on the adapter taskqueue.
         */
        if (isset(&qs->txq_stopped, TXQ_OFLD) &&
            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
                qs->txq[TXQ_OFLD].restarts++;
                taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
        }
        if (isset(&qs->txq_stopped, TXQ_CTRL) &&
            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
                qs->txq[TXQ_CTRL].restarts++;
                taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
        }
}
 2066 
 2067 /**
 2068  *      t3_sge_alloc_qset - initialize an SGE queue set
 2069  *      @sc: the controller softc
 2070  *      @id: the queue set id
 2071  *      @nports: how many Ethernet ports will be using this queue set
 2072  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
 2073  *      @p: configuration parameters for this queue set
 2074  *      @ntxq: number of Tx queues for the queue set
 2075  *      @pi: port info for queue set
 2076  *
 2077  *      Allocate resources and initialize an SGE queue set.  A queue set
 2078  *      comprises a response queue, two Rx free-buffer queues, and up to 3
 2079  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
 2080  *      queue, offload queue, and control queue.
 2081  */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
                  const struct qset_params *p, int ntxq, struct port_info *pi)
{
        struct sge_qset *q = &sc->sge.qs[id];
        int i, ret = 0;

        init_qset_cntxt(q, id);
        
        /* Allocate the two free-list rings (standard and jumbo buffers). */
        if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
                    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
                    &q->fl[0].desc, &q->fl[0].sdesc,
                    &q->fl[0].desc_tag, &q->fl[0].desc_map,
                    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
                printf("error %d from alloc ring fl0\n", ret);
                goto err;
        }

        if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
                    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
                    &q->fl[1].desc, &q->fl[1].sdesc,
                    &q->fl[1].desc_tag, &q->fl[1].desc_map,
                    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
                printf("error %d from alloc ring fl1\n", ret);
                goto err;
        }

        /* Response queue: no software descriptors or per-entry DMA tag. */
        if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
                    &q->rspq.phys_addr, &q->rspq.desc, NULL,
                    &q->rspq.desc_tag, &q->rspq.desc_map,
                    NULL, NULL)) != 0) {
                printf("error %d from alloc ring rspq\n", ret);
                goto err;
        }

        for (i = 0; i < ntxq; ++i) {
                /*
                 * The control queue always uses immediate data so does not
                 * need to keep track of any mbufs.
                 * XXX Placeholder for future TOE support.
                 */
                size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

                if ((ret = alloc_ring(sc, p->txq_size[i],
                            sizeof(struct tx_desc), sz,
                            &q->txq[i].phys_addr, &q->txq[i].desc,
                            &q->txq[i].sdesc, &q->txq[i].desc_tag,
                            &q->txq[i].desc_map,
                            sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
                        printf("error %d from alloc ring tx %i\n", ret, i);
                        goto err;
                }
                mbufq_init(&q->txq[i].sendq);
                q->txq[i].gen = 1;
                q->txq[i].size = p->txq_size[i];
                snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
                    device_get_unit(sc->dev), irq_vec_idx, i);
                MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
        }

        q->txq[TXQ_ETH].port = pi;
        
        /* Resume/reclaim handlers run from the adapter taskqueue. */
        TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
        TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
        TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
        TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);

        q->fl[0].gen = q->fl[1].gen = 1;
        q->fl[0].size = p->fl_size;
        q->fl[1].size = p->jumbo_size;

        q->rspq.gen = 1;
        q->rspq.cidx = 0;
        q->rspq.size = p->rspq_size;

        /* Stop threshold: room for one maximally-scattered packet per port. */
        q->txq[TXQ_ETH].stop_thres = nports *
            flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

        q->fl[0].buf_size = MCLBYTES;
        q->fl[0].zone = zone_clust;
        q->fl[0].type = EXT_CLUSTER;
        q->fl[1].buf_size = MJUMPAGESIZE;
        q->fl[1].zone = zone_jumbop;
        q->fl[1].type = EXT_JUMBOP;
        
        q->lro.enabled = lro_default;

        /* Program the hardware contexts under the SGE register lock. */
        mtx_lock(&sc->sge.reg_lock);
        ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
                                   q->rspq.phys_addr, q->rspq.size,
                                   q->fl[0].buf_size, 1, 0);
        if (ret) {
                printf("error %d from t3_sge_init_rspcntxt\n", ret);
                goto err_unlock;
        }

        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
                ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
                                          q->fl[i].phys_addr, q->fl[i].size,
                                          q->fl[i].buf_size, p->cong_thres, 1,
                                          0);
                if (ret) {
                        printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
                        goto err_unlock;
                }
        }

        ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
                                 1, 0);
        if (ret) {
                printf("error %d from t3_sge_init_ecntxt\n", ret);
                goto err_unlock;
        }

        /* The offload and control egress contexts only exist if requested. */
        if (ntxq > 1) {
                ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
                                         USE_GTS, SGE_CNTXT_OFLD, id,
                                         q->txq[TXQ_OFLD].phys_addr,
                                         q->txq[TXQ_OFLD].size, 0, 1, 0);
                if (ret) {
                        printf("error %d from t3_sge_init_ecntxt\n", ret);
                        goto err_unlock;
                }
        }

        if (ntxq > 2) {
                ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
                                         SGE_CNTXT_CTRL, id,
                                         q->txq[TXQ_CTRL].phys_addr,
                                         q->txq[TXQ_CTRL].size,
                                         q->txq[TXQ_CTRL].token, 1, 0);
                if (ret) {
                        printf("error %d from t3_sge_init_ecntxt\n", ret);
                        goto err_unlock;
                }
        }
        
        snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
            device_get_unit(sc->dev), irq_vec_idx);
        MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
        
        mtx_unlock(&sc->sge.reg_lock);
        t3_update_qset_coalesce(q, p);
        q->port = pi;
        
        /* Post initial Rx buffers and response-queue credits. */
        refill_fl(sc, &q->fl[0], q->fl[0].size, 1);
        refill_fl(sc, &q->fl[1], q->fl[1].size, 1);
        refill_rspq(sc, &q->rspq, q->rspq.size - 1);

        t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
                     V_NEWTIMER(q->rspq.holdoff_tmr));

        return (0);

err_unlock:
        mtx_unlock(&sc->sge.reg_lock);
err:    
        /* t3_free_qset() copes with a partially-constructed queue set. */
        t3_free_qset(sc, q);

        return (ret);
}
 2245 
 2246 void
 2247 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 2248 {
 2249         struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
 2250         struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 2251         struct ifnet *ifp = pi->ifp;
 2252         
 2253         DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
 2254 
 2255         if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 2256             cpl->csum_valid && cpl->csum == 0xffff) {
 2257                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
 2258                 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
 2259                 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 2260                 m->m_pkthdr.csum_data = 0xffff;
 2261         }
 2262         /* 
 2263          * XXX need to add VLAN support for 6.x
 2264          */
 2265 #ifdef VLAN_SUPPORTED
 2266         if (__predict_false(cpl->vlan_valid)) {
 2267                 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
 2268                 m->m_flags |= M_VLANTAG;
 2269         } 
 2270 #endif
 2271         
 2272         m->m_pkthdr.rcvif = ifp;
 2273         m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
 2274 #ifndef DISABLE_MBUF_IOVEC
 2275         m_explode(m);
 2276 #endif     
 2277         /*
 2278          * adjust after conversion to mbuf chain
 2279          */
 2280         m_adj(m, sizeof(*cpl) + ethpad);
 2281 
 2282         (*ifp->if_input)(ifp, m);
 2283 }
 2284 
 2285 /**
 2286  *      get_packet - return the next ingress packet buffer from a free list
 2287  *      @adap: the adapter that received the packet
 2288  *      @drop_thres: # of remaining buffers before we start dropping packets
 2289  *      @qs: the qset that the SGE free list holding the packet belongs to
 2290  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 2291  *      @r: response descriptor 
 2292  *
 2293  *      Get the next packet from a free list and complete setup of the
 *      mbuf chain.  If the packet is small we make a copy and recycle the
 2295  *      original buffer, otherwise we use the original buffer itself.  If a
 2296  *      positive drop threshold is supplied packets are dropped and their
 2297  *      buffers recycled if (a) the number of remaining buffers is under the
 2298  *      threshold and the packet is too big to copy, or (b) the packet should
 2299  *      be copied but there is no memory for the copy.
 2300  */
 2301 #ifdef DISABLE_MBUF_IOVEC
 2302 
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
{
        /* NOTE(review): drop_thres is unused in this variant — confirm. */

        unsigned int len_cq =  ntohl(r->len_cq);
        /* F_RSPD_FLQ selects the jumbo free list over the standard one. */
        struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
        uint32_t len = G_RSPD_LEN(len_cq);
        uint32_t flags = ntohl(r->flags);
        uint8_t sopeop = G_RSPD_SOP_EOP(flags);
        int ret = 0;

        prefetch(sd->cl);

        fl->credits--;
        bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(fl->entry_tag, sd->map);

        /* Attach the receive cluster to the caller-supplied mbuf. */
        m_cljset(m, sd->cl, fl->type);
        m->m_len = len;

        /* Returns 1 once a complete packet has been assembled in mh. */
        switch(sopeop) {
        case RSPQ_SOP_EOP:
                DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
                mh->mh_head = mh->mh_tail = m;
                m->m_pkthdr.len = len;
                m->m_flags |= M_PKTHDR;
                ret = 1;
                break;
        case RSPQ_NSOP_NEOP:
                DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
                m->m_flags &= ~M_PKTHDR;
                /* Middle fragment with no chain started: drop it. */
                if (mh->mh_tail == NULL) {
                        if (cxgb_debug)
                                printf("discarding intermediate descriptor entry\n");
                        m_freem(m);
                        break;
                }
                mh->mh_tail->m_next = m;
                mh->mh_tail = m;
                mh->mh_head->m_pkthdr.len += len;
                ret = 0;
                break;
        case RSPQ_SOP:
                DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
                m->m_pkthdr.len = len;
                mh->mh_head = mh->mh_tail = m;
                m->m_flags |= M_PKTHDR;
                ret = 0;
                break;
        case RSPQ_EOP:
                DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
                m->m_flags &= ~M_PKTHDR;
                /* NOTE(review): assumes mh_head/mh_tail are non-NULL here. */
                mh->mh_head->m_pkthdr.len += len;
                mh->mh_tail->m_next = m;
                mh->mh_tail = m;
                ret = 1;
                break;
        }
        /* Advance the free-list consumer index, wrapping at fl->size. */
        if (++fl->cidx == fl->size)
                fl->cidx = 0;

        return (ret);
}
 2368 
 2369 #else
/*
 * get_packet - pull a received buffer off a free list (mbuf-iovec variant,
 * compiled when DISABLE_MBUF_IOVEC is NOT defined)
 *
 * Consumes the free-list buffer referenced by response descriptor 'r' and
 * attaches it (or a copy of its payload) to mbuf 'm'.  Small complete
 * packets are copied so the DMA cluster can be recycled in place.
 * Returns 1 when 'm' now holds a complete packet (EOP seen), 0 otherwise.
 *
 * NOTE(review): 'drop_thres' is not referenced anywhere in this variant's
 * body -- confirm whether drop-threshold handling was intentionally omitted.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{
        
        unsigned int len_cq =  ntohl(r->len_cq);
        /* F_RSPD_FLQ selects which of the two free lists the HW consumed. */
        struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
        uint32_t len = G_RSPD_LEN(len_cq);
        uint32_t flags = ntohl(r->flags);
        uint8_t sopeop = G_RSPD_SOP_EOP(flags);
        void *cl;
        int ret = 0;
        
        prefetch(sd->cl);

        fl->credits--;
        bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

        if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
                /*
                 * Small single-descriptor packet: copy the payload into the
                 * mbuf's own storage and leave the DMA cluster on the free
                 * list for reuse.
                 */
                cl = mtod(m, void *);
                memcpy(cl, sd->cl, len);
                recycle_rx_buf(adap, fl, fl->cidx);
        } else {
                /* Zero-copy path: hand the DMA cluster itself to the mbuf. */
                cl = sd->cl;
                bus_dmamap_unload(fl->entry_tag, sd->map);
        }
        switch(sopeop) {
        case RSPQ_SOP_EOP:
                DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
                /* Only attach the cluster if it was not copied above. */
                if (cl == sd->cl)
                        m_cljset(m, cl, fl->type);
                m->m_len = m->m_pkthdr.len = len;
                ret = 1;
                /* Complete packet: skip the iovec append below. */
                goto done;
                break;
        case RSPQ_NSOP_NEOP:
                DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
                ret = 0;
                break;
        case RSPQ_SOP:
                DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
                /* First fragment of a multi-descriptor packet. */
                m_iovinit(m);
                ret = 0;
                break;
        case RSPQ_EOP:
                DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
                ret = 1;
                break;
        }
        /* Multi-descriptor path: append this buffer to the mbuf's iovec. */
        m_iovappend(m, cl, fl->buf_size, len, 0);

done:
        /* Advance the free-list consumer index, wrapping at ring end. */
        if (++fl->cidx == fl->size)
                fl->cidx = 0;

        return (ret);
}
 2428 #endif
 2429 /**
 2430  *      handle_rsp_cntrl_info - handles control information in a response
 2431  *      @qs: the queue set corresponding to the response
 2432  *      @flags: the response control flags
 2433  *
 2434  *      Handles the control information of an SGE response, such as GTS
 2435  *      indications and completion credits for the queue set's Tx queues.
 2436  *      HW coalesces credits, we don't do any extra SW coalescing.
 2437  */
 2438 static __inline void
 2439 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
 2440 {
 2441         unsigned int credits;
 2442 
 2443 #if USE_GTS
 2444         if (flags & F_RSPD_TXQ0_GTS)
 2445                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
 2446 #endif
 2447         credits = G_RSPD_TXQ0_CR(flags);
 2448         if (credits) {
 2449                 qs->txq[TXQ_ETH].processed += credits;
 2450                 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
 2451                         taskqueue_enqueue(qs->port->adapter->tq,
 2452                             &qs->port->timer_reclaim_task);
 2453         }
 2454         
 2455         credits = G_RSPD_TXQ2_CR(flags);
 2456         if (credits) 
 2457                 qs->txq[TXQ_CTRL].processed += credits;
 2458 
 2459 # if USE_GTS
 2460         if (flags & F_RSPD_TXQ1_GTS)
 2461                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
 2462 # endif
 2463         credits = G_RSPD_TXQ1_CR(flags);
 2464         if (credits)
 2465                 qs->txq[TXQ_OFLD].processed += credits;
 2466 }
 2467 
/*
 * check_ring_db - follow-up for response queues that signalled GTS
 * ("sleeping" bits accumulated in process_responses()).
 *
 * Currently a stub: the body is a single empty statement, so the
 * 'sleeping' mask is effectively ignored.
 */
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
        ;
}
 2474 
 2475 /**
 2476  *      process_responses - process responses from an SGE response queue
 2477  *      @adap: the adapter
 2478  *      @qs: the queue set to which the response queue belongs
 2479  *      @budget: how many responses can be processed in this round
 2480  *
 2481  *      Process responses from an SGE response queue up to the supplied budget.
 2482  *      Responses include received packets as well as credits and other events
 2483  *      for the queues that belong to the response queue's queue set.
 2484  *      A negative budget is effectively unlimited.
 2485  *
 2486  *      Additionally choose the interrupt holdoff time for the next interrupt
 2487  *      on this queue.  If the system is under memory shortage use a fairly
 2488  *      long delay to help recovery.
 2489  */
 2490 static int
 2491 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 2492 {
 2493         struct sge_rspq *rspq = &qs->rspq;
 2494         struct rsp_desc *r = &rspq->desc[rspq->cidx];
 2495         int budget_left = budget;
 2496         unsigned int sleeping = 0;
 2497         int lro = qs->lro.enabled;
 2498         struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
 2499         int ngathered = 0;
 2500 #ifdef DEBUG    
 2501         static int last_holdoff = 0;
 2502         if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
 2503                 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
 2504                 last_holdoff = rspq->holdoff_tmr;
 2505         }
 2506 #endif
 2507         rspq->next_holdoff = rspq->holdoff_tmr;
 2508 
 2509         while (__predict_true(budget_left && is_new_response(r, rspq))) {
 2510                 int eth, eop = 0, ethpad = 0;
 2511                 uint32_t flags = ntohl(r->flags);
 2512                 uint32_t rss_csum = *(const uint32_t *)r;
 2513                 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
 2514                 
 2515                 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 2516                 
 2517                 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
 2518                         /* XXX */
 2519                         printf("async notification\n");
 2520 
 2521                 } else if  (flags & F_RSPD_IMM_DATA_VALID) {
 2522 #ifdef DISABLE_MBUF_IOVEC
 2523 
 2524                         if (cxgb_debug)
 2525                                 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
 2526 
 2527                         if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
 2528                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2529                                 budget_left--;
 2530                                 break;
 2531                         } else {
 2532                                 eop = 1;
 2533                         }
 2534 #else
 2535                         struct mbuf *m = NULL;
 2536 
 2537                         if (rspq->rspq_mbuf == NULL)
 2538                                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
 2539                         else
 2540                                 m = m_gethdr(M_DONTWAIT, MT_DATA);
 2541 
 2542                         /*
 2543                          * XXX revisit me
 2544                          */
 2545                         if (rspq->rspq_mbuf == NULL &&  m == NULL) {
 2546                                 rspq->next_holdoff = NOMEM_INTR_DELAY;
 2547                                 budget_left--;
 2548                                 break;
 2549                         }
 2550                         if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
 2551                                 goto skip;
 2552                         eop = 1;
 2553 #endif                  
 2554                         rspq->imm_data++;
 2555                 } else if (r->len_cq) {                 
 2556                         int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 2557                         
 2558 #ifdef DISABLE_MBUF_IOVEC
 2559                         struct mbuf *m;
 2560                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2561 
 2562                         if (m == NULL) {
 2563                                 log(LOG_WARNING, "failed to get mbuf for packet\n");
 2564                                 break;
 2565                         }
 2566                         
 2567                         eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
 2568 #else
 2569                         if (rspq->rspq_mbuf == NULL)  
 2570                                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
 2571                         if (rspq->rspq_mbuf == NULL) { 
 2572                                 log(LOG_WARNING, "failed to get mbuf for packet\n"); 
 2573                                 break; 
 2574                         }
 2575                         eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
 2576 #endif
 2577                         ethpad = 2;
 2578                 } else {
 2579                         DPRINTF("pure response\n");
 2580                         rspq->pure_rsps++;
 2581                 }
 2582 
 2583                 if (flags & RSPD_CTRL_MASK) {
 2584                         sleeping |= flags & RSPD_GTS_MASK;
 2585                         handle_rsp_cntrl_info(qs, flags);
 2586                 }
 2587 #ifndef DISABLE_MBUF_IOVEC              
 2588         skip:
 2589 #endif          
 2590                 r++;
 2591                 if (__predict_false(++rspq->cidx == rspq->size)) {
 2592                         rspq->cidx = 0;
 2593                         rspq->gen ^= 1;
 2594                         r = rspq->desc;
 2595                 }
 2596                 
 2597                 prefetch(r);
 2598                 if (++rspq->credits >= (rspq->size / 4)) {
 2599                         refill_rspq(adap, rspq, rspq->credits);
 2600                         rspq->credits = 0;
 2601                 }
 2602                 
 2603                 if (eop) {
 2604                         prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 
 2605                         prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 
 2606 
 2607                         if (eth) {                              
 2608                                 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
 2609                                     rss_hash, rss_csum, lro);
 2610                                 
 2611                                 rspq->rspq_mh.mh_tail = rspq->rspq_mh.mh_head = NULL;
 2612                         } else {
 2613                                 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
 2614                                 /*
 2615                                  * XXX size mismatch
 2616                                  */
 2617                                 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
 2618                                 
 2619                                 ngathered = rx_offload(&adap->tdev, rspq,
 2620                                     rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
 2621                         }
 2622 
 2623                         __refill_fl(adap, &qs->fl[0]);
 2624                         __refill_fl(adap, &qs->fl[1]);
 2625                 }
 2626 
 2627 
 2628                 --budget_left;
 2629         }
 2630 
 2631         deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
 2632         t3_lro_flush(adap, qs, &qs->lro);
 2633         
 2634         if (sleeping)
 2635                 check_ring_db(adap, qs, sleeping);
 2636 
 2637         smp_mb();  /* commit Tx queue processed updates */
 2638         if (__predict_false(qs->txq_stopped != 0))
 2639                 restart_tx(qs);
 2640 
 2641         budget -= budget_left;
 2642 #if 0   
 2643         refill_fl(adap, &qs->fl[0], &qs->fl[0].size - &qs->fl[0].credits, 1);
 2644         refill_fl(adap, &qs->fl[1], &qs->fl[1].size - &qs->fl[1].credits, 1);
 2645 #endif  
 2646         return (budget);
 2647 }
 2648 
 2649 /*
 2650  * A helper function that processes responses and issues GTS.
 2651  */
 2652 static __inline int
 2653 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 2654 {
 2655         int work;
 2656         static int last_holdoff = 0;
 2657         
 2658         work = process_responses(adap, rspq_to_qset(rq), -1);
 2659 
 2660         if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
 2661                 printf("next_holdoff=%d\n", rq->next_holdoff);
 2662                 last_holdoff = rq->next_holdoff;
 2663         }
 2664         if (work)
 2665                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 2666                     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 2667         return work;
 2668 }
 2669 
 2670 
 2671 /*
 2672  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 2673  * Handles data events from SGE response queues as well as error and other
 2674  * async events as they all use the same interrupt pin.  We use one SGE
 2675  * response queue per port in this mode and protect all response queues with
 2676  * queue 0's lock.
 2677  */
 2678 void
 2679 t3b_intr(void *data)
 2680 {
 2681         uint32_t i, map;
 2682         adapter_t *adap = data;
 2683         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 2684         
 2685         t3_write_reg(adap, A_PL_CLI, 0);
 2686         map = t3_read_reg(adap, A_SG_DATA_INTR);
 2687 
 2688         if (!map) 
 2689                 return;
 2690 
 2691         if (__predict_false(map & F_ERRINTR))
 2692                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 2693 
 2694         mtx_lock(&q0->lock);
 2695         for_each_port(adap, i)
 2696             if (map & (1 << i))
 2697                         process_responses_gts(adap, &adap->sge.qs[i].rspq);
 2698         mtx_unlock(&q0->lock);
 2699 }
 2700 
 2701 /*
 2702  * The MSI interrupt handler.  This needs to handle data events from SGE
 2703  * response queues as well as error and other async events as they all use
 2704  * the same MSI vector.  We use one SGE response queue per port in this mode
 2705  * and protect all response queues with queue 0's lock.
 2706  */
 2707 void
 2708 t3_intr_msi(void *data)
 2709 {
 2710         adapter_t *adap = data;
 2711         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 2712         int i, new_packets = 0;
 2713 
 2714         mtx_lock(&q0->lock);
 2715 
 2716         for_each_port(adap, i)
 2717             if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 
 2718                     new_packets = 1;
 2719         mtx_unlock(&q0->lock);
 2720         if (new_packets == 0)
 2721                 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 2722 }
 2723 
 2724 void
 2725 t3_intr_msix(void *data)
 2726 {
 2727         struct sge_qset *qs = data;
 2728         adapter_t *adap = qs->port->adapter;
 2729         struct sge_rspq *rspq = &qs->rspq;
 2730 
 2731         mtx_lock(&rspq->lock);
 2732         if (process_responses_gts(adap, rspq) == 0)
 2733                 rspq->unhandled_irqs++;
 2734         mtx_unlock(&rspq->lock);
 2735 }
 2736 
 2737 /* 
 2738  * broken by recent mbuf changes 
 2739  */ 
 2740 static int
 2741 t3_lro_enable(SYSCTL_HANDLER_ARGS)
 2742 {
 2743         adapter_t *sc;
 2744         int i, j, enabled, err, nqsets = 0;
 2745 
 2746 #ifndef LRO_WORKING
 2747         return (0);
 2748 #endif  
 2749         
 2750         sc = arg1;
 2751         enabled = sc->sge.qs[0].lro.enabled;
 2752         err = sysctl_handle_int(oidp, &enabled, arg2, req);
 2753 
 2754         if (err != 0) 
 2755                 return (err);
 2756         if (enabled == sc->sge.qs[0].lro.enabled)
 2757                 return (0);
 2758 
 2759         for (i = 0; i < sc->params.nports; i++) 
 2760                 for (j = 0; j < sc->port[i].nqsets; j++)
 2761                         nqsets++;
 2762         
 2763         for (i = 0; i < nqsets; i++) 
 2764                 sc->sge.qs[i].lro.enabled = enabled;
 2765         
 2766         return (0);
 2767 }
 2768 
 2769 static int
 2770 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
 2771 {
 2772         adapter_t *sc = arg1;
 2773         struct qset_params *qsp = &sc->params.sge.qset[0]; 
 2774         int coalesce_nsecs;     
 2775         struct sge_qset *qs;
 2776         int i, j, err, nqsets = 0;
 2777         struct mtx *lock;
 2778         
 2779         coalesce_nsecs = qsp->coalesce_nsecs;
 2780         err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
 2781 
 2782         if (err != 0) {
 2783                 return (err);
 2784         }
 2785         if (coalesce_nsecs == qsp->coalesce_nsecs)
 2786                 return (0);
 2787 
 2788         for (i = 0; i < sc->params.nports; i++) 
 2789                 for (j = 0; j < sc->port[i].nqsets; j++)
 2790                         nqsets++;
 2791 
 2792         coalesce_nsecs = max(100, coalesce_nsecs);
 2793 
 2794         for (i = 0; i < nqsets; i++) {
 2795                 qs = &sc->sge.qs[i];
 2796                 qsp = &sc->params.sge.qset[i];
 2797                 qsp->coalesce_nsecs = coalesce_nsecs;
 2798                 
 2799                 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 2800                             &sc->sge.qs[0].rspq.lock;
 2801 
 2802                 mtx_lock(lock);
 2803                 t3_update_qset_coalesce(qs, qsp);
 2804                 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
 2805                     V_NEWTIMER(qs->rspq.holdoff_tmr));
 2806                 mtx_unlock(lock);
 2807         }
 2808 
 2809         return (0);
 2810 }
 2811 
 2812 
/*
 * t3_add_sysctls - register the driver's sysctl nodes under the device's
 * sysctl tree: read-only information, tunables backed by handler
 * procedures, and debug/statistics counters.
 */
void
t3_add_sysctls(adapter_t *sc)
{
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid_list *children;
        
        ctx = device_get_sysctl_ctx(sc->dev);
        children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

        /* random information */
        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
            "firmware_version",
            CTLFLAG_RD, &sc->fw_version,
            0, "firmware version");

        /* Tunables backed by handler procedures. */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
            "enable_lro",
            CTLTYPE_INT|CTLFLAG_RW, sc,
            0, t3_lro_enable,
            "I", "enable large receive offload");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
            "intr_coal",
            CTLTYPE_INT|CTLFLAG_RW, sc,
            0, t3_set_coalesce_nsecs,
            "I", "interrupt coalescing timer (ns)");

        /* Debug knob and statistics counters. */
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "enable_debug",
            CTLFLAG_RW, &cxgb_debug,
            0, "enable verbose debugging output");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "collapse_free",
            CTLFLAG_RD, &collapse_free,
            0, "frees during collapse");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "mb_free_vec_free",
            CTLFLAG_RD, &mb_free_vec_free,
            0, "frees during mb_free_vec");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "collapse_mbufs",
            CTLFLAG_RW, &collapse_mbufs,
            0, "collapse mbuf chains into iovecs");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "txq_overrun",
            CTLFLAG_RD, &txq_fills,
            0, "#times txq overrun");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
            "bogus_imm",
            CTLFLAG_RD, &bogus_imm,
            0, "#times a bogus immediate response was seen");   
}
 2865 
 2866 /**
 2867  *      t3_get_desc - dump an SGE descriptor for debugging purposes
 2868  *      @qs: the queue set
 2869  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
 2870  *      @idx: the descriptor index in the queue
 2871  *      @data: where to dump the descriptor contents
 2872  *
 2873  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 2874  *      size of the descriptor.
 2875  */
 2876 int
 2877 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 2878                 unsigned char *data)
 2879 {
 2880         if (qnum >= 6)
 2881                 return (EINVAL);
 2882 
 2883         if (qnum < 3) {
 2884                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
 2885                         return -EINVAL;
 2886                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
 2887                 return sizeof(struct tx_desc);
 2888         }
 2889 
 2890         if (qnum == 3) {
 2891                 if (!qs->rspq.desc || idx >= qs->rspq.size)
 2892                         return (EINVAL);
 2893                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
 2894                 return sizeof(struct rsp_desc);
 2895         }
 2896 
 2897         qnum -= 4;
 2898         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
 2899                 return (EINVAL);
 2900         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
 2901         return sizeof(struct rx_desc);
 2902 }

Cache object: ef28dd510431f2a90dcb05c84ab957dd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.