1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
54 #include <sys/syslog.h>
55 #include <sys/socket.h>
56
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_vlan_var.h>
60
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/tcp.h>
65
66 #include <dev/pci/pcireg.h>
67 #include <dev/pci/pcivar.h>
68
69 #include <vm/vm.h>
70 #include <vm/pmap.h>
71
72 #ifdef CONFIG_DEFINED
73 #include <cxgb_include.h>
74 #include <sys/mvec.h>
75 #else
76 #include <dev/cxgb/cxgb_include.h>
77 #include <dev/cxgb/sys/mvec.h>
78 #endif
79
80 int txq_fills = 0;
81 /*
82 * XXX don't re-enable this until TOE stops assuming
83 * we have an m_ext
84 */
85 static int recycle_enable = 0;
86 extern int cxgb_txq_buf_ring_size;
87 int cxgb_cached_allocations;
88 int cxgb_cached;
89 int cxgb_ext_freed = 0;
90 int cxgb_ext_inited = 0;
91 int fl_q_size = 0;
92 int jumbo_q_size = 0;
93
94 extern int cxgb_use_16k_clusters;
95 extern int cxgb_pcpu_cache_enable;
96 extern int nmbjumbop;
97 extern int nmbjumbo9;
98 extern int nmbjumbo16;
99
100
101
102
103 #define USE_GTS 0
104
105 #define SGE_RX_SM_BUF_SIZE 1536
106 #define SGE_RX_DROP_THRES 16
107 #define SGE_RX_COPY_THRES 128
108
109 /*
110 * Period of the Tx buffer reclaim timer. This timer does not need to run
111 * frequently as Tx buffers are usually reclaimed by new Tx packets.
112 */
113 #define TX_RECLAIM_PERIOD (hz >> 1)
114
115 /*
116 * Values for sge_txq.flags
117 */
118 enum {
119 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
120 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
121 };
122
123 struct tx_desc {
124 uint64_t flit[TX_DESC_FLITS];
125 } __packed;
126
127 struct rx_desc {
128 uint32_t addr_lo;
129 uint32_t len_gen;
130 uint32_t gen2;
131 uint32_t addr_hi;
} __packed;
133
134 struct rsp_desc { /* response queue descriptor */
135 struct rss_header rss_hdr;
136 uint32_t flags;
137 uint32_t len_cq;
138 uint8_t imm_data[47];
139 uint8_t intr_gen;
140 } __packed;
141
142 #define RX_SW_DESC_MAP_CREATED (1 << 0)
143 #define TX_SW_DESC_MAP_CREATED (1 << 1)
144 #define RX_SW_DESC_INUSE (1 << 3)
145 #define TX_SW_DESC_MAPPED (1 << 4)
146
147 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
148 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
149 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
150 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
151
152 struct tx_sw_desc { /* SW state per Tx descriptor */
153 struct mbuf_iovec mi;
154 bus_dmamap_t map;
155 int flags;
156 };
157
158 struct rx_sw_desc { /* SW state per Rx descriptor */
159 caddr_t rxsd_cl;
160 caddr_t data;
161 bus_dmamap_t map;
162 int flags;
163 };
164
165 struct txq_state {
166 unsigned int compl;
167 unsigned int gen;
168 unsigned int pidx;
169 };
170
171 struct refill_fl_cb_arg {
172 int error;
173 bus_dma_segment_t seg;
174 int nseg;
175 };
176
177 /*
178 * Maps a number of flits to the number of Tx descriptors that can hold them.
179 * The formula is
180 *
181 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
182 *
183 * HW allows up to 4 descriptors to be combined into a WR.
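 * For example, flits == 2 always maps to a single descriptor, and each
 * additional (WR_FLITS - 1) flits adds one more descriptor, up to the
 * 4-descriptor limit.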
184 */
185 static uint8_t flit_desc_map[] = {
186 0,
187 #if SGE_NUM_GENBITS == 1
188 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
189 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
192 #elif SGE_NUM_GENBITS == 2
193 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
194 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
195 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
196 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
197 #else
198 # error "SGE_NUM_GENBITS must be 1 or 2"
199 #endif
200 };
201
202
203 int cxgb_debug = 0;
204
205 static void sge_timer_cb(void *arg);
206 static void sge_timer_reclaim(void *arg, int ncount);
207 static void sge_txq_reclaim_handler(void *arg, int ncount);
208
209 /**
 * reclaim_completed_tx_ - reclaims completed Tx descriptors
 * @q: the Tx queue to reclaim completed descriptors from
 * @reclaim_min: only reclaim if at least this many descriptors are reclaimable
213 *
214 * Reclaims Tx descriptors that the SGE has indicated it has processed,
215 * and frees the associated buffers if possible. Called with the Tx
216 * queue's lock held.
217 */
218 static __inline int
219 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
220 {
221 int reclaim = desc_reclaimable(q);
222
223 if (reclaim < reclaim_min)
224 return (0);
225
226 mtx_assert(&q->lock, MA_OWNED);
227 if (reclaim > 0) {
228 t3_free_tx_desc(q, reclaim);
229 q->cleaned += reclaim;
230 q->in_use -= reclaim;
231 }
232 return (reclaim);
233 }
234
235 /**
236 * should_restart_tx - are there enough resources to restart a Tx queue?
237 * @q: the Tx queue
238 *
239 * Checks if there are enough descriptors to restart a suspended Tx queue.
240 */
241 static __inline int
242 should_restart_tx(const struct sge_txq *q)
243 {
244 unsigned int r = q->processed - q->cleaned;
245
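/*
 * Restart once reclaiming the already-processed descriptors would
 * leave no more than half of the ring in use.
 */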
246 return q->in_use - r < (q->size >> 1);
247 }
248
249 /**
250 * t3_sge_init - initialize SGE
251 * @adap: the adapter
252 * @p: the SGE parameters
253 *
254 * Performs SGE initialization needed every time after a chip reset.
255 * We do not initialize any of the queue sets here, instead the driver
256 * top-level must request those individually. We also do not enable DMA
257 * here, that should be done after the queues have been set up.
258 */
259 void
260 t3_sge_init(adapter_t *adap, struct sge_params *p)
261 {
262 u_int ctrl, ups;
263
264 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
265
266 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
267 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
268 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
269 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
270 #if SGE_NUM_GENBITS == 1
271 ctrl |= F_EGRGENCTRL;
272 #endif
273 if (adap->params.rev > 0) {
274 if (!(adap->flags & (USING_MSIX | USING_MSI)))
275 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
276 }
277 t3_write_reg(adap, A_SG_CONTROL, ctrl);
278 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
279 V_LORCQDRBTHRSH(512));
280 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
281 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
282 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
283 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
284 adap->params.rev < T3_REV_C ? 1000 : 500);
285 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
286 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
287 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
288 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
289 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
290 }
291
292
293 /**
294 * sgl_len - calculates the size of an SGL of the given capacity
295 * @n: the number of SGL entries
296 *
297 * Calculates the number of flits needed for a scatter/gather list that
298 * can hold the given number of entries.
299 */
300 static __inline unsigned int
301 sgl_len(unsigned int n)
302 {
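/*
 * Each pair of SGL entries packs into 3 flits (two 64-bit addresses
 * plus two 32-bit lengths); a trailing odd entry takes 2 flits.
 */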
303 return ((3 * n) / 2 + (n & 1));
304 }
305
306 /**
 * get_imm_packet - copy the immediate data of a response into an mbuf
 * @sc: the adapter
 * @resp: the response descriptor containing the packet data
 * @m: the mbuf to copy the data into
 *
 * Copies the immediate data of the given response into the supplied mbuf.
311 */
312 static int
313 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
314 {
315
316 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
317 m->m_ext.ext_buf = NULL;
318 m->m_ext.ext_type = 0;
319 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
320 return (0);
321 }
322
323 static __inline u_int
324 flits_to_desc(u_int n)
325 {
326 return (flit_desc_map[n]);
327 }
328
329 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
330 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
331 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
332 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
333 F_HIRCQPARITYERROR)
334 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
335 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
336 F_RSPQDISABLED)
337
338 /**
339 * t3_sge_err_intr_handler - SGE async event interrupt handler
340 * @adapter: the adapter
341 *
342 * Interrupt handler for SGE asynchronous (non-data) events.
343 */
344 void
345 t3_sge_err_intr_handler(adapter_t *adapter)
346 {
347 unsigned int v, status;
348
349 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
350 if (status & SGE_PARERR)
351 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
352 status & SGE_PARERR);
353 if (status & SGE_FRAMINGERR)
354 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
355 status & SGE_FRAMINGERR);
356 if (status & F_RSPQCREDITOVERFOW)
357 CH_ALERT(adapter, "SGE response queue credit overflow\n");
358
359 if (status & F_RSPQDISABLED) {
360 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
361
362 CH_ALERT(adapter,
363 "packet delivered to disabled response queue (0x%x)\n",
364 (v >> S_RSPQ0DISABLED) & 0xff);
365 }
366
367 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
368 if (status & SGE_FATALERR)
369 t3_fatal_err(adapter);
370 }
371
372 void
373 t3_sge_prep(adapter_t *adap, struct sge_params *p)
374 {
375 int i, nqsets;
376
377 nqsets = min(SGE_QSETS, mp_ncpus*4);
378
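/* Size the free lists according to the available clusters, rounded down to a power of 2. */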
379 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
380
381 while (!powerof2(fl_q_size))
382 fl_q_size--;
383 #if __FreeBSD_version >= 700111
384 if (cxgb_use_16k_clusters)
385 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
386 else
387 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
388 #else
389 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
390 #endif
391 while (!powerof2(jumbo_q_size))
392 jumbo_q_size--;
393
394 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
395 device_printf(adap->dev,
396 "Insufficient clusters and/or jumbo buffers.\n");
397
398 /* XXX Does ETHER_ALIGN need to be accounted for here? */
399 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
400
401 for (i = 0; i < SGE_QSETS; ++i) {
402 struct qset_params *q = p->qset + i;
403
404 if (adap->params.nports > 2) {
405 q->coalesce_usecs = 50;
406 } else {
407 #ifdef INVARIANTS
408 q->coalesce_usecs = 10;
409 #else
410 q->coalesce_usecs = 5;
411 #endif
412 }
413 q->polling = 0;
414 q->rspq_size = RSPQ_Q_SIZE;
415 q->fl_size = fl_q_size;
416 q->jumbo_size = jumbo_q_size;
417 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
418 q->txq_size[TXQ_OFLD] = 1024;
419 q->txq_size[TXQ_CTRL] = 256;
420 q->cong_thres = 0;
421 }
422 }
423
424 int
425 t3_sge_alloc(adapter_t *sc)
426 {
427
428 /* The parent tag. */
429 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
430 1, 0, /* algnmnt, boundary */
431 BUS_SPACE_MAXADDR, /* lowaddr */
432 BUS_SPACE_MAXADDR, /* highaddr */
433 NULL, NULL, /* filter, filterarg */
434 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
435 BUS_SPACE_UNRESTRICTED, /* nsegments */
436 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
437 0, /* flags */
438 NULL, NULL, /* lock, lockarg */
439 &sc->parent_dmat)) {
440 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
441 return (ENOMEM);
442 }
443
444 /*
445 * DMA tag for normal sized RX frames
446 */
447 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
448 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
449 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
450 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
451 return (ENOMEM);
452 }
453
454 /*
455 * DMA tag for jumbo sized RX frames.
456 */
457 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
458 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
459 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
460 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
461 return (ENOMEM);
462 }
463
464 /*
465 * DMA tag for TX frames.
466 */
467 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
468 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
469 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
470 NULL, NULL, &sc->tx_dmat)) {
471 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
472 return (ENOMEM);
473 }
474
475 return (0);
476 }
477
478 int
479 t3_sge_free(struct adapter * sc)
480 {
481
482 if (sc->tx_dmat != NULL)
483 bus_dma_tag_destroy(sc->tx_dmat);
484
485 if (sc->rx_jumbo_dmat != NULL)
486 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
487
488 if (sc->rx_dmat != NULL)
489 bus_dma_tag_destroy(sc->rx_dmat);
490
491 if (sc->parent_dmat != NULL)
492 bus_dma_tag_destroy(sc->parent_dmat);
493
494 return (0);
495 }
496
497 void
498 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
499 {
500
501 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
502 qs->rspq.polling = 0 /* p->polling */;
503 }
504
505 #if !defined(__i386__) && !defined(__amd64__)
506 static void
507 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
508 {
509 struct refill_fl_cb_arg *cb_arg = arg;
510
511 cb_arg->error = error;
512 cb_arg->seg = segs[0];
513 cb_arg->nseg = nseg;
514
515 }
516 #endif
517 /**
518 * refill_fl - refill an SGE free-buffer list
519 * @sc: the controller softc
520 * @q: the free-list to refill
521 * @n: the number of new buffers to allocate
522 *
523 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must ensure that @n does not exceed the queue's capacity.
525 */
526 static void
527 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
528 {
529 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
530 struct rx_desc *d = &q->desc[q->pidx];
531 struct refill_fl_cb_arg cb_arg;
532 caddr_t cl;
533 int err, count = 0;
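/*
 * Each cluster reserves room at its front for an mbuf header (m_hdr,
 * pkthdr, m_ext_) plus a uint32_t; the mbuf itself is only constructed
 * at rx time, so DMA is set up to start at cl + header_size.
 */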
534 int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
535
536 cb_arg.error = 0;
537 while (n--) {
538 /*
539 * We only allocate a cluster, mbuf allocation happens after rx
540 */
541 if ((cl = cxgb_cache_get(q->zone)) == NULL) {
542 log(LOG_WARNING, "Failed to allocate cluster\n");
543 goto done;
544 }
545
546 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
547 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
548 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
549 uma_zfree(q->zone, cl);
550 goto done;
551 }
552 sd->flags |= RX_SW_DESC_MAP_CREATED;
553 }
554 #if !defined(__i386__) && !defined(__amd64__)
555 err = bus_dmamap_load(q->entry_tag, sd->map,
556 cl + header_size, q->buf_size,
557 refill_fl_cb, &cb_arg, 0);
558
559 if (err != 0 || cb_arg.error) {
560 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
uma_zfree(q->zone, cl);
goto done;
565 }
566 #else
567 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
568 #endif
569 sd->flags |= RX_SW_DESC_INUSE;
570 sd->rxsd_cl = cl;
571 sd->data = cl + header_size;
572 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
573 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
574 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
575 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
576
577 d++;
578 sd++;
579
580 if (++q->pidx == q->size) {
581 q->pidx = 0;
582 q->gen ^= 1;
583 sd = q->sdesc;
584 d = q->desc;
585 }
586 q->credits++;
587 count++;
588 }
589
590 done:
591 if (count)
592 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
593 }
594
595
596 /**
597 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
599 * @q: the SGE free list to clean up
600 *
601 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
602 * this queue should be stopped before calling this function.
603 */
604 static void
605 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
606 {
607 u_int cidx = q->cidx;
608
609 while (q->credits--) {
610 struct rx_sw_desc *d = &q->sdesc[cidx];
611
612 if (d->flags & RX_SW_DESC_INUSE) {
613 bus_dmamap_unload(q->entry_tag, d->map);
614 bus_dmamap_destroy(q->entry_tag, d->map);
615 uma_zfree(q->zone, d->rxsd_cl);
616 }
617 d->rxsd_cl = NULL;
618 if (++cidx == q->size)
619 cidx = 0;
620 }
621 }
622
623 static __inline void
624 __refill_fl(adapter_t *adap, struct sge_fl *fl)
625 {
626 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
627 }
628
629 static __inline void
630 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
631 {
632 if ((fl->size - fl->credits) < max)
633 refill_fl(adap, fl, min(max, fl->size - fl->credits));
634 }
635
636 void
637 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
638 {
639 __refill_fl_lt(adap, fl, 512);
640 }
641
642 /**
643 * recycle_rx_buf - recycle a receive buffer
644 * @adapter: the adapter
645 * @q: the SGE free list
646 * @idx: index of buffer to recycle
647 *
648 * Recycles the specified buffer on the given free list by adding it at
649 * the next available slot on the list.
650 */
651 static void
652 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
653 {
654 struct rx_desc *from = &q->desc[idx];
655 struct rx_desc *to = &q->desc[q->pidx];
656
657 q->sdesc[q->pidx] = q->sdesc[idx];
658 to->addr_lo = from->addr_lo; // already big endian
659 to->addr_hi = from->addr_hi; // likewise
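/* Publish the address before the gen bits, which make the descriptor valid to HW. */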
660 wmb();
661 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
662 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
663 q->credits++;
664
665 if (++q->pidx == q->size) {
666 q->pidx = 0;
667 q->gen ^= 1;
668 }
669 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
670 }
671
672 static void
673 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
674 {
675 uint32_t *addr;
676
677 addr = arg;
678 *addr = segs[0].ds_addr;
679 }
680
681 static int
682 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
683 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
684 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
685 {
686 size_t len = nelem * elem_size;
687 void *s = NULL;
688 void *p = NULL;
689 int err;
690
691 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
692 BUS_SPACE_MAXADDR_32BIT,
693 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
694 len, 0, NULL, NULL, tag)) != 0) {
695 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
696 return (ENOMEM);
697 }
698
699 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
700 map)) != 0) {
701 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
702 return (ENOMEM);
703 }
704
705 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
706 bzero(p, len);
707 *(void **)desc = p;
708
709 if (sw_size) {
710 len = nelem * sw_size;
711 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
712 *(void **)sdesc = s;
713 }
714 if (parent_entry_tag == NULL)
715 return (0);
716
717 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
718 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
719 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
720 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
721 NULL, NULL, entry_tag)) != 0) {
722 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
723 return (ENOMEM);
724 }
725 return (0);
726 }
727
728 static void
729 sge_slow_intr_handler(void *arg, int ncount)
730 {
731 adapter_t *sc = arg;
732
733 t3_slow_intr_handler(sc);
734 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
735 (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
736 }
737
738 /**
 * sge_timer_cb - perform periodic maintenance of the adapter's SGE queues
 * @arg: the adapter
 *
 * Runs periodically from a timer to perform maintenance of the SGE queue
 * sets. It performs the following tasks:
744 *
745 * a) Cleans up any completed Tx descriptors that may still be pending.
746 * Normal descriptor cleanup happens when new packets are added to a Tx
747 * queue so this timer is relatively infrequent and does any cleanup only
748 * if the Tx queue has not seen any new packets in a while. We make a
749 * best effort attempt to reclaim descriptors, in that we don't wait
750 * around if we cannot get a queue's lock (which most likely is because
751 * someone else is queueing new packets and so will also handle the clean
752 * up). Since control queues use immediate data exclusively we don't
753 * bother cleaning them up here.
754 *
755 * b) Replenishes Rx queues that have run out due to memory shortage.
756 * Normally new Rx buffers are added when existing ones are consumed but
757 * when out of memory a queue can become empty. We try to add only a few
758 * buffers here, the queue will be replenished fully as these new buffers
759 * are used up if memory shortage has subsided.
760 *
761 * c) Return coalesced response queue credits in case a response queue is
762 * starved.
763 *
764 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
765 * fifo overflows and the FW doesn't implement any recovery scheme yet.
766 */
767 static void
768 sge_timer_cb(void *arg)
769 {
770 adapter_t *sc = arg;
771 #ifndef IFNET_MULTIQUEUE
772 struct port_info *pi;
773 struct sge_qset *qs;
774 struct sge_txq *txq;
775 int i, j;
776 int reclaim_ofl, refill_rx;
777
778 for (i = 0; i < sc->params.nports; i++) {
779 pi = &sc->port[i];
780 for (j = 0; j < pi->nqsets; j++) {
781 qs = &sc->sge.qs[pi->first_qset + j];
782 txq = &qs->txq[0];
783 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
784 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
785 (qs->fl[1].credits < qs->fl[1].size));
786 if (reclaim_ofl || refill_rx) {
787 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
788 break;
789 }
790 }
791 }
792 #endif
793 if (sc->params.nports > 2) {
794 int i;
795
796 for_each_port(sc, i) {
797 struct port_info *pi = &sc->port[i];
798
799 t3_write_reg(sc, A_SG_KDOORBELL,
800 F_SELEGRCNTX |
801 (FW_TUNNEL_SGEEC_START + pi->first_qset));
802 }
803 }
804 if (sc->open_device_map != 0)
805 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
806 }
807
808 /*
809 * This is meant to be a catch-all function to keep sge state private
810 * to sge.c
811 *
812 */
813 int
814 t3_sge_init_adapter(adapter_t *sc)
815 {
816 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
817 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
818 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
819 mi_init();
820 cxgb_cache_init();
821 return (0);
822 }
823
824 int
825 t3_sge_reset_adapter(adapter_t *sc)
826 {
827 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
828 return (0);
829 }
830
831 int
832 t3_sge_init_port(struct port_info *pi)
833 {
834 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
835 return (0);
836 }
837
838 void
839 t3_sge_deinit_sw(adapter_t *sc)
840 {
841
842 mi_deinit();
843 }
844
845 /**
846 * refill_rspq - replenish an SGE response queue
847 * @adapter: the adapter
848 * @q: the response queue to replenish
849 * @credits: how many new responses to make available
850 *
851 * Replenishes a response queue by making the supplied number of responses
852 * available to HW.
853 */
854 static __inline void
855 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
856 {
857
858 /* mbufs are allocated on demand when a rspq entry is processed. */
859 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
860 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
861 }
862
863 static __inline void
864 sge_txq_reclaim_(struct sge_txq *txq, int force)
865 {
866
867 if (desc_reclaimable(txq) < 16)
868 return;
869 if (mtx_trylock(&txq->lock) == 0)
870 return;
871 reclaim_completed_tx_(txq, 16);
872 mtx_unlock(&txq->lock);
873
874 }
875
876 static void
877 sge_txq_reclaim_handler(void *arg, int ncount)
878 {
879 struct sge_txq *q = arg;
880
881 sge_txq_reclaim_(q, TRUE);
882 }
883
884
885
886 static void
887 sge_timer_reclaim(void *arg, int ncount)
888 {
889 struct port_info *pi = arg;
890 int i, nqsets = pi->nqsets;
891 adapter_t *sc = pi->adapter;
892 struct sge_qset *qs;
893 struct sge_txq *txq;
894 struct mtx *lock;
895
896 #ifdef IFNET_MULTIQUEUE
897 panic("%s should not be called with multiqueue support\n", __FUNCTION__);
898 #endif
899 for (i = 0; i < nqsets; i++) {
900 qs = &sc->sge.qs[pi->first_qset + i];
901
902 txq = &qs->txq[TXQ_OFLD];
903 sge_txq_reclaim_(txq, FALSE);
904
905 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
906 &sc->sge.qs[0].rspq.lock;
907
908 if (mtx_trylock(lock)) {
909 /* XXX currently assume that we are *NOT* polling */
910 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
911
912 if (qs->fl[0].credits < qs->fl[0].size - 16)
913 __refill_fl(sc, &qs->fl[0]);
914 if (qs->fl[1].credits < qs->fl[1].size - 16)
915 __refill_fl(sc, &qs->fl[1]);
916
917 if (status & (1 << qs->rspq.cntxt_id)) {
918 if (qs->rspq.credits) {
919 refill_rspq(sc, &qs->rspq, 1);
920 qs->rspq.credits--;
921 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
922 1 << qs->rspq.cntxt_id);
923 }
924 }
925 mtx_unlock(lock);
926 }
927 }
928 }
929
930 /**
931 * init_qset_cntxt - initialize an SGE queue set context info
932 * @qs: the queue set
933 * @id: the queue set id
934 *
935 * Initializes the TIDs and context ids for the queues of a queue set.
936 */
937 static void
938 init_qset_cntxt(struct sge_qset *qs, u_int id)
939 {
940
941 qs->rspq.cntxt_id = id;
942 qs->fl[0].cntxt_id = 2 * id;
943 qs->fl[1].cntxt_id = 2 * id + 1;
944 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
945 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
946 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
947 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
948 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
949
950 mbufq_init(&qs->txq[TXQ_ETH].sendq);
951 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
952 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
953 }
954
955
956 static void
957 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
958 {
959 txq->in_use += ndesc;
960 /*
961 * XXX we don't handle stopping of queue
962 * presumably start handles this when we bump against the end
963 */
964 txqs->gen = txq->gen;
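/*
 * Request a WR completion roughly once every 32 descriptors so that
 * the SGE periodically returns credits for this queue.
 */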
965 txq->unacked += ndesc;
966 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
967 txq->unacked &= 31;
968 txqs->pidx = txq->pidx;
969 txq->pidx += ndesc;
970 #ifdef INVARIANTS
971 if (((txqs->pidx > txq->cidx) &&
972 (txq->pidx < txqs->pidx) &&
973 (txq->pidx >= txq->cidx)) ||
974 ((txqs->pidx < txq->cidx) &&
975 (txq->pidx >= txq-> cidx)) ||
976 ((txqs->pidx < txq->cidx) &&
977 (txq->cidx < txqs->pidx)))
978 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
979 txqs->pidx, txq->pidx, txq->cidx);
980 #endif
981 if (txq->pidx >= txq->size) {
982 txq->pidx -= txq->size;
983 txq->gen ^= 1;
984 }
985
986 }
987
988 /**
989 * calc_tx_descs - calculate the number of Tx descriptors for a packet
990 * @m: the packet mbufs
991 * @nsegs: the number of segments
992 *
993 * Returns the number of Tx descriptors needed for the given Ethernet
994 * packet. Ethernet packets require addition of WR and CPL headers.
995 */
996 static __inline unsigned int
997 calc_tx_descs(const struct mbuf *m, int nsegs)
998 {
999 unsigned int flits;
1000
1001 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
1002 return 1;
1003
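/* 2 flits for the WR and CPL_TX_PKT headers, plus the SGL itself. */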
1004 flits = sgl_len(nsegs) + 2;
1005 #ifdef TSO_SUPPORTED
1006 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1007 flits++;
1008 #endif
1009 return flits_to_desc(flits);
1010 }
1011
1012 static unsigned int
1013 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1014 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1015 {
1016 struct mbuf *m0;
1017 int err, pktlen, pass = 0;
1018
1019 retry:
1020 err = 0;
1021 m0 = *m;
1022 pktlen = m0->m_pkthdr.len;
1023 #if defined(__i386__) || defined(__amd64__)
1024 if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1025 goto done;
1026 } else
1027 #endif
1028 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1029
1030 if (err == 0) {
1031 goto done;
1032 }
1033 if (err == EFBIG && pass == 0) {
1034 pass = 1;
1035 /* Too many segments, try to defrag */
1036 m0 = m_defrag(m0, M_DONTWAIT);
1037 if (m0 == NULL) {
1038 m_freem(*m);
1039 *m = NULL;
1040 return (ENOBUFS);
1041 }
1042 *m = m0;
1043 goto retry;
1044 } else if (err == ENOMEM) {
1045 return (err);
} else if (err) {
1047 if (cxgb_debug)
1048 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1049 m_freem(m0);
1050 *m = NULL;
1051 return (err);
1052 }
1053 done:
1054 #if !defined(__i386__) && !defined(__amd64__)
1055 bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1056 #endif
1057 txsd->flags |= TX_SW_DESC_MAPPED;
1058
1059 return (0);
1060 }
1061
1062 /**
1063 * make_sgl - populate a scatter/gather list for a packet
1064 * @sgp: the SGL to populate
1065 * @segs: the packet dma segments
1066 * @nsegs: the number of segments
1067 *
1068 * Generates a scatter/gather list for the buffers that make up a packet
1069 * and returns the SGL size in 8-byte words. The caller must size the SGL
1070 * appropriately.
1071 */
1072 static __inline void
1073 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1074 {
1075 int i, idx;
1076
1077 for (idx = 0, i = 0; i < nsegs; i++) {
1078 /*
1079 * firmware doesn't like empty segments
1080 */
1081 if (segs[i].ds_len == 0)
1082 continue;
1083 if (i && idx == 0)
1084 ++sgp;
1085
1086 sgp->len[idx] = htobe32(segs[i].ds_len);
1087 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1088 idx ^= 1;
1089 }
1090
1091 if (idx) {
1092 sgp->len[idx] = 0;
1093 sgp->addr[idx] = 0;
1094 }
1095 }
1096
1097 /**
1098 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1099 * @adap: the adapter
1100 * @q: the Tx queue
1101 *
 * Ring the doorbell if a Tx queue is asleep. There is a natural race
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
1106 *
1107 * When GTS is disabled we unconditionally ring the doorbell.
1108 */
1109 static __inline void
1110 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1111 {
1112 #if USE_GTS
1113 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1114 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1115 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1116 #ifdef T3_TRACE
1117 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1118 q->cntxt_id);
1119 #endif
1120 t3_write_reg(adap, A_SG_KDOORBELL,
1121 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1122 }
1123 #else
1124 wmb(); /* write descriptors before telling HW */
1125 t3_write_reg(adap, A_SG_KDOORBELL,
1126 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1127 #endif
1128 }
1129
1130 static __inline void
1131 wr_gen2(struct tx_desc *d, unsigned int gen)
1132 {
1133 #if SGE_NUM_GENBITS == 2
1134 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1135 #endif
1136 }
1137
1138 /**
1139 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1140 * @ndesc: number of Tx descriptors spanned by the SGL
1141 * @txd: first Tx descriptor to be written
1142 * @txqs: txq state (generation and producer index)
1143 * @txq: the SGE Tx queue
1144 * @sgl: the SGL
1145 * @flits: number of flits to the start of the SGL in the first descriptor
1146 * @sgl_flits: the SGL size in flits
1147 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1148 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1149 *
1150 * Write a work request header and an associated SGL. If the SGL is
1151 * small enough to fit into one Tx descriptor it has already been written
1152 * and we just need to write the WR header. Otherwise we distribute the
1153 * SGL across the number of descriptors it spans.
1154 */
1155 static void
1156 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1157 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1158 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1159 {
1160
1161 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1162 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1163
1164 if (__predict_true(ndesc == 1)) {
1165 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1166 V_WR_SGLSFLT(flits)) | wr_hi;
1167 wmb();
1168 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1169 V_WR_GEN(txqs->gen)) | wr_lo;
1170 /* XXX gen? */
1171 wr_gen2(txd, txqs->gen);
1172
1173 } else {
1174 unsigned int ogen = txqs->gen;
1175 const uint64_t *fp = (const uint64_t *)sgl;
1176 struct work_request_hdr *wp = wrp;
1177
1178 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1179 V_WR_SGLSFLT(flits)) | wr_hi;
1180
1181 while (sgl_flits) {
1182 unsigned int avail = WR_FLITS - flits;
1183
1184 if (avail > sgl_flits)
1185 avail = sgl_flits;
1186 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1187 sgl_flits -= avail;
1188 ndesc--;
1189 if (!sgl_flits)
1190 break;
1191
1192 fp += avail;
1193 txd++;
1194 txsd++;
1195 if (++txqs->pidx == txq->size) {
1196 txqs->pidx = 0;
1197 txqs->gen ^= 1;
1198 txd = txq->desc;
1199 txsd = txq->sdesc;
1200 }
1201
1202 /*
1203 * when the head of the mbuf chain
1204 * is freed all clusters will be freed
1205 * with it
1206 */
1207 KASSERT(txsd->mi.mi_base == NULL,
1208 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1209 wrp = (struct work_request_hdr *)txd;
1210 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1211 V_WR_SGLSFLT(1)) | wr_hi;
1212 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1213 sgl_flits + 1)) |
1214 V_WR_GEN(txqs->gen)) | wr_lo;
1215 wr_gen2(txd, txqs->gen);
1216 flits = 1;
1217 }
1218 wrp->wr_hi |= htonl(F_WR_EOP);
1219 wmb();
1220 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1221 wr_gen2((struct tx_desc *)wp, ogen);
1222 }
1223 }
1224
1225 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1226 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1227
1228 #ifdef VLAN_SUPPORTED
1229 #define GET_VTAG(cntrl, m) \
1230 do { \
1231 if ((m)->m_flags & M_VLANTAG) \
1232 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1233 } while (0)
1234
1235 #define GET_VTAG_MI(cntrl, mi) \
1236 do { \
1237 if ((mi)->mi_flags & M_VLANTAG) \
1238 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1239 } while (0)
1240 #else
1241 #define GET_VTAG(cntrl, m)
1242 #define GET_VTAG_MI(cntrl, m)
1243 #endif
1244
1245 int
1246 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1247 {
1248 adapter_t *sc;
1249 struct mbuf *m0;
1250 struct sge_txq *txq;
1251 struct txq_state txqs;
1252 struct port_info *pi;
1253 unsigned int ndesc, flits, cntrl, mlen;
1254 int err, nsegs, tso_info = 0;
1255
1256 struct work_request_hdr *wrp;
1257 struct tx_sw_desc *txsd;
1258 struct sg_ent *sgp, *sgl;
1259 uint32_t wr_hi, wr_lo, sgl_flits;
1260 bus_dma_segment_t segs[TX_MAX_SEGS];
1261
1262 struct tx_desc *txd;
1263 struct mbuf_vec *mv;
1264 struct mbuf_iovec *mi;
1265
1266 DPRINTF("t3_encap cpu=%d ", curcpu);
1267
1268 mi = NULL;
1269 pi = qs->port;
1270 sc = pi->adapter;
1271 txq = &qs->txq[TXQ_ETH];
1272 txd = &txq->desc[txq->pidx];
1273 txsd = &txq->sdesc[txq->pidx];
1274 sgl = txq->txq_sgl;
1275 m0 = *m;
1276
1277 mtx_assert(&txq->lock, MA_OWNED);
1278 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1279 /*
1280 * XXX need to add VLAN support for 6.x
1281 */
1282 #ifdef VLAN_SUPPORTED
1283 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1284 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1285 #endif
1286 KASSERT(txsd->mi.mi_base == NULL,
1287 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1288 if (count > 1) {
1289 panic("count > 1 not support in CVS\n");
1290 if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1291 return (err);
1292 nsegs = count;
1293 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1294 if (cxgb_debug)
1295 printf("failed ... err=%d\n", err);
1296 return (err);
1297 }
1298 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1299
1300 if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1301 mi_collapse_mbuf(&txsd->mi, m0);
1302 mi = &txsd->mi;
1303 }
1304 if (count > 1) {
1305 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1306 int i, fidx;
1307 struct mbuf_iovec *batchmi;
1308
1309 mv = mtomv(m0);
1310 batchmi = mv->mv_vec;
1311
1312 wrp = (struct work_request_hdr *)txd;
1313
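/* 1 flit for the WR header plus 2 flits (cntrl, len, addr) per batch entry. */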
1314 flits = count*2 + 1;
1315 txq_prod(txq, 1, &txqs);
1316
1317 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1318 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1319
1320 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1321 GET_VTAG_MI(cntrl, batchmi);
1322 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1323 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1324 cntrl |= F_TXPKT_IPCSUM_DIS;
1325 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1326 cntrl |= F_TXPKT_L4CSUM_DIS;
1327 cbe->cntrl = htonl(cntrl);
1328 cbe->len = htonl(batchmi->mi_len | 0x80000000);
1329 cbe->addr = htobe64(segs[i].ds_addr);
1330 txd->flit[fidx] |= htobe64(1 << 24);
1331 }
1332
1333 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1334 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1335 wmb();
1336 wrp->wr_lo = htonl(V_WR_LEN(flits) |
1337 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1338 /* XXX gen? */
1339 wr_gen2(txd, txqs.gen);
1340 check_ring_tx_db(sc, txq);
1341
1342 return (0);
1343 } else if (tso_info) {
1344 int eth_type;
1345 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1346 struct ether_header *eh;
1347 struct ip *ip;
1348 struct tcphdr *tcp;
1349
1350 txd->flit[2] = 0;
1351 GET_VTAG(cntrl, m0);
1352 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1353 hdr->cntrl = htonl(cntrl);
1354 mlen = m0->m_pkthdr.len;
1355 hdr->len = htonl(mlen | 0x80000000);
1356
1357 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1358 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1359 m0, mlen, m0->m_pkthdr.tso_segsz,
1360 m0->m_pkthdr.csum_flags, m0->m_flags);
1361 panic("tx tso packet too small");
1362 }
1363
1364 /* Make sure that ether, ip, tcp headers are all in m0 */
1365 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1366 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1367 if (__predict_false(m0 == NULL)) {
1368 /* XXX panic probably an overreaction */
1369 panic("couldn't fit header into mbuf");
1370 }
1371 }
1372
1373 eh = mtod(m0, struct ether_header *);
1374 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1375 eth_type = CPL_ETH_II_VLAN;
1376 ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
1377 } else {
1378 eth_type = CPL_ETH_II;
1379 ip = (struct ip *)(eh + 1);
1380 }
1381 tcp = (struct tcphdr *)(ip + 1);
1382
1383 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1384 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1385 V_LSO_TCPHDR_WORDS(tcp->th_off);
1386 hdr->lso_info = htonl(tso_info);
1387
1388 if (__predict_false(mlen <= PIO_LEN)) {
1389 /*
 * A TSO packet that nevertheless fits within PIO_LEN;
 * this indicates a TSO bug at the higher levels.
1392 */
1393 txq_prod(txq, 1, &txqs);
1394 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1395 m_freem(m0);
1396 m0 = NULL;
1397 flits = (mlen + 7) / 8 + 3;
1398 hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1399 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1400 F_WR_SOP | F_WR_EOP | txqs.compl);
1401 wmb();
1402 hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1403 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1404
1405 wr_gen2(txd, txqs.gen);
1406 check_ring_tx_db(sc, txq);
1407 return (0);
1408 }
1409 flits = 3;
1410 } else {
1411 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1412
1413 GET_VTAG(cntrl, m0);
1414 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1415 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1416 cntrl |= F_TXPKT_IPCSUM_DIS;
1417 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1418 cntrl |= F_TXPKT_L4CSUM_DIS;
1419 cpl->cntrl = htonl(cntrl);
1420 mlen = m0->m_pkthdr.len;
1421 cpl->len = htonl(mlen | 0x80000000);
1422
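/*
 * Short packets are copied directly into the descriptor (PIO)
 * rather than being DMA mapped.
 */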
1423 if (mlen <= PIO_LEN) {
1424 txq_prod(txq, 1, &txqs);
1425 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1426 m_freem(m0);
1427 m0 = NULL;
1428 flits = (mlen + 7) / 8 + 2;
1429 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1430 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1431 F_WR_SOP | F_WR_EOP | txqs.compl);
1432 wmb();
1433 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1434 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1435
1436 wr_gen2(txd, txqs.gen);
1437 check_ring_tx_db(sc, txq);
1438 DPRINTF("pio buf\n");
1439 return (0);
1440 }
1441 DPRINTF("regular buf\n");
1442 flits = 2;
1443 }
1444 wrp = (struct work_request_hdr *)txd;
1445
1446 #ifdef nomore
1447 /*
1448 * XXX need to move into one of the helper routines above
1449 *
1450 */
1451 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1452 return (err);
1453 m0 = *m;
1454 #endif
1455 ndesc = calc_tx_descs(m0, nsegs);
1456
1457 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1458 make_sgl(sgp, segs, nsegs);
1459
1460 sgl_flits = sgl_len(nsegs);
1461
1462 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1463 txq_prod(txq, ndesc, &txqs);
1464 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1465 wr_lo = htonl(V_WR_TID(txq->token));
1466 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1467 check_ring_tx_db(pi->adapter, txq);
1468
1469 if ((m0->m_type == MT_DATA) &&
1470 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1471 (m0->m_ext.ext_type != EXT_PACKET)) {
1472 m0->m_flags &= ~M_EXT ;
1473 cxgb_mbufs_outstanding--;
1474 m_free(m0);
1475 }
1476
1477 return (0);
1478 }
1479
1480
1481 /**
1482 * write_imm - write a packet into a Tx descriptor as immediate data
1483 * @d: the Tx descriptor to write
1484 * @m: the packet
1485 * @len: the length of packet data to write as immediate data
1486 * @gen: the generation bit value to write
1487 *
1488 * Writes a packet as immediate data into a Tx descriptor. The packet
1489 * contains a work request at its beginning. We must write the packet
 * carefully so the SGE doesn't read it before it has been written in
 * its entirety.
1492 */
1493 static __inline void
1494 write_imm(struct tx_desc *d, struct mbuf *m,
1495 unsigned int len, unsigned int gen)
1496 {
1497 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1498 struct work_request_hdr *to = (struct work_request_hdr *)d;
1499
1500 if (len > WR_LEN)
1501 panic("len too big %d\n", len);
1502 if (len < sizeof(*from))
1503 panic("len too small %d", len);
1504
1505 memcpy(&to[1], &from[1], len - sizeof(*from));
1506 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1507 V_WR_BCNTLFLT(len & 7));
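/*
 * wr_lo carries the generation bit that marks the WR valid, so it
 * must only become visible after the rest of the descriptor.
 */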
1508 wmb();
1509 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1510 V_WR_LEN((len + 7) / 8));
1511 wr_gen2(d, gen);
1512
1513 /*
 * This check is a hack; we should really fix the logic so
 * that this can't happen.
1516 */
1517 if (m->m_type != MT_DONTFREE)
1518 m_freem(m);
1519
1520 }
1521
1522 /**
1523 * check_desc_avail - check descriptor availability on a send queue
1524 * @adap: the adapter
1525 * @q: the TX queue
1526 * @m: the packet needing the descriptors
1527 * @ndesc: the number of Tx descriptors needed
1528 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1529 *
1530 * Checks if the requested number of Tx descriptors is available on an
1531 * SGE send queue. If the queue is already suspended or not enough
1532 * descriptors are available the packet is queued for later transmission.
1533 * Must be called with the Tx queue locked.
1534 *
1535 * Returns 0 if enough descriptors are available, 1 if there aren't
1536 * enough descriptors and the packet has been queued, and 2 if the caller
1537 * needs to retry because there weren't enough descriptors at the
1538 * beginning of the call but some freed up in the mean time.
1539 */
1540 static __inline int
1541 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1542 struct mbuf *m, unsigned int ndesc,
1543 unsigned int qid)
1544 {
1545 /*
1546 * XXX We currently only use this for checking the control queue
1547 * the control queue is only used for binding qsets which happens
1548 * at init time so we are guaranteed enough descriptors
1549 */
1550 if (__predict_false(!mbufq_empty(&q->sendq))) {
1551 addq_exit: mbufq_tail(&q->sendq, m);
1552 return 1;
1553 }
1554 if (__predict_false(q->size - q->in_use < ndesc)) {
1555
1556 struct sge_qset *qs = txq_to_qset(q, qid);
1557
1558 setbit(&qs->txq_stopped, qid);
1559 smp_mb();
1560
1561 if (should_restart_tx(q) &&
1562 test_and_clear_bit(qid, &qs->txq_stopped))
1563 return 2;
1564
1565 q->stops++;
1566 goto addq_exit;
1567 }
1568 return 0;
1569 }
1570
1571
1572 /**
1573 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1574 * @q: the SGE control Tx queue
1575 *
1576 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1577 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs.
1579 */
1580 static __inline void
1581 reclaim_completed_tx_imm(struct sge_txq *q)
1582 {
1583 unsigned int reclaim = q->processed - q->cleaned;
1584
1585 mtx_assert(&q->lock, MA_OWNED);
1586
1587 q->in_use -= reclaim;
1588 q->cleaned += reclaim;
1589 }
1590
1591 static __inline int
1592 immediate(const struct mbuf *m)
1593 {
1594 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1595 }
1596
1597 /**
1598 * ctrl_xmit - send a packet through an SGE control Tx queue
1599 * @adap: the adapter
1600 * @q: the control queue
1601 * @m: the packet
1602 *
1603 * Send a packet through an SGE control Tx queue. Packets sent through
1604 * a control queue must fit entirely as immediate data in a single Tx
1605 * descriptor and have no page fragments.
1606 */
1607 static int
1608 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1609 {
1610 int ret;
1611 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1612
1613 if (__predict_false(!immediate(m))) {
1614 m_freem(m);
1615 return 0;
1616 }
1617
1618 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1619 wrp->wr_lo = htonl(V_WR_TID(q->token));
1620
1621 mtx_lock(&q->lock);
1622 again: reclaim_completed_tx_imm(q);
1623
1624 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1625 if (__predict_false(ret)) {
1626 if (ret == 1) {
1627 mtx_unlock(&q->lock);
1628 return (ENOSPC);
1629 }
1630 goto again;
1631 }
1632 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1633
1634 q->in_use++;
1635 if (++q->pidx >= q->size) {
1636 q->pidx = 0;
1637 q->gen ^= 1;
1638 }
1639 mtx_unlock(&q->lock);
wmb();
1642 t3_write_reg(adap, A_SG_KDOORBELL,
1643 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1644 return (0);
1645 }
1646
1647
1648 /**
1649 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
1651 *
1652 * Resumes transmission on a suspended Tx control queue.
1653 */
1654 static void
1655 restart_ctrlq(void *data, int npending)
1656 {
1657 struct mbuf *m;
1658 struct sge_qset *qs = (struct sge_qset *)data;
1659 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1660 adapter_t *adap = qs->port->adapter;
1661
1662 mtx_lock(&q->lock);
1663 again: reclaim_completed_tx_imm(q);
1664
1665 while (q->in_use < q->size &&
1666 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1667
1668 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1669
1670 if (++q->pidx >= q->size) {
1671 q->pidx = 0;
1672 q->gen ^= 1;
1673 }
1674 q->in_use++;
1675 }
1676 if (!mbufq_empty(&q->sendq)) {
1677 setbit(&qs->txq_stopped, TXQ_CTRL);
1678 smp_mb();
1679
1680 if (should_restart_tx(q) &&
1681 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1682 goto again;
1683 q->stops++;
1684 }
1685 mtx_unlock(&q->lock);
1686 wmb();
1687 t3_write_reg(adap, A_SG_KDOORBELL,
1688 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1689 }
1690
1691
1692 /*
1693 * Send a management message through control queue 0
1694 */
1695 int
1696 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1697 {
1698 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1699 }
1700
1701
1702 /**
1703 * free_qset - free the resources of an SGE queue set
1704 * @sc: the controller owning the queue set
1705 * @q: the queue set
1706 *
1707 * Release the HW and SW resources associated with an SGE queue set, such
1708 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1709 * queue set must be quiesced prior to calling this.
1710 */
1711 void
1712 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1713 {
1714 int i;
1715
1716 t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1717
1718 for (i = 0; i < SGE_TXQ_PER_SET; i++)
1719 if (q->txq[i].txq_mr.br_ring != NULL) {
1720 free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1721 mtx_destroy(&q->txq[i].txq_mr.br_lock);
1722 }
1723 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1724 if (q->fl[i].desc) {
1725 mtx_lock_spin(&sc->sge.reg_lock);
1726 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1727 mtx_unlock_spin(&sc->sge.reg_lock);
1728 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1729 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1730 q->fl[i].desc_map);
1731 bus_dma_tag_destroy(q->fl[i].desc_tag);
1732 bus_dma_tag_destroy(q->fl[i].entry_tag);
1733 }
1734 if (q->fl[i].sdesc) {
1735 free_rx_bufs(sc, &q->fl[i]);
1736 free(q->fl[i].sdesc, M_DEVBUF);
1737 }
1738 }
1739
1740 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1741 if (q->txq[i].desc) {
1742 mtx_lock_spin(&sc->sge.reg_lock);
1743 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1744 mtx_unlock_spin(&sc->sge.reg_lock);
1745 bus_dmamap_unload(q->txq[i].desc_tag,
1746 q->txq[i].desc_map);
1747 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1748 q->txq[i].desc_map);
1749 bus_dma_tag_destroy(q->txq[i].desc_tag);
1750 bus_dma_tag_destroy(q->txq[i].entry_tag);
1751 MTX_DESTROY(&q->txq[i].lock);
1752 }
1753 if (q->txq[i].sdesc) {
1754 free(q->txq[i].sdesc, M_DEVBUF);
1755 }
1756 }
1757
1758 if (q->rspq.desc) {
1759 mtx_lock_spin(&sc->sge.reg_lock);
1760 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1761 mtx_unlock_spin(&sc->sge.reg_lock);
1762
1763 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1764 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1765 q->rspq.desc_map);
1766 bus_dma_tag_destroy(q->rspq.desc_tag);
1767 MTX_DESTROY(&q->rspq.lock);
1768 }
1769
1770 tcp_lro_free(&q->lro.ctrl);
1771
1772 bzero(q, sizeof(*q));
1773 }
1774
1775 /**
1776 * t3_free_sge_resources - free SGE resources
1777 * @sc: the adapter softc
1778 *
1779 * Frees resources used by the SGE queue sets.
1780 */
1781 void
1782 t3_free_sge_resources(adapter_t *sc, int nqsets)
1783 {
1784 int i;
1785
1786 for (i = 0; i < nqsets; ++i)
1787 t3_free_qset(sc, &sc->sge.qs[i]);
1788 }
1789
1790 /**
1791 * t3_sge_start - enable SGE
1792 * @sc: the controller softc
1793 *
1794 * Enables the SGE for DMAs. This is the last step in starting packet
1795 * transfers.
1796 */
1797 void
1798 t3_sge_start(adapter_t *sc)
1799 {
1800 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1801 }
1802
1803 /**
1804 * t3_sge_stop - disable SGE operation
1805 * @sc: the adapter
1806 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
1808 * from error interrupts) or from normal process context. In the latter
1809 * case it also disables any pending queue restart tasklets. Note that
1810 * if it is called in interrupt context it cannot disable the restart
1811 * tasklets as it cannot wait, however the tasklets will have no effect
1812 * since the doorbells are disabled and the driver will call this again
1813 * later from process context, at which time the tasklets will be stopped
1814 * if they are still running.
1815 */
1816 void
1817 t3_sge_stop(adapter_t *sc)
1818 {
1819 int i, nqsets;
1820
1821 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1822
1823 if (sc->tq == NULL)
1824 return;
1825
1826 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1827 nqsets += sc->port[i].nqsets;
1828 #ifdef notyet
1829 /*
1830 *
1831 * XXX
1832 */
1833 for (i = 0; i < nqsets; ++i) {
1834 struct sge_qset *qs = &sc->sge.qs[i];
1835
1836 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1837 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1838 }
1839 #endif
1840 }
1841
1842 /**
1843 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
 * @q: the Tx queue to reclaim descriptors from
 * @reclaimable: the number of descriptors to reclaim
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers.  Called with the Tx queue lock held.
1854 */
1855 void
1856 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1857 {
1858 struct tx_sw_desc *txsd;
1859 unsigned int cidx;
1860
1861 #ifdef T3_TRACE
1862 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1863 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1864 #endif
1865 cidx = q->cidx;
1866 txsd = &q->sdesc[cidx];
1867 DPRINTF("reclaiming %d WR\n", reclaimable);
1868 mtx_assert(&q->lock, MA_OWNED);
1869 while (reclaimable--) {
1870 DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1871 if (txsd->mi.mi_base != NULL) {
1872 if (txsd->flags & TX_SW_DESC_MAPPED) {
1873 bus_dmamap_unload(q->entry_tag, txsd->map);
1874 txsd->flags &= ~TX_SW_DESC_MAPPED;
1875 }
1876 m_freem_iovec(&txsd->mi);
1877 buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1878 txsd->mi.mi_base = NULL;
1879 #if defined(DIAGNOSTIC) && 0
1880 if (m_get_priority(txsd->m[0]) != cidx)
1881 printf("pri=%d cidx=%d\n",
1882 (int)m_get_priority(txsd->m[0]), cidx);
1883 #endif
1884
1885 } else
1886 q->txq_skipped++;
1887
1888 ++txsd;
1889 if (++cidx == q->size) {
1890 cidx = 0;
1891 txsd = q->sdesc;
1892 }
1893 }
1894 q->cidx = cidx;
1895
1896 }
1897
1898 void
1899 t3_free_tx_desc_all(struct sge_txq *q)
1900 {
1901 int i;
1902 struct tx_sw_desc *txsd;
1903
1904 for (i = 0; i < q->size; i++) {
1905 txsd = &q->sdesc[i];
1906 if (txsd->mi.mi_base != NULL) {
1907 if (txsd->flags & TX_SW_DESC_MAPPED) {
1908 bus_dmamap_unload(q->entry_tag, txsd->map);
1909 txsd->flags &= ~TX_SW_DESC_MAPPED;
1910 }
1911 m_freem_iovec(&txsd->mi);
1912 bzero(&txsd->mi, sizeof(txsd->mi));
1913 }
1914 }
1915 }
1916
1917 /**
1918 * is_new_response - check if a response is newly written
1919 * @r: the response descriptor
1920 * @q: the response queue
1921 *
1922 * Returns true if a response descriptor contains a yet unprocessed
1923 * response.
1924 */
1925 static __inline int
1926 is_new_response(const struct rsp_desc *r,
1927 const struct sge_rspq *q)
1928 {
1929 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1930 }
1931
1932 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1933 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1934 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1935 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1936 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1937
1938 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1939 #define NOMEM_INTR_DELAY 2500
1940
1941 /**
1942 * write_ofld_wr - write an offload work request
1943 * @adap: the adapter
1944 * @m: the packet to send
1945 * @q: the Tx queue
1946 * @pidx: index of the first Tx descriptor to write
1947 * @gen: the generation value to use
1948 * @ndesc: number of descriptors the packet will occupy
1949 *
1950 * Write an offload work request to send the supplied packet. The packet
1951 * data already carry the work request with most fields populated.
1952 */
1953 static void
1954 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1955 struct sge_txq *q, unsigned int pidx,
1956 unsigned int gen, unsigned int ndesc,
1957 bus_dma_segment_t *segs, unsigned int nsegs)
1958 {
1959 unsigned int sgl_flits, flits;
1960 struct work_request_hdr *from;
1961 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1962 struct tx_desc *d = &q->desc[pidx];
1963 struct txq_state txqs;
1964
1965 if (immediate(m) && nsegs == 0) {
1966 write_imm(d, m, m->m_len, gen);
1967 return;
1968 }
1969
1970 /* Only TX_DATA builds SGLs */
1971 from = mtod(m, struct work_request_hdr *);
1972 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1973
1974 flits = m->m_len / 8;
1975 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1976
1977 make_sgl(sgp, segs, nsegs);
1978 sgl_flits = sgl_len(nsegs);
1979
1980 txqs.gen = gen;
1981 txqs.pidx = pidx;
1982 txqs.compl = 0;
1983
1984 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1985 from->wr_hi, from->wr_lo);
1986 }
1987
1988 /**
1989 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1990 * @m: the packet
1991 *
1992 * Returns the number of Tx descriptors needed for the given offload
1993 * packet. These packets are already fully constructed.
1994 */
1995 static __inline unsigned int
1996 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1997 {
1998 unsigned int flits, cnt = 0;
1999 int ndescs;
2000
2001 if (m->m_len <= WR_LEN && nsegs == 0)
2002 return (1); /* packet fits as immediate data */
2003
2004 if (m->m_flags & M_IOVEC)
2005 cnt = mtomv(m)->mv_count;
2006 else
2007 cnt = nsegs;
2008
2009 /* headers */
2010 flits = m->m_len / 8;
2011
2012 ndescs = flits_to_desc(flits + sgl_len(cnt));
2013
2014 CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
2015 flits, sgl_len(cnt), nsegs, ndescs);
2016
2017 return (ndescs);
2018 }
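
/*
 * Worked example (figures are illustrative): an offload WR whose header
 * is 40 bytes long occupies 40 / 8 = 5 flits, a flit being an 8-byte
 * word.  If the payload is described by 4 DMA segments, sgl_len(4)
 * further flits are needed for the scatter/gather list, and
 * flits_to_desc() rounds the combined flit count up to whole Tx
 * descriptors.  Packets of at most WR_LEN bytes with no segments are
 * sent as immediate data and always fit in one descriptor.
 */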
2019
2020 /**
2021 * ofld_xmit - send a packet through an offload queue
2022 * @adap: the adapter
2023 * @q: the Tx offload queue
2024 * @m: the packet
2025 *
2026 * Send an offload packet through an SGE offload queue.
2027 */
2028 static int
2029 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2030 {
2031 int ret, nsegs;
2032 unsigned int ndesc;
2033 unsigned int pidx, gen;
2034 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2035 struct tx_sw_desc *stx;
2036
2037 nsegs = m_get_sgllen(m);
2038 vsegs = m_get_sgl(m);
2039 ndesc = calc_tx_descs_ofld(m, nsegs);
2040 busdma_map_sgl(vsegs, segs, nsegs);
2041
2042 stx = &q->sdesc[q->pidx];
2043 KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2044
2045 mtx_lock(&q->lock);
2046 again: reclaim_completed_tx_(q, 16);
2047 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2048 if (__predict_false(ret)) {
2049 if (ret == 1) {
2050 printf("no ofld desc avail\n");
2051
2052 m_set_priority(m, ndesc); /* save for restart */
2053 mtx_unlock(&q->lock);
2054 return (EINTR);
2055 }
2056 goto again;
2057 }
2058
2059 gen = q->gen;
2060 q->in_use += ndesc;
2061 pidx = q->pidx;
2062 q->pidx += ndesc;
2063 if (q->pidx >= q->size) {
2064 q->pidx -= q->size;
2065 q->gen ^= 1;
2066 }
2067 #ifdef T3_TRACE
2068 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2069 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2070 ndesc, pidx, skb->len, skb->len - skb->data_len,
2071 skb_shinfo(skb)->nr_frags);
2072 #endif
2073 mtx_unlock(&q->lock);
2074
2075 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2076 check_ring_tx_db(adap, q);
2077 return (0);
2078 }
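
/*
 * Producer-side wrap example (numbers invented): with q->size = 1024,
 * q->pidx = 1022 and a request needing ndesc = 4, the code above moves
 * pidx to 1026, notices it ran past the end, rewinds it to 2 and flips
 * q->gen for subsequent requests.  The generation bit carried in each
 * descriptor is what lets the hardware tell freshly written entries
 * from stale ones left over from the previous trip around the ring.
 */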
2079
2080 /**
2081 * restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
2083 *
2084 * Resumes transmission on a suspended Tx offload queue.
2085 */
2086 static void
2087 restart_offloadq(void *data, int npending)
2088 {
2089 struct mbuf *m;
2090 struct sge_qset *qs = data;
2091 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2092 adapter_t *adap = qs->port->adapter;
2093 bus_dma_segment_t segs[TX_MAX_SEGS];
2094 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2095 int nsegs, cleaned;
2096
2097 mtx_lock(&q->lock);
2098 again: cleaned = reclaim_completed_tx_(q, 16);
2099
2100 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2101 unsigned int gen, pidx;
2102 unsigned int ndesc = m_get_priority(m);
2103
2104 if (__predict_false(q->size - q->in_use < ndesc)) {
2105 setbit(&qs->txq_stopped, TXQ_OFLD);
2106 smp_mb();
2107
2108 if (should_restart_tx(q) &&
2109 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2110 goto again;
2111 q->stops++;
2112 break;
2113 }
2114
2115 gen = q->gen;
2116 q->in_use += ndesc;
2117 pidx = q->pidx;
2118 q->pidx += ndesc;
2119 if (q->pidx >= q->size) {
2120 q->pidx -= q->size;
2121 q->gen ^= 1;
2122 }
2123
2124 (void)mbufq_dequeue(&q->sendq);
2125 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2126 mtx_unlock(&q->lock);
2127 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2128 mtx_lock(&q->lock);
2129 }
2130 mtx_unlock(&q->lock);
2131
2132 #if USE_GTS
2133 set_bit(TXQ_RUNNING, &q->flags);
2134 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2135 #endif
2136 wmb();
2137 t3_write_reg(adap, A_SG_KDOORBELL,
2138 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2139 }
2140
2141 /**
2142 * queue_set - return the queue set a packet should use
2143 * @m: the packet
2144 *
2145 * Maps a packet to the SGE queue set it should use. The desired queue
2146 * set is carried in bits 1-3 in the packet's priority.
2147 */
2148 static __inline int
2149 queue_set(const struct mbuf *m)
2150 {
2151 return m_get_priority(m) >> 1;
2152 }
2153
2154 /**
2155 * is_ctrl_pkt - return whether an offload packet is a control packet
2156 * @m: the packet
2157 *
2158 * Determines whether an offload packet should use an OFLD or a CTRL
2159 * Tx queue. This is indicated by bit 0 in the packet's priority.
2160 */
2161 static __inline int
2162 is_ctrl_pkt(const struct mbuf *m)
2163 {
2164 return m_get_priority(m) & 1;
2165 }
2166
2167 /**
2168 * t3_offload_tx - send an offload packet
2169 * @tdev: the offload device to send to
2170 * @m: the packet
2171 *
2172 * Sends an offload packet. We use the packet priority to select the
2173 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2174 * should be sent as regular or control, bits 1-3 select the queue set.
2175 */
2176 int
2177 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2178 {
2179 adapter_t *adap = tdev2adap(tdev);
2180 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2181
2182 if (__predict_false(is_ctrl_pkt(m)))
2183 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2184
2185 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2186 }
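
/*
 * Illustrative encoding (there is no such helper in this driver): a TOE
 * caller that wants its packet on the control queue of queue set 2
 * would set the mbuf priority to (2 << 1) | 1 = 5 before calling
 * t3_offload_tx(); queue_set() then recovers 2 and is_ctrl_pkt()
 * returns non-zero.  A plain offload packet for queue set 0 simply uses
 * priority 0.
 */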
2187
2188 /**
2189 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2190 * @tdev: the offload device that will be receiving the packets
2191 * @q: the SGE response queue that assembled the bundle
2192 * @m: the partial bundle
2193 * @n: the number of packets in the bundle
2194 *
2195 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2196 */
2197 static __inline void
2198 deliver_partial_bundle(struct t3cdev *tdev,
2199 struct sge_rspq *q,
2200 struct mbuf *mbufs[], int n)
2201 {
2202 if (n) {
2203 q->offload_bundles++;
2204 cxgb_ofld_recv(tdev, mbufs, n);
2205 }
2206 }
2207
2208 static __inline int
2209 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2210 struct mbuf *m, struct mbuf *rx_gather[],
2211 unsigned int gather_idx)
2212 {
2213
2214 rq->offload_pkts++;
2215 m->m_pkthdr.header = mtod(m, void *);
2216 rx_gather[gather_idx++] = m;
2217 if (gather_idx == RX_BUNDLE_SIZE) {
2218 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2219 gather_idx = 0;
2220 rq->offload_bundles++;
2221 }
2222 return (gather_idx);
2223 }
2224
2225 static void
2226 restart_tx(struct sge_qset *qs)
2227 {
2228 struct adapter *sc = qs->port->adapter;
2229
2230
2231 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2232 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2233 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2234 qs->txq[TXQ_OFLD].restarts++;
2235 DPRINTF("restarting TXQ_OFLD\n");
2236 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2237 }
2238 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2239 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2240 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2241 qs->txq[TXQ_CTRL].in_use);
2242
2243 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2244 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2245 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2246 qs->txq[TXQ_CTRL].restarts++;
2247 DPRINTF("restarting TXQ_CTRL\n");
2248 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2249 }
2250 }
2251
2252 /**
2253 * t3_sge_alloc_qset - initialize an SGE queue set
2254 * @sc: the controller softc
2255 * @id: the queue set id
2256 * @nports: how many Ethernet ports will be using this queue set
2257 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2258 * @p: configuration parameters for this queue set
2259 * @ntxq: number of Tx queues for the queue set
2260 * @pi: port info for queue set
2261 *
2262 * Allocate resources and initialize an SGE queue set. A queue set
2263 * comprises a response queue, two Rx free-buffer queues, and up to 3
2264 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2265 * queue, offload queue, and control queue.
2266 */
2267 int
2268 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2269 const struct qset_params *p, int ntxq, struct port_info *pi)
2270 {
2271 struct sge_qset *q = &sc->sge.qs[id];
2272 int i, header_size, ret = 0;
2273
2274 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			ret = ENOMEM;
			goto err;
		}
2280 q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2281 q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2282 mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2283 }
2284
2285 init_qset_cntxt(q, id);
2286 q->idx = id;
2287
2288 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2289 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2290 &q->fl[0].desc, &q->fl[0].sdesc,
2291 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2292 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2293 printf("error %d from alloc ring fl0\n", ret);
2294 goto err;
2295 }
2296
2297 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2298 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2299 &q->fl[1].desc, &q->fl[1].sdesc,
2300 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2301 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2302 printf("error %d from alloc ring fl1\n", ret);
2303 goto err;
2304 }
2305
2306 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2307 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2308 &q->rspq.desc_tag, &q->rspq.desc_map,
2309 NULL, NULL)) != 0) {
2310 printf("error %d from alloc ring rspq\n", ret);
2311 goto err;
2312 }
2313
2314 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2315 device_get_unit(sc->dev), irq_vec_idx);
2316 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2317
2318 for (i = 0; i < ntxq; ++i) {
2319 /*
2320 * The control queue always uses immediate data so does not
2321 * need to keep track of any mbufs.
2322 * XXX Placeholder for future TOE support.
2323 */
2324 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2325
2326 if ((ret = alloc_ring(sc, p->txq_size[i],
2327 sizeof(struct tx_desc), sz,
2328 &q->txq[i].phys_addr, &q->txq[i].desc,
2329 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2330 &q->txq[i].desc_map,
2331 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2332 printf("error %d from alloc ring tx %i\n", ret, i);
2333 goto err;
2334 }
2335 mbufq_init(&q->txq[i].sendq);
2336 q->txq[i].gen = 1;
2337 q->txq[i].size = p->txq_size[i];
2338 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2339 device_get_unit(sc->dev), irq_vec_idx, i);
2340 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2341 }
2342
2343 q->txq[TXQ_ETH].port = pi;
2344
2345 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2346 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2347 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2348 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2349
2350 q->fl[0].gen = q->fl[1].gen = 1;
2351 q->fl[0].size = p->fl_size;
2352 q->fl[1].size = p->jumbo_size;
2353
2354 q->rspq.gen = 1;
2355 q->rspq.cidx = 0;
2356 q->rspq.size = p->rspq_size;
2357
2358
2359 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2360 q->txq[TXQ_ETH].stop_thres = nports *
2361 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2362
2363 q->fl[0].buf_size = (MCLBYTES - header_size);
2364 q->fl[0].zone = zone_clust;
2365 q->fl[0].type = EXT_CLUSTER;
2366 #if __FreeBSD_version >= 700111
2367 if (cxgb_use_16k_clusters) {
2368 q->fl[1].buf_size = MJUM16BYTES - header_size;
2369 q->fl[1].zone = zone_jumbo16;
2370 q->fl[1].type = EXT_JUMBO16;
2371 } else {
2372 q->fl[1].buf_size = MJUM9BYTES - header_size;
2373 q->fl[1].zone = zone_jumbo9;
2374 q->fl[1].type = EXT_JUMBO9;
2375 }
2376 #else
2377 q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2378 q->fl[1].zone = zone_jumbop;
2379 q->fl[1].type = EXT_JUMBOP;
2380 #endif
2381
2382 /*
	 * We allocate and set up the lro_ctrl structure irrespective of whether
	 * LRO is available and/or enabled.
2385 */
2386 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2387 ret = tcp_lro_init(&q->lro.ctrl);
2388 if (ret) {
2389 printf("error %d from tcp_lro_init\n", ret);
2390 goto err;
2391 }
2392 q->lro.ctrl.ifp = pi->ifp;
2393
2394 mtx_lock_spin(&sc->sge.reg_lock);
2395 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2396 q->rspq.phys_addr, q->rspq.size,
2397 q->fl[0].buf_size, 1, 0);
2398 if (ret) {
2399 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2400 goto err_unlock;
2401 }
2402
2403 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2404 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2405 q->fl[i].phys_addr, q->fl[i].size,
2406 q->fl[i].buf_size, p->cong_thres, 1,
2407 0);
2408 if (ret) {
2409 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2410 goto err_unlock;
2411 }
2412 }
2413
2414 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2415 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2416 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2417 1, 0);
2418 if (ret) {
2419 printf("error %d from t3_sge_init_ecntxt\n", ret);
2420 goto err_unlock;
2421 }
2422
2423 if (ntxq > 1) {
2424 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2425 USE_GTS, SGE_CNTXT_OFLD, id,
2426 q->txq[TXQ_OFLD].phys_addr,
2427 q->txq[TXQ_OFLD].size, 0, 1, 0);
2428 if (ret) {
2429 printf("error %d from t3_sge_init_ecntxt\n", ret);
2430 goto err_unlock;
2431 }
2432 }
2433
2434 if (ntxq > 2) {
2435 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2436 SGE_CNTXT_CTRL, id,
2437 q->txq[TXQ_CTRL].phys_addr,
2438 q->txq[TXQ_CTRL].size,
2439 q->txq[TXQ_CTRL].token, 1, 0);
2440 if (ret) {
2441 printf("error %d from t3_sge_init_ecntxt\n", ret);
2442 goto err_unlock;
2443 }
2444 }
2445
2446 mtx_unlock_spin(&sc->sge.reg_lock);
2447 t3_update_qset_coalesce(q, p);
2448 q->port = pi;
2449
2450 refill_fl(sc, &q->fl[0], q->fl[0].size);
2451 refill_fl(sc, &q->fl[1], q->fl[1].size);
2452 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2453
2454 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2455 V_NEWTIMER(q->rspq.holdoff_tmr));
2456
2457 return (0);
2458
2459 err_unlock:
2460 mtx_unlock_spin(&sc->sge.reg_lock);
2461 err:
2462 t3_free_qset(sc, q);
2463
2464 return (ret);
2465 }
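
/*
 * Minimal sketch of how a caller might size a queue set before calling
 * t3_sge_alloc_qset().  The field names come from struct qset_params as
 * used above; the numbers are placeholders rather than recommended
 * values -- the driver normally derives them from tunables such as
 * fl_q_size and jumbo_q_size at attach time.
 *
 *	struct qset_params qp;
 *
 *	memset(&qp, 0, sizeof(qp));
 *	qp.fl_size = 1024;		// free list 0 (standard clusters)
 *	qp.jumbo_size = 512;		// free list 1 (jumbo clusters)
 *	qp.rspq_size = 1024;		// response queue entries
 *	qp.txq_size[TXQ_ETH] = 1024;	// Ethernet Tx ring
 *	qp.txq_size[TXQ_OFLD] = 1024;	// offload Tx ring
 *	qp.txq_size[TXQ_CTRL] = 256;	// control Tx ring
 *	qp.cong_thres = 0;
 *	qp.coalesce_usecs = 5;
 *	error = t3_sge_alloc_qset(sc, 0, 1, irq_vec_idx, &qp, 3, pi);
 */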
2466
2467 /*
2468 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2469 * ethernet data. Hardware assistance with various checksums and any vlan tag
2470 * will also be taken into account here.
2471 */
2472 void
2473 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2474 {
2475 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2476 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2477 struct ifnet *ifp = pi->ifp;
2478
2479 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2480
2481 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2482 cpl->csum_valid && cpl->csum == 0xffff) {
2483 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2484 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2485 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2486 m->m_pkthdr.csum_data = 0xffff;
2487 }
2488 /*
2489 * XXX need to add VLAN support for 6.x
2490 */
2491 #ifdef VLAN_SUPPORTED
2492 if (__predict_false(cpl->vlan_valid)) {
2493 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2494 m->m_flags |= M_VLANTAG;
2495 }
2496 #endif
2497
2498 m->m_pkthdr.rcvif = ifp;
2499 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2500 #ifndef DISABLE_MBUF_IOVEC
2501 m_explode(m);
2502 #endif
2503 /*
2504 * adjust after conversion to mbuf chain
2505 */
2506 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2507 m->m_len -= (sizeof(*cpl) + ethpad);
2508 m->m_data += (sizeof(*cpl) + ethpad);
2509 }
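
/*
 * Note on the checksum flags set above: marking an mbuf with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff is the
 * standard FreeBSD way of telling the stack that the L4 checksum has
 * already been verified, so TCP/UDP will not recompute it.  Any receive
 * path that has validated the checksum itself would set the same
 * combination, e.g.:
 *
 *	m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 *	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *	m->m_pkthdr.csum_data = 0xffff;
 */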
2510
2511 static void
2512 ext_free_handler(void *buf, void * args)
2513 {
2514 uintptr_t type = (uintptr_t)args;
2515 uma_zone_t zone;
2516 struct mbuf *m;
2517
2518 m = buf;
2519 zone = m_getzonefromtype(type);
2520 m->m_ext.ext_type = (int)type;
2521 cxgb_ext_freed++;
2522 cxgb_cache_put(zone, m);
2523 }
2524
2525 static void
2526 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2527 {
2528 struct mbuf *m;
2529 int header_size;
2530
2531 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2532 sizeof(struct m_ext_) + sizeof(uint32_t);
2533
2534 bzero(cl, header_size);
2535 m = (struct mbuf *)cl;
2536
2537 cxgb_ext_inited++;
2538 SLIST_INIT(&m->m_pkthdr.tags);
2539 m->m_type = MT_DATA;
2540 m->m_flags = flags | M_NOFREE | M_EXT;
2541 m->m_data = cl + header_size;
2542 m->m_ext.ext_buf = cl;
2543 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2544 m->m_ext.ext_size = m_getsizefromtype(type);
2545 m->m_ext.ext_free = ext_free_handler;
2546 m->m_ext.ext_args = (void *)(uintptr_t)type;
2547 m->m_ext.ext_type = EXT_EXTREF;
2548 *(m->m_ext.ref_cnt) = 1;
2549 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2550 }
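
/*
 * Layout produced by init_cluster_mbuf() above (offsets relative to the
 * start of the cluster; header_size is computed the same way in
 * t3_sge_alloc_qset()):
 *
 *	cl + 0				embedded struct mbuf
 *	cl + header_size - 4		32-bit external reference count
 *	cl + header_size		start of packet data (m->m_data)
 *
 * The mbuf is carved out of the front of the receive cluster itself,
 * which is why M_NOFREE is set and the cluster is released through
 * ext_free_handler() once its reference count drops.
 */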
2551
2552
2553 /**
2554 * get_packet - return the next ingress packet buffer from a free list
2555 * @adap: the adapter that received the packet
2556 * @drop_thres: # of remaining buffers before we start dropping packets
2557 * @qs: the qset that the SGE free list holding the packet belongs to
2558 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2559 * @r: response descriptor
2560 *
2561 * Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
2563 * original buffer, otherwise we use the original buffer itself. If a
2564 * positive drop threshold is supplied packets are dropped and their
2565 * buffers recycled if (a) the number of remaining buffers is under the
2566 * threshold and the packet is too big to copy, or (b) the packet should
2567 * be copied but there is no memory for the copy.
2568 */
2569 #ifdef DISABLE_MBUF_IOVEC
2570
2571 static int
2572 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2573 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2574 {
2575
2576 unsigned int len_cq = ntohl(r->len_cq);
2577 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2578 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2579 uint32_t len = G_RSPD_LEN(len_cq);
2580 uint32_t flags = ntohl(r->flags);
2581 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2582 caddr_t cl;
2583 struct mbuf *m, *m0;
2584 int ret = 0;
2585
2586 prefetch(sd->rxsd_cl);
2587
2588 fl->credits--;
2589 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2590
2591 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2592 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2593 goto skip_recycle;
2594 cl = mtod(m0, void *);
2595 memcpy(cl, sd->data, len);
2596 recycle_rx_buf(adap, fl, fl->cidx);
2597 m = m0;
2598 m0->m_len = len;
2599 } else {
2600 skip_recycle:
2601
2602 bus_dmamap_unload(fl->entry_tag, sd->map);
2603 cl = sd->rxsd_cl;
2604 m = m0 = (struct mbuf *)cl;
2605
2606 if ((sopeop == RSPQ_SOP_EOP) ||
2607 (sopeop == RSPQ_SOP))
2608 flags = M_PKTHDR;
2609 init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2610 m0->m_len = len;
2611 }
2612 switch(sopeop) {
2613 case RSPQ_SOP_EOP:
2614 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2615 mh->mh_head = mh->mh_tail = m;
2616 m->m_pkthdr.len = len;
2617 ret = 1;
2618 break;
2619 case RSPQ_NSOP_NEOP:
2620 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2621 if (mh->mh_tail == NULL) {
2622 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2623 m_freem(m);
2624 break;
2625 }
2626 mh->mh_tail->m_next = m;
2627 mh->mh_tail = m;
2628 mh->mh_head->m_pkthdr.len += len;
2629 ret = 0;
2630 break;
2631 case RSPQ_SOP:
2632 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2633 m->m_pkthdr.len = len;
2634 mh->mh_head = mh->mh_tail = m;
2635 ret = 0;
2636 break;
2637 case RSPQ_EOP:
2638 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2639 mh->mh_head->m_pkthdr.len += len;
2640 mh->mh_tail->m_next = m;
2641 mh->mh_tail = m;
2642 ret = 1;
2643 break;
2644 }
2645 if (++fl->cidx == fl->size)
2646 fl->cidx = 0;
2647
2648 return (ret);
2649 }
2650
2651 #else
2652
2653 static int
2654 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2655 struct mbuf **m, struct rsp_desc *r)
2656 {
2657
2658 unsigned int len_cq = ntohl(r->len_cq);
2659 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2660 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2661 uint32_t len = G_RSPD_LEN(len_cq);
2662 uint32_t flags = ntohl(r->flags);
2663 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2664 void *cl;
2665 int ret = 0;
2666 struct mbuf *m0;
2667 #if 0
2668 if ((sd + 1 )->rxsd_cl)
2669 prefetch((sd + 1)->rxsd_cl);
2670 if ((sd + 2)->rxsd_cl)
2671 prefetch((sd + 2)->rxsd_cl);
2672 #endif
2673 DPRINTF("rx cpu=%d\n", curcpu);
2674 fl->credits--;
2675 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2676
2677 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2678 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2679 goto skip_recycle;
2680 cl = mtod(m0, void *);
2681 memcpy(cl, sd->data, len);
2682 recycle_rx_buf(adap, fl, fl->cidx);
2683 *m = m0;
2684 } else {
2685 skip_recycle:
2686 bus_dmamap_unload(fl->entry_tag, sd->map);
2687 cl = sd->rxsd_cl;
2688 *m = m0 = (struct mbuf *)cl;
2689 }
2690
2691 switch(sopeop) {
2692 case RSPQ_SOP_EOP:
2693 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2694 if (cl == sd->rxsd_cl)
2695 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2696 m0->m_len = m0->m_pkthdr.len = len;
2697 ret = 1;
2698 goto done;
2699 break;
2700 case RSPQ_NSOP_NEOP:
2701 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2702 panic("chaining unsupported");
2703 ret = 0;
2704 break;
2705 case RSPQ_SOP:
2706 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2707 panic("chaining unsupported");
2708 m_iovinit(m0);
2709 ret = 0;
2710 break;
2711 case RSPQ_EOP:
2712 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2713 panic("chaining unsupported");
2714 ret = 1;
2715 break;
2716 }
2717 panic("append not supported");
2718 #if 0
2719 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2720 #endif
2721 done:
2722 if (++fl->cidx == fl->size)
2723 fl->cidx = 0;
2724
2725 return (ret);
2726 }
2727 #endif
2728 /**
2729 * handle_rsp_cntrl_info - handles control information in a response
2730 * @qs: the queue set corresponding to the response
2731 * @flags: the response control flags
2732 *
2733 * Handles the control information of an SGE response, such as GTS
2734 * indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits; we don't do any extra SW coalescing.
2736 */
2737 static __inline void
2738 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2739 {
2740 unsigned int credits;
2741
2742 #if USE_GTS
2743 if (flags & F_RSPD_TXQ0_GTS)
2744 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2745 #endif
2746 credits = G_RSPD_TXQ0_CR(flags);
2747 if (credits)
2748 qs->txq[TXQ_ETH].processed += credits;
2749
2750 credits = G_RSPD_TXQ2_CR(flags);
2751 if (credits)
2752 qs->txq[TXQ_CTRL].processed += credits;
2753
2754 # if USE_GTS
2755 if (flags & F_RSPD_TXQ1_GTS)
2756 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2757 # endif
2758 credits = G_RSPD_TXQ1_CR(flags);
2759 if (credits)
2760 qs->txq[TXQ_OFLD].processed += credits;
2761
2762 }
2763
2764 static void
2765 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2766 unsigned int sleeping)
2767 {
2768 ;
2769 }
2770
2771 /**
2772 * process_responses - process responses from an SGE response queue
2773 * @adap: the adapter
2774 * @qs: the queue set to which the response queue belongs
2775 * @budget: how many responses can be processed in this round
2776 *
2777 * Process responses from an SGE response queue up to the supplied budget.
2778 * Responses include received packets as well as credits and other events
2779 * for the queues that belong to the response queue's queue set.
2780 * A negative budget is effectively unlimited.
2781 *
2782 * Additionally choose the interrupt holdoff time for the next interrupt
2783 * on this queue. If the system is under memory shortage use a fairly
2784 * long delay to help recovery.
2785 */
2786 int
2787 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2788 {
2789 struct sge_rspq *rspq = &qs->rspq;
2790 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2791 int budget_left = budget;
2792 unsigned int sleeping = 0;
2793 int lro_enabled = qs->lro.enabled;
2794 int skip_lro;
2795 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2796 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2797 int ngathered = 0;
2798 #ifdef DEBUG
2799 static int last_holdoff = 0;
2800 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2801 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2802 last_holdoff = rspq->holdoff_tmr;
2803 }
2804 #endif
2805 rspq->next_holdoff = rspq->holdoff_tmr;
2806
2807 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2808 int eth, eop = 0, ethpad = 0;
2809 uint32_t flags = ntohl(r->flags);
2810 uint32_t rss_csum = *(const uint32_t *)r;
2811 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2812
2813 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2814
2815 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2816 struct mbuf *m;
2817
2818 if (cxgb_debug)
2819 printf("async notification\n");
2820
2821 if (rspq->rspq_mh.mh_head == NULL) {
2822 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2823 m = rspq->rspq_mh.mh_head;
2824 } else {
2825 m = m_gethdr(M_DONTWAIT, MT_DATA);
2826 }
2827
2828 /* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2829
2830 if (m == NULL)
2831 goto no_mem;
2832
2833 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2834 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2835 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2836 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2837 eop = 1;
2838 rspq->async_notif++;
2839 goto skip;
2840 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2841 struct mbuf *m = NULL;
2842
2843 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2844 r->rss_hdr.opcode, rspq->cidx);
2845 if (rspq->rspq_mh.mh_head == NULL)
2846 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2847 else
2848 m = m_gethdr(M_DONTWAIT, MT_DATA);
2849
2850 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
2851 no_mem:
2852 rspq->next_holdoff = NOMEM_INTR_DELAY;
2853 budget_left--;
2854 break;
2855 }
2856 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2857 eop = 1;
2858 rspq->imm_data++;
2859 } else if (r->len_cq) {
2860 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2861
2862 #ifdef DISABLE_MBUF_IOVEC
2863 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2864 #else
2865 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2866 #endif
2867 #ifdef IFNET_MULTIQUEUE
2868 rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2869 #endif
2870 ethpad = 2;
2871 } else {
2872 DPRINTF("pure response\n");
2873 rspq->pure_rsps++;
2874 }
2875 skip:
2876 if (flags & RSPD_CTRL_MASK) {
2877 sleeping |= flags & RSPD_GTS_MASK;
2878 handle_rsp_cntrl_info(qs, flags);
2879 }
2880
2881 r++;
2882 if (__predict_false(++rspq->cidx == rspq->size)) {
2883 rspq->cidx = 0;
2884 rspq->gen ^= 1;
2885 r = rspq->desc;
2886 }
2887 prefetch(r);
2888 if (++rspq->credits >= (rspq->size / 4)) {
2889 refill_rspq(adap, rspq, rspq->credits);
2890 rspq->credits = 0;
2891 }
2892 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2893
2894 if (!eth && eop) {
2895 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2896 /*
2897 * XXX size mismatch
2898 */
2899 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2900
2901
2902 ngathered = rx_offload(&adap->tdev, rspq,
2903 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2904 rspq->rspq_mh.mh_head = NULL;
2905 DPRINTF("received offload packet\n");
2906
2907 } else if (eth && eop) {
2908 struct mbuf *m = rspq->rspq_mh.mh_head;
2909 prefetch(mtod(m, uint8_t *));
2910 prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2911
2912 t3_rx_eth(adap, rspq, m, ethpad);
2913 /*
2914 * The T304 sends incoming packets on any qset. If LRO
			 * is also enabled, we could end up sending the packet up
2916 * lro_ctrl->ifp's input. That is incorrect.
2917 *
2918 * The mbuf's rcvif was derived from the cpl header and
2919 * is accurate. Skip LRO and just use that.
2920 */
2921 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2922
2923 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
2924 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
				/* successfully queued for LRO */
2926 } else {
2927 /*
2928 * LRO not enabled, packet unsuitable for LRO,
2929 * or unable to queue. Pass it up right now in
2930 * either case.
2931 */
2932 struct ifnet *ifp = m->m_pkthdr.rcvif;
2933 (*ifp->if_input)(ifp, m);
2934 }
2935 DPRINTF("received tunnel packet\n");
2936 rspq->rspq_mh.mh_head = NULL;
2937
2938 }
2939 __refill_fl_lt(adap, &qs->fl[0], 32);
2940 __refill_fl_lt(adap, &qs->fl[1], 32);
2941 --budget_left;
2942 }
2943
2944 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2945
2946 /* Flush LRO */
2947 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2948 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2949 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2950 tcp_lro_flush(lro_ctrl, queued);
2951 }
2952
2953 if (sleeping)
2954 check_ring_db(adap, qs, sleeping);
2955
2956 smp_mb(); /* commit Tx queue processed updates */
2957 if (__predict_false(qs->txq_stopped > 1))
2958 restart_tx(qs);
2959
2960 __refill_fl_lt(adap, &qs->fl[0], 512);
2961 __refill_fl_lt(adap, &qs->fl[1], 512);
2962 budget -= budget_left;
2963 return (budget);
2964 }
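
/*
 * Return value example: process_responses(adap, qs, 100) that finds
 * only 37 new responses returns 37 (budget minus budget_left).  A
 * negative budget, as used by process_responses_gts() below, removes
 * the limit while still returning the number of responses processed.
 */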
2965
2966 /*
2967 * A helper function that processes responses and issues GTS.
2968 */
2969 static __inline int
2970 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2971 {
2972 int work;
2973 static int last_holdoff = 0;
2974
2975 work = process_responses(adap, rspq_to_qset(rq), -1);
2976
2977 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2978 printf("next_holdoff=%d\n", rq->next_holdoff);
2979 last_holdoff = rq->next_holdoff;
2980 }
2981 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2982 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2983
2984 return (work);
2985 }
2986
2987
2988 /*
2989 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2990 * Handles data events from SGE response queues as well as error and other
2991 * async events as they all use the same interrupt pin. We use one SGE
2992 * response queue per port in this mode and protect all response queues with
2993 * queue 0's lock.
2994 */
2995 void
2996 t3b_intr(void *data)
2997 {
2998 uint32_t i, map;
2999 adapter_t *adap = data;
3000 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3001
3002 t3_write_reg(adap, A_PL_CLI, 0);
3003 map = t3_read_reg(adap, A_SG_DATA_INTR);
3004
3005 if (!map)
3006 return;
3007
3008 if (__predict_false(map & F_ERRINTR)) {
3009 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3010 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3011 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3012 }
3013
3014 mtx_lock(&q0->lock);
3015 for_each_port(adap, i)
3016 if (map & (1 << i))
3017 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3018 mtx_unlock(&q0->lock);
3019 }
3020
3021 /*
3022 * The MSI interrupt handler. This needs to handle data events from SGE
3023 * response queues as well as error and other async events as they all use
3024 * the same MSI vector. We use one SGE response queue per port in this mode
3025 * and protect all response queues with queue 0's lock.
3026 */
3027 void
3028 t3_intr_msi(void *data)
3029 {
3030 adapter_t *adap = data;
3031 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3032 int i, new_packets = 0;
3033
3034 mtx_lock(&q0->lock);
3035
3036 for_each_port(adap, i)
3037 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3038 new_packets = 1;
3039 mtx_unlock(&q0->lock);
3040 if (new_packets == 0) {
3041 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3042 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3043 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3044 }
3045 }
3046
3047 void
3048 t3_intr_msix(void *data)
3049 {
3050 struct sge_qset *qs = data;
3051 adapter_t *adap = qs->port->adapter;
3052 struct sge_rspq *rspq = &qs->rspq;
3053 #ifndef IFNET_MULTIQUEUE
3054 mtx_lock(&rspq->lock);
3055 #else
3056 if (mtx_trylock(&rspq->lock))
3057 #endif
3058 {
3059
3060 if (process_responses_gts(adap, rspq) == 0)
3061 rspq->unhandled_irqs++;
3062 mtx_unlock(&rspq->lock);
3063 }
3064 }
3065
#define QDUMP_SBUF_SIZE (32 * 400)
3067 static int
3068 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3069 {
3070 struct sge_rspq *rspq;
3071 struct sge_qset *qs;
3072 int i, err, dump_end, idx;
3073 struct sbuf *sb;
3074 struct rsp_desc *rspd;
3075 uint32_t data[4];
3076
3077 rspq = arg1;
3078 qs = rspq_to_qset(rspq);
3079 if (rspq->rspq_dump_count == 0)
3080 return (0);
3081 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3082 log(LOG_WARNING,
3083 "dump count is too large %d\n", rspq->rspq_dump_count);
3084 rspq->rspq_dump_count = 0;
3085 return (EINVAL);
3086 }
3087 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3088 log(LOG_WARNING,
3089 "dump start of %d is greater than queue size\n",
3090 rspq->rspq_dump_start);
3091 rspq->rspq_dump_start = 0;
3092 return (EINVAL);
3093 }
3094 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3095 if (err)
3096 return (err);
3097 err = sysctl_wire_old_buffer(req, 0);
3098 if (err)
3099 return (err);
3100 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3101
3102 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3103 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3104 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3105 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3106 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3107
3108 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3109 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3110
3111 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3112 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3113 idx = i & (RSPQ_Q_SIZE-1);
3114
3115 rspd = &rspq->desc[idx];
3116 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3117 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3118 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3119 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3120 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3121 be32toh(rspd->len_cq), rspd->intr_gen);
3122 }
3123
3124 err = sbuf_finish(sb);
3125 /* Output a trailing NUL. */
3126 if (err == 0)
3127 err = SYSCTL_OUT(req, "", 1);
3128 sbuf_delete(sb);
3129 return (err);
3130 }
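
/*
 * Usage sketch (the exact sysctl path depends on how the controller and
 * queue set nodes are named on a given system; "cxgbc.0" and "qs0" are
 * placeholders):
 *
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.dump_start=0
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.dump_count=32
 *	sysctl dev.cxgbc.0.port0.qs0.rspq.qdump
 *
 * i.e. set dump_start and dump_count first, then read qdump to obtain
 * the formatted descriptors produced by this handler.
 */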
3131
3132 static int
3133 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3134 {
3135 struct sge_txq *txq;
3136 struct sge_qset *qs;
3137 int i, j, err, dump_end;
3138 struct sbuf *sb;
3139 struct tx_desc *txd;
3140 uint32_t *WR, wr_hi, wr_lo, gen;
3141 uint32_t data[4];
3142
3143 txq = arg1;
3144 qs = txq_to_qset(txq, TXQ_ETH);
3145 if (txq->txq_dump_count == 0) {
3146 return (0);
3147 }
3148 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3149 log(LOG_WARNING,
3150 "dump count is too large %d\n", txq->txq_dump_count);
3151 txq->txq_dump_count = 1;
3152 return (EINVAL);
3153 }
3154 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3155 log(LOG_WARNING,
3156 "dump start of %d is greater than queue size\n",
3157 txq->txq_dump_start);
3158 txq->txq_dump_start = 0;
3159 return (EINVAL);
3160 }
3161 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3162 if (err)
3163 return (err);
3164 err = sysctl_wire_old_buffer(req, 0);
3165 if (err)
3166 return (err);
3167 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3168
3169 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3170 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3171 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3173 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3174 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3175 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3176 txq->txq_dump_start,
3177 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3178
3179 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3180 for (i = txq->txq_dump_start; i < dump_end; i++) {
3181 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3182 WR = (uint32_t *)txd->flit;
3183 wr_hi = ntohl(WR[0]);
3184 wr_lo = ntohl(WR[1]);
3185 gen = G_WR_GEN(wr_lo);
3186
3187 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3188 wr_hi, wr_lo, gen);
3189 for (j = 2; j < 30; j += 4)
3190 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3191 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3192
3193 }
3194 err = sbuf_finish(sb);
3195 /* Output a trailing NUL. */
3196 if (err == 0)
3197 err = SYSCTL_OUT(req, "", 1);
3198 sbuf_delete(sb);
3199 return (err);
3200 }
3201
3202 static int
3203 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3204 {
3205 struct sge_txq *txq;
3206 struct sge_qset *qs;
3207 int i, j, err, dump_end;
3208 struct sbuf *sb;
3209 struct tx_desc *txd;
3210 uint32_t *WR, wr_hi, wr_lo, gen;
3211
3212 txq = arg1;
3213 qs = txq_to_qset(txq, TXQ_CTRL);
3214 if (txq->txq_dump_count == 0) {
3215 return (0);
3216 }
3217 if (txq->txq_dump_count > 256) {
3218 log(LOG_WARNING,
3219 "dump count is too large %d\n", txq->txq_dump_count);
3220 txq->txq_dump_count = 1;
3221 return (EINVAL);
3222 }
3223 if (txq->txq_dump_start > 255) {
3224 log(LOG_WARNING,
3225 "dump start of %d is greater than queue size\n",
3226 txq->txq_dump_start);
3227 txq->txq_dump_start = 0;
3228 return (EINVAL);
3229 }
3230
3231 err = sysctl_wire_old_buffer(req, 0);
3232 if (err != 0)
3233 return (err);
3234 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3235 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3236 txq->txq_dump_start,
3237 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3238
3239 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3240 for (i = txq->txq_dump_start; i < dump_end; i++) {
3241 txd = &txq->desc[i & (255)];
3242 WR = (uint32_t *)txd->flit;
3243 wr_hi = ntohl(WR[0]);
3244 wr_lo = ntohl(WR[1]);
3245 gen = G_WR_GEN(wr_lo);
3246
3247 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3248 wr_hi, wr_lo, gen);
3249 for (j = 2; j < 30; j += 4)
3250 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3251 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3252
3253 }
3254 err = sbuf_finish(sb);
3255 /* Output a trailing NUL. */
3256 if (err == 0)
3257 err = SYSCTL_OUT(req, "", 1);
3258 sbuf_delete(sb);
3259 return (err);
3260 }
3261
3262 static int
3263 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3264 {
3265 adapter_t *sc = arg1;
3266 struct qset_params *qsp = &sc->params.sge.qset[0];
3267 int coalesce_usecs;
3268 struct sge_qset *qs;
3269 int i, j, err, nqsets = 0;
3270 struct mtx *lock;
3271
3272 if ((sc->flags & FULL_INIT_DONE) == 0)
3273 return (ENXIO);
3274
3275 coalesce_usecs = qsp->coalesce_usecs;
3276 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3277
3278 if (err != 0) {
3279 return (err);
3280 }
3281 if (coalesce_usecs == qsp->coalesce_usecs)
3282 return (0);
3283
3284 for (i = 0; i < sc->params.nports; i++)
3285 for (j = 0; j < sc->port[i].nqsets; j++)
3286 nqsets++;
3287
3288 coalesce_usecs = max(1, coalesce_usecs);
3289
3290 for (i = 0; i < nqsets; i++) {
3291 qs = &sc->sge.qs[i];
3292 qsp = &sc->params.sge.qset[i];
3293 qsp->coalesce_usecs = coalesce_usecs;
3294
3295 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3296 &sc->sge.qs[0].rspq.lock;
3297
3298 mtx_lock(lock);
3299 t3_update_qset_coalesce(qs, qsp);
3300 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3301 V_NEWTIMER(qs->rspq.holdoff_tmr));
3302 mtx_unlock(lock);
3303 }
3304
3305 return (0);
3306 }
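
/*
 * Usage sketch: this handler backs the "intr_coal" sysctl added in
 * t3_add_configured_sysctls(), so interrupt coalescing for all queue
 * sets can be retuned at runtime with something like (device name is a
 * placeholder):
 *
 *	sysctl dev.cxgbc.0.intr_coal=50
 *
 * The value is clamped to a minimum of 1 microsecond and applied to
 * every response queue under the appropriate lock.
 */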
3307
3308
3309 void
3310 t3_add_attach_sysctls(adapter_t *sc)
3311 {
3312 struct sysctl_ctx_list *ctx;
3313 struct sysctl_oid_list *children;
3314
3315 ctx = device_get_sysctl_ctx(sc->dev);
3316 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3317
3318 /* random information */
3319 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3320 "firmware_version",
3321 CTLFLAG_RD, &sc->fw_version,
3322 0, "firmware version");
3323 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3324 "hw_revision",
3325 CTLFLAG_RD, &sc->params.rev,
	    0, "chip revision");
3327 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3328 "port_types",
3329 CTLFLAG_RD, &sc->port_types,
3330 0, "type of ports");
3331 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3332 "enable_debug",
3333 CTLFLAG_RW, &cxgb_debug,
3334 0, "enable verbose debugging output");
3335 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3336 CTLFLAG_RD, &sc->tunq_coalesce,
3337 "#tunneled packets freed");
3338 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3339 "txq_overrun",
3340 CTLFLAG_RD, &txq_fills,
3341 0, "#times txq overrun");
3342 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3343 "pcpu_cache_enable",
3344 CTLFLAG_RW, &cxgb_pcpu_cache_enable,
	    0, "enable driver local pcpu caches");
3346 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3347 "cache_alloc",
3348 CTLFLAG_RD, &cxgb_cached_allocations,
3349 0, "#times a cluster was allocated from cache");
3350 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3351 "cached",
3352 CTLFLAG_RD, &cxgb_cached,
3353 0, "#times a cluster was cached");
3354 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3355 "ext_freed",
3356 CTLFLAG_RD, &cxgb_ext_freed,
3357 0, "#times a cluster was freed through ext_free");
3358 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3359 "ext_inited",
3360 CTLFLAG_RD, &cxgb_ext_inited,
3361 0, "#times a cluster was initialized for ext_free");
3362 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3363 "mbufs_outstanding",
3364 CTLFLAG_RD, &cxgb_mbufs_outstanding,
3365 0, "#mbufs in flight in the driver");
3366 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3367 "pack_outstanding",
3368 CTLFLAG_RD, &cxgb_pack_outstanding,
	    0, "#packets in flight in the driver");
3370 }
3371
3372
3373 static const char *rspq_name = "rspq";
3374 static const char *txq_names[] =
3375 {
3376 "txq_eth",
3377 "txq_ofld",
3378 "txq_ctrl"
3379 };
3380
3381 static int
3382 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3383 {
3384 struct port_info *p = arg1;
3385 uint64_t *parg;
3386
3387 if (!p)
3388 return (EINVAL);
3389
3390 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3391
3392 PORT_LOCK(p);
3393 t3_mac_update_stats(&p->mac);
3394 PORT_UNLOCK(p);
3395
3396 return (sysctl_handle_quad(oidp, parg, 0, req));
3397 }
3398
3399 void
3400 t3_add_configured_sysctls(adapter_t *sc)
3401 {
3402 struct sysctl_ctx_list *ctx;
3403 struct sysctl_oid_list *children;
3404 int i, j;
3405
3406 ctx = device_get_sysctl_ctx(sc->dev);
3407 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3408
3409 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3410 "intr_coal",
3411 CTLTYPE_INT|CTLFLAG_RW, sc,
3412 0, t3_set_coalesce_usecs,
3413 "I", "interrupt coalescing timer (us)");
3414
3415 for (i = 0; i < sc->params.nports; i++) {
3416 struct port_info *pi = &sc->port[i];
3417 struct sysctl_oid *poid;
3418 struct sysctl_oid_list *poidlist;
3419 struct mac_stats *mstats = &pi->mac.stats;
3420
3421 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3422 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3423 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3424 poidlist = SYSCTL_CHILDREN(poid);
3425 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3426 "nqsets", CTLFLAG_RD, &pi->nqsets,
3427 0, "#queue sets");
3428
3429 for (j = 0; j < pi->nqsets; j++) {
3430 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3431 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3432 *ctrlqpoid, *lropoid;
3433 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3434 *txqpoidlist, *ctrlqpoidlist,
3435 *lropoidlist;
3436 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3437
3438 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3439
3440 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3441 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3442 qspoidlist = SYSCTL_CHILDREN(qspoid);
3443
3444 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3445 CTLFLAG_RD, &qs->fl[0].empty, 0,
3446 "freelist #0 empty");
3447 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3448 CTLFLAG_RD, &qs->fl[1].empty, 0,
3449 "freelist #1 empty");
3450
3451 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3452 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3453 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3454
3455 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3456 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3457 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3458
3459 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3460 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3461 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3462
3463 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3464 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3465 lropoidlist = SYSCTL_CHILDREN(lropoid);
3466
3467 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3468 CTLFLAG_RD, &qs->rspq.size,
3469 0, "#entries in response queue");
3470 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3471 CTLFLAG_RD, &qs->rspq.cidx,
3472 0, "consumer index");
3473 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3474 CTLFLAG_RD, &qs->rspq.credits,
3475 0, "#credits");
3476 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3477 CTLFLAG_RD, &qs->rspq.starved,
3478 0, "#times starved");
3479 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3480 CTLFLAG_RD, &qs->rspq.phys_addr,
	    "physical address of the queue");
3482 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3483 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3484 0, "start rspq dump entry");
3485 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3486 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3487 0, "#rspq entries to dump");
3488 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3489 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3490 0, t3_dump_rspq, "A", "dump of the response queue");
3491
3492
3493 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3494 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3495 0, "#tunneled packets dropped");
3496 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3497 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3498 0, "#tunneled packets waiting to be sent");
3499 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3500 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3501 0, "#tunneled packets queue producer index");
3502 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3503 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3504 0, "#tunneled packets queue consumer index");
3505 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3506 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3507 0, "#tunneled packets processed by the card");
3508 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3509 CTLFLAG_RD, &txq->cleaned,
3510 0, "#tunneled packets cleaned");
3511 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3512 CTLFLAG_RD, &txq->in_use,
3513 0, "#tunneled packet slots in use");
3514 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3515 CTLFLAG_RD, &txq->txq_frees,
3516 "#tunneled packets freed");
3517 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3518 CTLFLAG_RD, &txq->txq_skipped,
3519 0, "#tunneled packet descriptors skipped");
3520 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3521 CTLFLAG_RD, &txq->txq_coalesced,
3522 0, "#tunneled packets coalesced");
3523 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3524 CTLFLAG_RD, &txq->txq_enqueued,
3525 0, "#tunneled packets enqueued to hardware");
3526 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3527 CTLFLAG_RD, &qs->txq_stopped,
3528 0, "tx queues stopped");
3529 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3530 CTLFLAG_RD, &txq->phys_addr,
	    "physical address of the queue");
3532 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3533 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3534 0, "txq generation");
3535 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3536 CTLFLAG_RD, &txq->cidx,
3537 0, "hardware queue cidx");
3538 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3539 CTLFLAG_RD, &txq->pidx,
3540 0, "hardware queue pidx");
3541 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3542 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3543 0, "txq start idx for dump");
3544 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3545 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3546 0, "txq #entries to dump");
3547 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3548 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3549 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3550
3551 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3552 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3553 0, "ctrlq start idx for dump");
3554 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3555 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3556 0, "ctrl #entries to dump");
3557 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3558 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3559 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3560
3561 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3562 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3563 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3564 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3565 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3566 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3567 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3568 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3569 }
3570
3571 /* Now add a node for mac stats. */
3572 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3573 CTLFLAG_RD, NULL, "MAC statistics");
3574 poidlist = SYSCTL_CHILDREN(poid);
3575
3576 /*
3577 * We (ab)use the length argument (arg2) to pass on the offset
3578 * of the data that we are interested in. This is only required
3579 * for the quad counters that are updated from the hardware (we
3580 * make sure that we return the latest value).
3581 * sysctl_handle_macstat first updates *all* the counters from
3582 * the hardware, and then returns the latest value of the
3583 * requested counter. Best would be to update only the
3584 * requested counter from hardware, but t3_mac_update_stats()
3585 * hides all the register details and we don't want to dive into
3586 * all that here.
3587 */
3588 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3589 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3590 sysctl_handle_macstat, "QU", 0)
3591 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3592 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3593 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3594 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3595 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3596 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3597 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3598 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3599 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3600 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3601 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3602 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3603 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3604 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3605 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3606 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3607 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3608 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3609 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3610 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3611 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3612 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3613 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3614 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3615 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3616 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3617 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3618 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3619 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3620 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3621 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3622 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3623 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3624 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3625 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3626 CXGB_SYSCTL_ADD_QUAD(rx_short);
3627 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3628 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3629 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3630 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3631 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3632 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3633 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3634 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3635 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3636 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3637 #undef CXGB_SYSCTL_ADD_QUAD
3638
3639 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3640 CTLFLAG_RD, &mstats->a, 0)
3641 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3642 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3643 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3644 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3645 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3646 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3647 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3648 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3649 CXGB_SYSCTL_ADD_ULONG(num_resets);
3650 CXGB_SYSCTL_ADD_ULONG(link_faults);
3651 #undef CXGB_SYSCTL_ADD_ULONG
3652 }
3653 }
3654
3655 /**
3656 * t3_get_desc - dump an SGE descriptor for debugging purposes
3657 * @qs: the queue set
3658 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3659 * @idx: the descriptor index in the queue
3660 * @data: where to dump the descriptor contents
3661 *
3662 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3663 * size of the descriptor.
3664 */
3665 int
3666 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3667 unsigned char *data)
3668 {
3669 if (qnum >= 6)
3670 return (EINVAL);
3671
3672 if (qnum < 3) {
3673 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3674 return -EINVAL;
3675 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3676 return sizeof(struct tx_desc);
3677 }
3678
3679 if (qnum == 3) {
3680 if (!qs->rspq.desc || idx >= qs->rspq.size)
3681 return (EINVAL);
3682 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3683 return sizeof(struct rsp_desc);
3684 }
3685
3686 qnum -= 4;
3687 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3688 return (EINVAL);
3689 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3690 return sizeof(struct rx_desc);
3691 }