1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/8.0/sys/dev/cxgb/cxgb_sge.c 195512 2009-07-09 19:27:58Z np $");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/conf.h>
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus_dma.h>
42 #include <sys/rman.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/taskqueue.h>
46
47 #include <sys/proc.h>
48 #include <sys/sbuf.h>
49 #include <sys/sched.h>
50 #include <sys/smp.h>
51 #include <sys/systm.h>
52 #include <sys/syslog.h>
53
54 #include <net/bpf.h>
55
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66
67 #include <cxgb_include.h>
68 #include <sys/mvec.h>
69
70 int txq_fills = 0;
71 int multiq_tx_enable = 1;
72
73 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
77 "size of per-queue mbuf ring");
78
79 static int cxgb_tx_coalesce_force = 0;
80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
82 &cxgb_tx_coalesce_force, 0,
83 "coalesce small packets into a single work request regardless of ring state");
84
/*
 * Bounds and defaults for the coalescing and reclaim thresholds, all
 * expressed as fractions of the Ethernet Tx queue size.  Every expansion
 * is fully parenthesized so the macros are safe inside larger expressions.
 */
#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)
92
93
94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
96 &cxgb_tx_coalesce_enable_start);
97 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
98 &cxgb_tx_coalesce_enable_start, 0,
99 "coalesce enable threshold");
100 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
102 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
103 &cxgb_tx_coalesce_enable_stop, 0,
104 "coalesce disable threshold");
105 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
106 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
107 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
108 &cxgb_tx_reclaim_threshold, 0,
109 "tx cleaning minimum threshold");
110
/*
 * XXX Rx buffer recycling must stay disabled until TOE stops assuming
 * that every mbuf carries an m_ext.
 */
static int	recycle_enable = 0;

int	cxgb_ext_freed = 0;
int	cxgb_ext_inited = 0;

/* Free-list sizes; computed in t3_sge_prep(). */
int	fl_q_size = 0;
int	jumbo_q_size = 0;

/* Defined outside this file. */
extern int	cxgb_use_16k_clusters;
extern int	nmbjumbo4;
extern int	nmbjumbo9;
extern int	nmbjumbo16;
125
#define	USE_GTS			0

#define	SGE_RX_SM_BUF_SIZE	1536
#define	SGE_RX_DROP_THRES	16
#define	SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer, half a second's worth of ticks.
 * It does not need to fire often because new Tx packets normally trigger
 * reclaiming of Tx buffers.
 */
#define	TX_RECLAIM_PERIOD	(hz >> 1)
137
/*
 * Bit flags stored in sge_txq.flags.
 */
enum {
	/* fetch engine is running */
	TXQ_RUNNING	= 1 << 0,
	/* last packet rang the doorbell */
	TXQ_LAST_PKT_DB	= 1 << 1,
};
145
146 struct tx_desc {
147 uint64_t flit[TX_DESC_FLITS];
148 } __packed;
149
/*
 * Rx (free-list) descriptor.  refill_fl() and recycle_rx_buf() store every
 * field in big-endian form: the low and high halves of the buffer address
 * plus the two generation words.
 */
struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;
156
157 struct rsp_desc { /* response queue descriptor */
158 struct rss_header rss_hdr;
159 uint32_t flags;
160 uint32_t len_cq;
161 uint8_t imm_data[47];
162 uint8_t intr_gen;
163 } __packed;
164
/* Bits kept in the flags member of the software descriptors. */
#define	RX_SW_DESC_MAP_CREATED	(1 << 0)
#define	TX_SW_DESC_MAP_CREATED	(1 << 1)
#define	RX_SW_DESC_INUSE	(1 << 3)
#define	TX_SW_DESC_MAPPED	(1 << 4)

/* SOP/EOP combinations as extracted by G_RSPD_SOP_EOP(). */
#define	RSPQ_NSOP_NEOP		G_RSPD_SOP_EOP(0)
#define	RSPQ_EOP		G_RSPD_SOP_EOP(F_RSPD_EOP)
#define	RSPQ_SOP		G_RSPD_SOP_EOP(F_RSPD_SOP)
#define	RSPQ_SOP_EOP		G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
174
175 struct tx_sw_desc { /* SW state per Tx descriptor */
176 struct mbuf *m;
177 bus_dmamap_t map;
178 int flags;
179 };
180
181 struct rx_sw_desc { /* SW state per Rx descriptor */
182 caddr_t rxsd_cl;
183 struct mbuf *m;
184 bus_dmamap_t map;
185 int flags;
186 };
187
/* Snapshot of Tx queue producer state, filled in by txq_prod(). */
struct txq_state {
	unsigned int	compl;	/* completion-request bits for the WR */
	unsigned int	gen;	/* queue generation before the update */
	unsigned int	pidx;	/* producer index before the update */
};
193
194 struct refill_fl_cb_arg {
195 int error;
196 bus_dma_segment_t seg;
197 int nseg;
198 };
199
200
201 /*
202 * Maps a number of flits to the number of Tx descriptors that can hold them.
203 * The formula is
204 *
205 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
206 *
207 * HW allows up to 4 descriptors to be combined into a WR.
208 */
209 static uint8_t flit_desc_map[] = {
210 0,
211 #if SGE_NUM_GENBITS == 1
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
213 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
214 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
215 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
216 #elif SGE_NUM_GENBITS == 2
217 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
218 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
219 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
220 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
221 #else
222 # error "SGE_NUM_GENBITS must be 1 or 2"
223 #endif
224 };
225
/* Queue-set lock helpers. */
#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)

/* Wrappers around the drbr_*() calls for the Ethernet Tx queue's mbuf ring. */
#define	TXQ_RING_EMPTY(qs)						\
	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)						\
	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs)						\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236
int	cxgb_debug = 0;

/* Forward declarations of file-local functions. */
static void	sge_timer_cb(void *arg);
static void	sge_timer_reclaim(void *arg, int ncount);
static void	sge_txq_reclaim_handler(void *arg, int ncount);
static void	cxgb_start_locked(struct sge_qset *qs);
243
244 /*
245 * XXX need to cope with bursty scheduling by looking at a wider
246 * window than we are now for determining the need for coalescing
247 *
248 */
249 static __inline uint64_t
250 check_pkt_coalesce(struct sge_qset *qs)
251 {
252 struct adapter *sc;
253 struct sge_txq *txq;
254 uint8_t *fill;
255
256 if (__predict_false(cxgb_tx_coalesce_force))
257 return (1);
258 txq = &qs->txq[TXQ_ETH];
259 sc = qs->port->adapter;
260 fill = &sc->tunq_fill[qs->idx];
261
262 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
263 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
264 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
265 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
266 /*
267 * if the hardware transmit queue is more than 1/8 full
268 * we mark it as coalescing - we drop back from coalescing
269 * when we go below 1/32 full and there are no packets enqueued,
270 * this provides us with some degree of hysteresis
271 */
272 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
273 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
274 *fill = 0;
275 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
276 *fill = 1;
277
278 return (sc->tunq_coalesce);
279 }
280
281 #ifdef __LP64__
282 static void
283 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
284 {
285 uint64_t wr_hilo;
286 #if _BYTE_ORDER == _LITTLE_ENDIAN
287 wr_hilo = wr_hi;
288 wr_hilo |= (((uint64_t)wr_lo)<<32);
289 #else
290 wr_hilo = wr_lo;
291 wr_hilo |= (((uint64_t)wr_hi)<<32);
292 #endif
293 wrp->wrh_hilo = wr_hilo;
294 }
295 #else
296 static void
297 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
298 {
299
300 wrp->wrh_hi = wr_hi;
301 wmb();
302 wrp->wrh_lo = wr_lo;
303 }
304 #endif
305
/* Running totals maintained by coalesce_check() during one dequeue pass. */
struct coalesce_info {
	int	count;		/* packets accepted so far */
	int	nbytes;		/* sum of m_len of the accepted packets */
};
310
311 static int
312 coalesce_check(struct mbuf *m, void *arg)
313 {
314 struct coalesce_info *ci = arg;
315 int *count = &ci->count;
316 int *nbytes = &ci->nbytes;
317
318 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
319 (*count < 7) && (m->m_next == NULL))) {
320 *count += 1;
321 *nbytes += m->m_len;
322 return (1);
323 }
324 return (0);
325 }
326
327 static struct mbuf *
328 cxgb_dequeue(struct sge_qset *qs)
329 {
330 struct mbuf *m, *m_head, *m_tail;
331 struct coalesce_info ci;
332
333
334 if (check_pkt_coalesce(qs) == 0)
335 return TXQ_RING_DEQUEUE(qs);
336
337 m_head = m_tail = NULL;
338 ci.count = ci.nbytes = 0;
339 do {
340 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
341 if (m_head == NULL) {
342 m_tail = m_head = m;
343 } else if (m != NULL) {
344 m_tail->m_nextpkt = m;
345 m_tail = m;
346 }
347 } while (m != NULL);
348 if (ci.count > 7)
349 panic("trying to coalesce %d packets in to one WR", ci.count);
350 return (m_head);
351 }
352
353 /**
354 * reclaim_completed_tx - reclaims completed Tx descriptors
355 * @adapter: the adapter
356 * @q: the Tx queue to reclaim completed descriptors from
357 *
358 * Reclaims Tx descriptors that the SGE has indicated it has processed,
359 * and frees the associated buffers if possible. Called with the Tx
360 * queue's lock held.
361 */
362 static __inline int
363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
364 {
365 struct sge_txq *q = &qs->txq[queue];
366 int reclaim = desc_reclaimable(q);
367
368 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
369 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
370 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
371
372 if (reclaim < reclaim_min)
373 return (0);
374
375 mtx_assert(&qs->lock, MA_OWNED);
376 if (reclaim > 0) {
377 t3_free_tx_desc(qs, reclaim, queue);
378 q->cleaned += reclaim;
379 q->in_use -= reclaim;
380 }
381 if (isset(&qs->txq_stopped, TXQ_ETH))
382 clrbit(&qs->txq_stopped, TXQ_ETH);
383
384 return (reclaim);
385 }
386
387 /**
388 * should_restart_tx - are there enough resources to restart a Tx queue?
389 * @q: the Tx queue
390 *
391 * Checks if there are enough descriptors to restart a suspended Tx queue.
392 */
393 static __inline int
394 should_restart_tx(const struct sge_txq *q)
395 {
396 unsigned int r = q->processed - q->cleaned;
397
398 return q->in_use - r < (q->size >> 1);
399 }
400
401 /**
402 * t3_sge_init - initialize SGE
403 * @adap: the adapter
404 * @p: the SGE parameters
405 *
406 * Performs SGE initialization needed every time after a chip reset.
407 * We do not initialize any of the queue sets here, instead the driver
408 * top-level must request those individually. We also do not enable DMA
409 * here, that should be done after the queues have been set up.
410 */
411 void
412 t3_sge_init(adapter_t *adap, struct sge_params *p)
413 {
414 u_int ctrl, ups;
415
416 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
417
418 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
419 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
420 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
421 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
422 #if SGE_NUM_GENBITS == 1
423 ctrl |= F_EGRGENCTRL;
424 #endif
425 if (adap->params.rev > 0) {
426 if (!(adap->flags & (USING_MSIX | USING_MSI)))
427 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
428 }
429 t3_write_reg(adap, A_SG_CONTROL, ctrl);
430 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
431 V_LORCQDRBTHRSH(512));
432 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
433 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
434 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
435 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
436 adap->params.rev < T3_REV_C ? 1000 : 500);
437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
442 }
443
444
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits needed for a scatter/gather list that
 *	can hold @n entries: three flits per pair of entries, and two flits
 *	for a trailing unpaired entry.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	unsigned int flits = (3 * n) / 2;

	if (n & 1)
		flits++;
	return (flits);
}
457
458 /**
459 * get_imm_packet - return the next ingress packet buffer from a response
460 * @resp: the response descriptor containing the packet data
461 *
462 * Return a packet containing the immediate data of the given response.
463 */
464 static int
465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
466 {
467
468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
469 m->m_ext.ext_buf = NULL;
470 m->m_ext.ext_type = 0;
471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
472 return (0);
473 }
474
475 static __inline u_int
476 flits_to_desc(u_int n)
477 {
478 return (flit_desc_map[n]);
479 }
480
/* Interrupt cause bits reported as parity errors. */
#define	SGE_PARERR							\
	(F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR |		\
	 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) |		\
	 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR |	\
	 F_HIDRBPARITYERROR | F_LORCQPARITYERROR |			\
	 F_HIRCQPARITYERROR)
/* Interrupt cause bits reported as framing errors. */
#define	SGE_FRAMINGERR	(F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
/* Causes for which t3_sge_err_intr_handler() calls t3_fatal_err(). */
#define	SGE_FATALERR							\
	(SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW |		\
	 F_RSPQDISABLED)
489
490 /**
491 * t3_sge_err_intr_handler - SGE async event interrupt handler
492 * @adapter: the adapter
493 *
494 * Interrupt handler for SGE asynchronous (non-data) events.
495 */
496 void
497 t3_sge_err_intr_handler(adapter_t *adapter)
498 {
499 unsigned int v, status;
500
501 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
502 if (status & SGE_PARERR)
503 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
504 status & SGE_PARERR);
505 if (status & SGE_FRAMINGERR)
506 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
507 status & SGE_FRAMINGERR);
508 if (status & F_RSPQCREDITOVERFOW)
509 CH_ALERT(adapter, "SGE response queue credit overflow\n");
510
511 if (status & F_RSPQDISABLED) {
512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
513
514 CH_ALERT(adapter,
515 "packet delivered to disabled response queue (0x%x)\n",
516 (v >> S_RSPQ0DISABLED) & 0xff);
517 }
518
519 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
520 if (status & SGE_FATALERR)
521 t3_fatal_err(adapter);
522 }
523
524 void
525 t3_sge_prep(adapter_t *adap, struct sge_params *p)
526 {
527 int i, nqsets;
528
529 nqsets = min(SGE_QSETS, mp_ncpus*4);
530
531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
532
533 while (!powerof2(fl_q_size))
534 fl_q_size--;
535 #if __FreeBSD_version >= 700111
536 if (cxgb_use_16k_clusters)
537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
538 else
539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
540 #else
541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
542 #endif
543 while (!powerof2(jumbo_q_size))
544 jumbo_q_size--;
545
546 /* XXX Does ETHER_ALIGN need to be accounted for here? */
547 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
548
549 for (i = 0; i < SGE_QSETS; ++i) {
550 struct qset_params *q = p->qset + i;
551
552 if (adap->params.nports > 2) {
553 q->coalesce_usecs = 50;
554 } else {
555 #ifdef INVARIANTS
556 q->coalesce_usecs = 10;
557 #else
558 q->coalesce_usecs = 5;
559 #endif
560 }
561 q->polling = 0;
562 q->rspq_size = RSPQ_Q_SIZE;
563 q->fl_size = fl_q_size;
564 q->jumbo_size = jumbo_q_size;
565 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
566 q->txq_size[TXQ_OFLD] = 1024;
567 q->txq_size[TXQ_CTRL] = 256;
568 q->cong_thres = 0;
569 }
570 }
571
572 int
573 t3_sge_alloc(adapter_t *sc)
574 {
575
576 /* The parent tag. */
577 if (bus_dma_tag_create( NULL, /* parent */
578 1, 0, /* algnmnt, boundary */
579 BUS_SPACE_MAXADDR, /* lowaddr */
580 BUS_SPACE_MAXADDR, /* highaddr */
581 NULL, NULL, /* filter, filterarg */
582 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
583 BUS_SPACE_UNRESTRICTED, /* nsegments */
584 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
585 0, /* flags */
586 NULL, NULL, /* lock, lockarg */
587 &sc->parent_dmat)) {
588 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
589 return (ENOMEM);
590 }
591
592 /*
593 * DMA tag for normal sized RX frames
594 */
595 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
596 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
597 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
598 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
599 return (ENOMEM);
600 }
601
602 /*
603 * DMA tag for jumbo sized RX frames.
604 */
605 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
606 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
607 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
608 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
609 return (ENOMEM);
610 }
611
612 /*
613 * DMA tag for TX frames.
614 */
615 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
616 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
617 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
618 NULL, NULL, &sc->tx_dmat)) {
619 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
620 return (ENOMEM);
621 }
622
623 return (0);
624 }
625
626 int
627 t3_sge_free(struct adapter * sc)
628 {
629
630 if (sc->tx_dmat != NULL)
631 bus_dma_tag_destroy(sc->tx_dmat);
632
633 if (sc->rx_jumbo_dmat != NULL)
634 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
635
636 if (sc->rx_dmat != NULL)
637 bus_dma_tag_destroy(sc->rx_dmat);
638
639 if (sc->parent_dmat != NULL)
640 bus_dma_tag_destroy(sc->parent_dmat);
641
642 return (0);
643 }
644
645 void
646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
647 {
648
649 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
650 qs->rspq.polling = 0 /* p->polling */;
651 }
652
#if !defined(__i386__) && !defined(__amd64__)
/*
 * refill_fl_cb - bus_dmamap_load() callback used by refill_fl()
 *
 * Records the load status, the first segment and the segment count in the
 * struct refill_fl_cb_arg passed as @arg.
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *result = arg;

	result->error = error;
	result->seg = segs[0];
	result->nseg = nseg;
}
#endif
665 /**
666 * refill_fl - refill an SGE free-buffer list
667 * @sc: the controller softc
668 * @q: the free-list to refill
669 * @n: the number of new buffers to allocate
670 *
671 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
672 * The caller must assure that @n does not exceed the queue's capacity.
673 */
674 static void
675 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
676 {
677 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
678 struct rx_desc *d = &q->desc[q->pidx];
679 struct refill_fl_cb_arg cb_arg;
680 struct mbuf *m;
681 caddr_t cl;
682 int err, count = 0;
683
684 cb_arg.error = 0;
685 while (n--) {
686 /*
687 * We only allocate a cluster, mbuf allocation happens after rx
688 */
689 if (q->zone == zone_pack) {
690 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
691 break;
692 cl = m->m_ext.ext_buf;
693 } else {
694 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
695 break;
696 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
697 uma_zfree(q->zone, cl);
698 break;
699 }
700 }
701 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
702 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
703 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
704 uma_zfree(q->zone, cl);
705 goto done;
706 }
707 sd->flags |= RX_SW_DESC_MAP_CREATED;
708 }
709 #if !defined(__i386__) && !defined(__amd64__)
710 err = bus_dmamap_load(q->entry_tag, sd->map,
711 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
712
713 if (err != 0 || cb_arg.error) {
714 if (q->zone == zone_pack)
715 uma_zfree(q->zone, cl);
716 m_free(m);
717 goto done;
718 }
719 #else
720 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
721 #endif
722 sd->flags |= RX_SW_DESC_INUSE;
723 sd->rxsd_cl = cl;
724 sd->m = m;
725 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
726 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
727 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
728 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
729
730 d++;
731 sd++;
732
733 if (++q->pidx == q->size) {
734 q->pidx = 0;
735 q->gen ^= 1;
736 sd = q->sdesc;
737 d = q->desc;
738 }
739 q->credits++;
740 count++;
741 }
742
743 done:
744 if (count)
745 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
746 }
747
748
749 /**
750 * free_rx_bufs - free the Rx buffers on an SGE free list
751 * @sc: the controle softc
752 * @q: the SGE free list to clean up
753 *
754 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
755 * this queue should be stopped before calling this function.
756 */
757 static void
758 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
759 {
760 u_int cidx = q->cidx;
761
762 while (q->credits--) {
763 struct rx_sw_desc *d = &q->sdesc[cidx];
764
765 if (d->flags & RX_SW_DESC_INUSE) {
766 bus_dmamap_unload(q->entry_tag, d->map);
767 bus_dmamap_destroy(q->entry_tag, d->map);
768 if (q->zone == zone_pack) {
769 m_init(d->m, zone_pack, MCLBYTES,
770 M_NOWAIT, MT_DATA, M_EXT);
771 uma_zfree(zone_pack, d->m);
772 } else {
773 m_init(d->m, zone_mbuf, MLEN,
774 M_NOWAIT, MT_DATA, 0);
775 uma_zfree(zone_mbuf, d->m);
776 uma_zfree(q->zone, d->rxsd_cl);
777 }
778 }
779
780 d->rxsd_cl = NULL;
781 d->m = NULL;
782 if (++cidx == q->size)
783 cidx = 0;
784 }
785 }
786
787 static __inline void
788 __refill_fl(adapter_t *adap, struct sge_fl *fl)
789 {
790 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
791 }
792
793 static __inline void
794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
795 {
796 if ((fl->size - fl->credits) < max)
797 refill_fl(adap, fl, min(max, fl->size - fl->credits));
798 }
799
800 /**
801 * recycle_rx_buf - recycle a receive buffer
802 * @adapter: the adapter
803 * @q: the SGE free list
804 * @idx: index of buffer to recycle
805 *
806 * Recycles the specified buffer on the given free list by adding it at
807 * the next available slot on the list.
808 */
809 static void
810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
811 {
812 struct rx_desc *from = &q->desc[idx];
813 struct rx_desc *to = &q->desc[q->pidx];
814
815 q->sdesc[q->pidx] = q->sdesc[idx];
816 to->addr_lo = from->addr_lo; // already big endian
817 to->addr_hi = from->addr_hi; // likewise
818 wmb(); /* necessary ? */
819 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
820 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
821 q->credits++;
822
823 if (++q->pidx == q->size) {
824 q->pidx = 0;
825 q->gen ^= 1;
826 }
827 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
828 }
829
830 static void
831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
832 {
833 uint32_t *addr;
834
835 addr = arg;
836 *addr = segs[0].ds_addr;
837 }
838
839 static int
840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
841 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
842 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
843 {
844 size_t len = nelem * elem_size;
845 void *s = NULL;
846 void *p = NULL;
847 int err;
848
849 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
850 BUS_SPACE_MAXADDR_32BIT,
851 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
852 len, 0, NULL, NULL, tag)) != 0) {
853 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
854 return (ENOMEM);
855 }
856
857 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
858 map)) != 0) {
859 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
860 return (ENOMEM);
861 }
862
863 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
864 bzero(p, len);
865 *(void **)desc = p;
866
867 if (sw_size) {
868 len = nelem * sw_size;
869 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
870 *(void **)sdesc = s;
871 }
872 if (parent_entry_tag == NULL)
873 return (0);
874
875 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
876 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
877 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
878 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
879 NULL, NULL, entry_tag)) != 0) {
880 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
881 return (ENOMEM);
882 }
883 return (0);
884 }
885
886 static void
887 sge_slow_intr_handler(void *arg, int ncount)
888 {
889 adapter_t *sc = arg;
890
891 t3_slow_intr_handler(sc);
892 }
893
894 /**
895 * sge_timer_cb - perform periodic maintenance of an SGE qset
896 * @data: the SGE queue set to maintain
897 *
898 * Runs periodically from a timer to perform maintenance of an SGE queue
899 * set. It performs two tasks:
900 *
901 * a) Cleans up any completed Tx descriptors that may still be pending.
902 * Normal descriptor cleanup happens when new packets are added to a Tx
903 * queue so this timer is relatively infrequent and does any cleanup only
904 * if the Tx queue has not seen any new packets in a while. We make a
905 * best effort attempt to reclaim descriptors, in that we don't wait
906 * around if we cannot get a queue's lock (which most likely is because
907 * someone else is queueing new packets and so will also handle the clean
908 * up). Since control queues use immediate data exclusively we don't
909 * bother cleaning them up here.
910 *
911 * b) Replenishes Rx queues that have run out due to memory shortage.
912 * Normally new Rx buffers are added when existing ones are consumed but
913 * when out of memory a queue can become empty. We try to add only a few
914 * buffers here, the queue will be replenished fully as these new buffers
915 * are used up if memory shortage has subsided.
916 *
917 * c) Return coalesced response queue credits in case a response queue is
918 * starved.
919 *
920 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
921 * fifo overflows and the FW doesn't implement any recovery scheme yet.
922 */
923 static void
924 sge_timer_cb(void *arg)
925 {
926 adapter_t *sc = arg;
927 if ((sc->flags & USING_MSIX) == 0) {
928
929 struct port_info *pi;
930 struct sge_qset *qs;
931 struct sge_txq *txq;
932 int i, j;
933 int reclaim_ofl, refill_rx;
934
935 if (sc->open_device_map == 0)
936 return;
937
938 for (i = 0; i < sc->params.nports; i++) {
939 pi = &sc->port[i];
940 for (j = 0; j < pi->nqsets; j++) {
941 qs = &sc->sge.qs[pi->first_qset + j];
942 txq = &qs->txq[0];
943 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
944 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
945 (qs->fl[1].credits < qs->fl[1].size));
946 if (reclaim_ofl || refill_rx) {
947 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
948 break;
949 }
950 }
951 }
952 }
953
954 if (sc->params.nports > 2) {
955 int i;
956
957 for_each_port(sc, i) {
958 struct port_info *pi = &sc->port[i];
959
960 t3_write_reg(sc, A_SG_KDOORBELL,
961 F_SELEGRCNTX |
962 (FW_TUNNEL_SGEEC_START + pi->first_qset));
963 }
964 }
965 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
966 sc->open_device_map != 0)
967 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
968 }
969
970 /*
971 * This is meant to be a catch-all function to keep sge state private
972 * to sge.c
973 *
974 */
975 int
976 t3_sge_init_adapter(adapter_t *sc)
977 {
978 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
979 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
980 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
981 return (0);
982 }
983
984 int
985 t3_sge_reset_adapter(adapter_t *sc)
986 {
987 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
988 return (0);
989 }
990
991 int
992 t3_sge_init_port(struct port_info *pi)
993 {
994 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
995 return (0);
996 }
997
998 /**
999 * refill_rspq - replenish an SGE response queue
1000 * @adapter: the adapter
1001 * @q: the response queue to replenish
1002 * @credits: how many new responses to make available
1003 *
1004 * Replenishes a response queue by making the supplied number of responses
1005 * available to HW.
1006 */
1007 static __inline void
1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1009 {
1010
1011 /* mbufs are allocated on demand when a rspq entry is processed. */
1012 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1013 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1014 }
1015
/*
 * sge_txq_reclaim_handler - reclaim completed descriptors on Tx queues 0-2
 * @arg: the queue set
 * @ncount: not referenced
 *
 * Each queue is reclaimed only if at least 16 descriptors are reclaimable.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int queue = 0;

	while (queue < 3) {
		reclaim_completed_tx(qs, 16, queue);
		queue++;
	}
}
1025
1026 static void
1027 sge_timer_reclaim(void *arg, int ncount)
1028 {
1029 struct port_info *pi = arg;
1030 int i, nqsets = pi->nqsets;
1031 adapter_t *sc = pi->adapter;
1032 struct sge_qset *qs;
1033 struct mtx *lock;
1034
1035 KASSERT((sc->flags & USING_MSIX) == 0,
1036 ("can't call timer reclaim for msi-x"));
1037
1038 for (i = 0; i < nqsets; i++) {
1039 qs = &sc->sge.qs[pi->first_qset + i];
1040
1041 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1042 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1043 &sc->sge.qs[0].rspq.lock;
1044
1045 if (mtx_trylock(lock)) {
1046 /* XXX currently assume that we are *NOT* polling */
1047 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1048
1049 if (qs->fl[0].credits < qs->fl[0].size - 16)
1050 __refill_fl(sc, &qs->fl[0]);
1051 if (qs->fl[1].credits < qs->fl[1].size - 16)
1052 __refill_fl(sc, &qs->fl[1]);
1053
1054 if (status & (1 << qs->rspq.cntxt_id)) {
1055 if (qs->rspq.credits) {
1056 refill_rspq(sc, &qs->rspq, 1);
1057 qs->rspq.credits--;
1058 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1059 1 << qs->rspq.cntxt_id);
1060 }
1061 }
1062 mtx_unlock(lock);
1063 }
1064 }
1065 }
1066
1067 /**
1068 * init_qset_cntxt - initialize an SGE queue set context info
1069 * @qs: the queue set
1070 * @id: the queue set id
1071 *
1072 * Initializes the TIDs and context ids for the queues of a queue set.
1073 */
1074 static void
1075 init_qset_cntxt(struct sge_qset *qs, u_int id)
1076 {
1077
1078 qs->rspq.cntxt_id = id;
1079 qs->fl[0].cntxt_id = 2 * id;
1080 qs->fl[1].cntxt_id = 2 * id + 1;
1081 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1082 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1083 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1084 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1085 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1086
1087 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1088 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1089 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1090 }
1091
1092
1093 static void
1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1095 {
1096 txq->in_use += ndesc;
1097 /*
1098 * XXX we don't handle stopping of queue
1099 * presumably start handles this when we bump against the end
1100 */
1101 txqs->gen = txq->gen;
1102 txq->unacked += ndesc;
1103 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1104 txq->unacked &= 31;
1105 txqs->pidx = txq->pidx;
1106 txq->pidx += ndesc;
1107 #ifdef INVARIANTS
1108 if (((txqs->pidx > txq->cidx) &&
1109 (txq->pidx < txqs->pidx) &&
1110 (txq->pidx >= txq->cidx)) ||
1111 ((txqs->pidx < txq->cidx) &&
1112 (txq->pidx >= txq-> cidx)) ||
1113 ((txqs->pidx < txq->cidx) &&
1114 (txq->cidx < txqs->pidx)))
1115 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1116 txqs->pidx, txq->pidx, txq->cidx);
1117 #endif
1118 if (txq->pidx >= txq->size) {
1119 txq->pidx -= txq->size;
1120 txq->gen ^= 1;
1121 }
1122
1123 }
1124
1125 /**
1126 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1127 * @m: the packet mbufs
1128 * @nsegs: the number of segments
1129 *
1130 * Returns the number of Tx descriptors needed for the given Ethernet
1131 * packet. Ethernet packets require addition of WR and CPL headers.
1132 */
1133 static __inline unsigned int
1134 calc_tx_descs(const struct mbuf *m, int nsegs)
1135 {
1136 unsigned int flits;
1137
1138 if (m->m_pkthdr.len <= PIO_LEN)
1139 return 1;
1140
1141 flits = sgl_len(nsegs) + 2;
1142 #ifdef TSO_SUPPORTED
1143 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1144 flits++;
1145 #endif
1146 return flits_to_desc(flits);
1147 }
1148
/*
 * busdma_map_mbufs - load a Tx software descriptor's DMA map for a packet
 * @m: in/out packet pointer; replaced when the chain is defragmented and
 *     set to NULL whenever this function frees the packet
 * @txq: Tx queue supplying the DMA tag (txq->entry_tag)
 * @txsd: software descriptor whose map is loaded
 * @segs: receives the DMA segments
 * @nsegs: receives the number of segments
 *
 * Returns 0 on success and marks @txsd as mapped.  On failure returns an
 * errno value:
 *   ENOBUFS - m_defrag() failed; the packet has been freed, *m is NULL
 *   ENOMEM  - returned as-is; the packet is NOT freed
 *   other   - the packet has been freed and *m is NULL
 */
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;
	bus_dma_tag_t tag = txq->entry_tag;

retry:
	err = 0;
	m0 = *m;
	/* only used for the debug message below */
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	/*
	 * On x86 try busdma_map_sg_collapse() first; the generic loader is
	 * only used when that fails (the "else" below binds to the
	 * statement after the #endif).
	 */
	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		/* allow exactly one defragment-and-retry attempt */
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			/* m0 is NULL here, so the original chain is in *m */
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		/* packet is left untouched for the caller */
		return (err);
	} if (err) {
		/* any other error (including a second EFBIG) drops the packet */
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}
1199
1200 /**
1201 * make_sgl - populate a scatter/gather list for a packet
1202 * @sgp: the SGL to populate
1203 * @segs: the packet dma segments
1204 * @nsegs: the number of segments
1205 *
1206 * Generates a scatter/gather list for the buffers that make up a packet
1207 * and returns the SGL size in 8-byte words. The caller must size the SGL
1208 * appropriately.
1209 */
1210 static __inline void
1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1212 {
1213 int i, idx;
1214
1215 for (idx = 0, i = 0; i < nsegs; i++) {
1216 /*
1217 * firmware doesn't like empty segments
1218 */
1219 if (segs[i].ds_len == 0)
1220 continue;
1221 if (i && idx == 0)
1222 ++sgp;
1223
1224 sgp->len[idx] = htobe32(segs[i].ds_len);
1225 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1226 idx ^= 1;
1227 }
1228
1229 if (idx) {
1230 sgp->len[idx] = 0;
1231 sgp->addr[idx] = 0;
1232 }
1233 }
1234
/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	/*
	 * Ring only when TXQ_RUNNING was not already set; in that case
	 * TXQ_LAST_PKT_DB records that this packet rang the doorbell.
	 * Note that no write barrier is issued on this path here.
	 */
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	/* unconditional doorbell, selected by the queue's egress context id */
	wmb(); /* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}
1267
/*
 * When the SGE is built with two generation bits, store @gen (big endian)
 * in the last flit of descriptor @d.  Otherwise this is a no-op.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	uint64_t *last_flit = &d->flit[TX_DESC_FLITS - 1];

	*last_flit = htobe64(gen);
#endif
}
1275
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 *
 *	In the multi-descriptor case @txqs->pidx and @txqs->gen are advanced
 *	as descriptors are consumed, and the low header word of the FIRST
 *	descriptor (which carries its generation) is written last, after a
 *	write barrier.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	/* NOTE: txsd is advanced in step with txd below but never read here */
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		/* single descriptor: header marks both start and end of packet */
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo);
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		/* generation of the first descriptor, used when it is published */
		unsigned int ogen = txqs->gen;
		/* the SGL is copied out flit by flit */
		const uint64_t *fp = (const uint64_t *)sgl;
		/* remembers the first descriptor's header for the final write */
		struct work_request_hdr *wp = wrp;

		/* first header: SOP only; its low word is deferred to the end */
		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			/* room left in this descriptor after its header flits */
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			/* move to the next descriptor, wrapping at ring end */
			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			/* continuation header: one header flit, then SGL data */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		/* mark the last descriptor written as end of packet */
		wrp->wrh_hi |= htonl(F_WR_EOP);
		/* all other descriptors must be visible before the first one */
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}
1359
/*
 * Minimum header bytes of a VLAN-tagged TCP/IPv4 frame with no IP or TCP
 * options: sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp)
 */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

/*
 * GET_VTAG(cntrl, m) - if mbuf @m carries a VLAN tag, OR the "VLAN valid"
 * flag and the tag value into the lvalue @cntrl.  Expands to nothing when
 * VLAN support is not compiled in.  Both arguments are parenthesized so
 * that arbitrary expressions can be passed safely.
 */
#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG) \
		(cntrl) |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

#else
#define GET_VTAG(cntrl, m)
#endif
1373
/**
 *	t3_encap - map a packet and write its Ethernet Tx work request
 *	@qs: the queue set; its TXQ_ETH queue is used and qs->lock must be held
 *	@m: pointer to the packet, or to the head of an m_nextpkt-linked batch
 *
 *	Maps the packet for DMA, reserves descriptors with txq_prod() and
 *	writes one of three work-request layouts:
 *	  - a batch WR when the head mbuf has m_nextpkt set,
 *	  - an LSO WR when TSO was requested,
 *	  - a plain CPL_TX_PKT WR otherwise.
 *	In the latter two cases a packet of at most PIO_LEN bytes is copied
 *	into the descriptor as immediate data and txsd->m is left NULL;
 *	larger packets are described by a scatter/gather list.  The doorbell
 *	is rung on every successful path.
 *
 *	Returns 0 on success, or the error from busdma_map_sg_collapse().
 */
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	/* descriptors at the current producer index (before txq_prod) */
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	/* per-queue scratch SGL, used when the WR spans several descriptors */
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	/*
	 * TSO is considered only for a single (non-batched), multi-mbuf
	 * packet.  NOTE(review): this is guarded by VLAN_SUPPORTED whereas
	 * calc_tx_descs() keys its TSO flit on TSO_SUPPORTED -- confirm
	 * that the guard is intended.
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (m0->m_nextpkt != NULL) {
		/* batch of packets: always a single descriptor */
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		/*
		 * NOTE(review): m0 is passed by reference and may presumably
		 * be replaced, but *m is never written back -- verify that
		 * callers do not rely on *m afterwards.
		 */
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	/* reserve ndesc descriptors; txqs captures the pre-advance gen/pidx */
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	/* the mbuf stays attached to the descriptor unless sent via PIO */
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		/* one header flit plus two flits per batch entry */
		flits = nsegs*2 + 1;

		/*
		 * One batch entry per segment.  NOTE(review): the checksum
		 * flags and VLAN tag are read from the head mbuf m0 for
		 * every entry; m0 is not advanced along m_nextpkt here.
		 * fidx is incremented but never read.
		 */
		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			/* hflit aliases the two 32-bit halves of flit */
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			/* disable HW checksumming the stack did not request */
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			/* modifies the same storage that hflit[] reads below */
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq);
		return (0);
	} else if (tso_info) {
		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ip *ip;
		struct tcphdr *tcp;
		char *pkthdr;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		DPRINTF("tso buf len=%d\n", mlen);

		/* untagged frames have no VLAN encapsulation bytes */
		tagged = m0->m_flags & M_VLANTAG;
		if (!tagged)
			min_size -= ETHER_VLAN_ENCAP_LEN;

		if (__predict_false(mlen < min_size)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    m0->m_pkthdr.csum_flags, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		/*
		 * NOTE(review): if m_pullup() returns a different mbuf,
		 * txsd->m and the DMA mapping above still refer to the
		 * chain as it was before -- confirm this cannot happen.
		 */
		if (__predict_false(m0->m_len < min_size)) {
			m0 = m_pullup(m0, min_size);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}
		pkthdr = m0->m_data;

		if (tagged) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		/* the TCP header is located assuming an option-less IP header */
		tcp = (struct tcphdr *)((uint8_t *)ip +
		    sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
		    V_LSO_IPHDR_WORDS(ip->ip_hl) |
		    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/* pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 *
			 */
			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
			/* immediate data: payload follows the 3 header flits */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		/* LSO header occupies three flits ahead of the SGL */
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		/* disable HW checksumming the stack did not request */
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			/* immediate data: payload follows the 2 header flits */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		/* plain packet header occupies two flits ahead of the SGL */
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	/*
	 * A single-descriptor WR gets its SGL built in place after the
	 * header flits; otherwise the SGL is built in the scratch buffer
	 * and distributed by write_wr_hdr_sgl().
	 */
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(pi->adapter, txq);

	return (0);
}
1596
1597 void
1598 cxgb_tx_watchdog(void *arg)
1599 {
1600 struct sge_qset *qs = arg;
1601 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1602
1603 if (qs->coalescing != 0 &&
1604 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1605 TXQ_RING_EMPTY(qs))
1606 qs->coalescing = 0;
1607 else if (qs->coalescing == 0 &&
1608 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1609 qs->coalescing = 1;
1610 if (TXQ_TRYLOCK(qs)) {
1611 qs->qs_flags |= QS_FLUSHING;
1612 cxgb_start_locked(qs);
1613 qs->qs_flags &= ~QS_FLUSHING;
1614 TXQ_UNLOCK(qs);
1615 }
1616 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1617 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1618 qs, txq->txq_watchdog.c_cpu);
1619 }
1620
1621 static void
1622 cxgb_tx_timeout(void *arg)
1623 {
1624 struct sge_qset *qs = arg;
1625 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1626
1627 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1628 qs->coalescing = 1;
1629 if (TXQ_TRYLOCK(qs)) {
1630 qs->qs_flags |= QS_TIMEOUT;
1631 cxgb_start_locked(qs);
1632 qs->qs_flags &= ~QS_TIMEOUT;
1633 TXQ_UNLOCK(qs);
1634 }
1635 }
1636
1637 static void
1638 cxgb_start_locked(struct sge_qset *qs)
1639 {
1640 struct mbuf *m_head = NULL;
1641 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1642 int avail, txmax;
1643 int in_use_init = txq->in_use;
1644 struct port_info *pi = qs->port;
1645 struct ifnet *ifp = pi->ifp;
1646 avail = txq->size - txq->in_use - 4;
1647 txmax = min(TX_START_MAX_DESC, avail);
1648
1649 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1650 reclaim_completed_tx(qs, 0, TXQ_ETH);
1651
1652 if (!pi->link_config.link_ok) {
1653 TXQ_RING_FLUSH(qs);
1654 return;
1655 }
1656 TXQ_LOCK_ASSERT(qs);
1657 while ((txq->in_use - in_use_init < txmax) &&
1658 !TXQ_RING_EMPTY(qs) &&
1659 (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1660 pi->link_config.link_ok) {
1661 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1662
1663 if ((m_head = cxgb_dequeue(qs)) == NULL)
1664 break;
1665 /*
1666 * Encapsulation can modify our pointer, and or make it
1667 * NULL on failure. In that event, we can't requeue.
1668 */
1669 if (t3_encap(qs, &m_head) || m_head == NULL)
1670 break;
1671
1672 /* Send a copy of the frame to the BPF listener */
1673 ETHER_BPF_MTAP(ifp, m_head);
1674
1675 /*
1676 * We sent via PIO, no longer need a copy
1677 */
1678 if (m_head->m_nextpkt == NULL &&
1679 m_head->m_pkthdr.len <= PIO_LEN)
1680 m_freem(m_head);
1681
1682 m_head = NULL;
1683 }
1684 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1685 pi->link_config.link_ok)
1686 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1687 qs, txq->txq_timer.c_cpu);
1688 if (m_head != NULL)
1689 m_freem(m_head);
1690 }
1691
1692 static int
1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1694 {
1695 struct port_info *pi = qs->port;
1696 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1697 struct buf_ring *br = txq->txq_mr;
1698 int error, avail;
1699
1700 avail = txq->size - txq->in_use;
1701 TXQ_LOCK_ASSERT(qs);
1702
1703 /*
1704 * We can only do a direct transmit if the following are true:
1705 * - we aren't coalescing (ring < 3/4 full)
1706 * - the link is up -- checked in caller
1707 * - there are no packets enqueued already
1708 * - there is space in hardware transmit queue
1709 */
1710 if (check_pkt_coalesce(qs) == 0 &&
1711 TXQ_RING_EMPTY(qs) && avail > 4) {
1712 if (t3_encap(qs, &m)) {
1713 if (m != NULL &&
1714 (error = drbr_enqueue(ifp, br, m)) != 0)
1715 return (error);
1716 } else {
1717 /*
1718 * We've bypassed the buf ring so we need to update
1719 * the stats directly
1720 */
1721 txq->txq_direct_packets++;
1722 txq->txq_direct_bytes += m->m_pkthdr.len;
1723 /*
1724 ** Send a copy of the frame to the BPF
1725 ** listener and set the watchdog on.
1726 */
1727 ETHER_BPF_MTAP(ifp, m);
1728 /*
1729 * We sent via PIO, no longer need a copy
1730 */
1731 if (m->m_pkthdr.len <= PIO_LEN)
1732 m_freem(m);
1733
1734 }
1735 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1736 return (error);
1737
1738 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1739 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1740 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1741 cxgb_start_locked(qs);
1742 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1743 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1744 qs, txq->txq_timer.c_cpu);
1745 return (0);
1746 }
1747
1748 int
1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1750 {
1751 struct sge_qset *qs;
1752 struct port_info *pi = ifp->if_softc;
1753 int error, qidx = pi->first_qset;
1754
1755 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1756 ||(!pi->link_config.link_ok)) {
1757 m_freem(m);
1758 return (0);
1759 }
1760
1761 if (m->m_flags & M_FLOWID)
1762 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1763
1764 qs = &pi->adapter->sge.qs[qidx];
1765
1766 if (TXQ_TRYLOCK(qs)) {
1767 /* XXX running */
1768 error = cxgb_transmit_locked(ifp, qs, m);
1769 TXQ_UNLOCK(qs);
1770 } else
1771 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1772 return (error);
1773 }
1774 void
1775 cxgb_start(struct ifnet *ifp)
1776 {
1777 struct port_info *pi = ifp->if_softc;
1778 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1779
1780 if (!pi->link_config.link_ok)
1781 return;
1782
1783 TXQ_LOCK(qs);
1784 cxgb_start_locked(qs);
1785 TXQ_UNLOCK(qs);
1786 }
1787
/*
 * Interface queue-flush hook.  Intended to flush any mbufs enqueued in
 * the buf_rings and in the transmit queues; currently a no-op.
 */
void
cxgb_qflush(struct ifnet *ifp)
{
}
1798
1799 /**
1800 * write_imm - write a packet into a Tx descriptor as immediate data
1801 * @d: the Tx descriptor to write
1802 * @m: the packet
1803 * @len: the length of packet data to write as immediate data
1804 * @gen: the generation bit value to write
1805 *
1806 * Writes a packet as immediate data into a Tx descriptor. The packet
1807 * contains a work request at its beginning. We must write the packet
1808 * carefully so the SGE doesn't read accidentally before it's written in
1809 * its entirety.
1810 */
1811 static __inline void
1812 write_imm(struct tx_desc *d, struct mbuf *m,
1813 unsigned int len, unsigned int gen)
1814 {
1815 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1816 struct work_request_hdr *to = (struct work_request_hdr *)d;
1817 uint32_t wr_hi, wr_lo;
1818
1819 if (len > WR_LEN)
1820 panic("len too big %d\n", len);
1821 if (len < sizeof(*from))
1822 panic("len too small %d", len);
1823
1824 memcpy(&to[1], &from[1], len - sizeof(*from));
1825 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1826 V_WR_BCNTLFLT(len & 7));
1827 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1828 V_WR_LEN((len + 7) / 8));
1829 set_wr_hdr(to, wr_hi, wr_lo);
1830 wmb();
1831 wr_gen2(d, gen);
1832
1833 /*
1834 * This check is a hack we should really fix the logic so
1835 * that this can't happen
1836 */
1837 if (m->m_type != MT_DONTFREE)
1838 m_freem(m);
1839
1840 }
1841
1842 /**
1843 * check_desc_avail - check descriptor availability on a send queue
1844 * @adap: the adapter
1845 * @q: the TX queue
1846 * @m: the packet needing the descriptors
1847 * @ndesc: the number of Tx descriptors needed
1848 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1849 *
1850 * Checks if the requested number of Tx descriptors is available on an
1851 * SGE send queue. If the queue is already suspended or not enough
1852 * descriptors are available the packet is queued for later transmission.
1853 * Must be called with the Tx queue locked.
1854 *
1855 * Returns 0 if enough descriptors are available, 1 if there aren't
1856 * enough descriptors and the packet has been queued, and 2 if the caller
1857 * needs to retry because there weren't enough descriptors at the
1858 * beginning of the call but some freed up in the mean time.
1859 */
1860 static __inline int
1861 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1862 struct mbuf *m, unsigned int ndesc,
1863 unsigned int qid)
1864 {
1865 /*
1866 * XXX We currently only use this for checking the control queue
1867 * the control queue is only used for binding qsets which happens
1868 * at init time so we are guaranteed enough descriptors
1869 */
1870 if (__predict_false(!mbufq_empty(&q->sendq))) {
1871 addq_exit: mbufq_tail(&q->sendq, m);
1872 return 1;
1873 }
1874 if (__predict_false(q->size - q->in_use < ndesc)) {
1875
1876 struct sge_qset *qs = txq_to_qset(q, qid);
1877
1878 setbit(&qs->txq_stopped, qid);
1879 if (should_restart_tx(q) &&
1880 test_and_clear_bit(qid, &qs->txq_stopped))
1881 return 2;
1882
1883 q->stops++;
1884 goto addq_exit;
1885 }
1886 return 0;
1887 }
1888
1889
1890 /**
1891 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1892 * @q: the SGE control Tx queue
1893 *
1894 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1895 * that send only immediate data (presently just the control queues) and
1896 * thus do not have any mbufs
1897 */
1898 static __inline void
1899 reclaim_completed_tx_imm(struct sge_txq *q)
1900 {
1901 unsigned int reclaim = q->processed - q->cleaned;
1902
1903 q->in_use -= reclaim;
1904 q->cleaned += reclaim;
1905 }
1906
1907 static __inline int
1908 immediate(const struct mbuf *m)
1909 {
1910 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1911 }
1912
1913 /**
1914 * ctrl_xmit - send a packet through an SGE control Tx queue
1915 * @adap: the adapter
1916 * @q: the control queue
1917 * @m: the packet
1918 *
1919 * Send a packet through an SGE control Tx queue. Packets sent through
1920 * a control queue must fit entirely as immediate data in a single Tx
1921 * descriptor and have no page fragments.
1922 */
1923 static int
1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1925 {
1926 int ret;
1927 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1928 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1929
1930 if (__predict_false(!immediate(m))) {
1931 m_freem(m);
1932 return 0;
1933 }
1934
1935 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1936 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1937
1938 TXQ_LOCK(qs);
1939 again: reclaim_completed_tx_imm(q);
1940
1941 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1942 if (__predict_false(ret)) {
1943 if (ret == 1) {
1944 TXQ_UNLOCK(qs);
1945 log(LOG_ERR, "no desc available\n");
1946 return (ENOSPC);
1947 }
1948 goto again;
1949 }
1950 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1951
1952 q->in_use++;
1953 if (++q->pidx >= q->size) {
1954 q->pidx = 0;
1955 q->gen ^= 1;
1956 }
1957 TXQ_UNLOCK(qs);
1958 t3_write_reg(adap, A_SG_KDOORBELL,
1959 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1960 return (0);
1961 }
1962
1963
1964 /**
1965 * restart_ctrlq - restart a suspended control queue
1966 * @qs: the queue set cotaining the control queue
1967 *
1968 * Resumes transmission on a suspended Tx control queue.
1969 */
1970 static void
1971 restart_ctrlq(void *data, int npending)
1972 {
1973 struct mbuf *m;
1974 struct sge_qset *qs = (struct sge_qset *)data;
1975 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1976 adapter_t *adap = qs->port->adapter;
1977
1978 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1979
1980 TXQ_LOCK(qs);
1981 again: reclaim_completed_tx_imm(q);
1982
1983 while (q->in_use < q->size &&
1984 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1985
1986 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1987
1988 if (++q->pidx >= q->size) {
1989 q->pidx = 0;
1990 q->gen ^= 1;
1991 }
1992 q->in_use++;
1993 }
1994 if (!mbufq_empty(&q->sendq)) {
1995 setbit(&qs->txq_stopped, TXQ_CTRL);
1996
1997 if (should_restart_tx(q) &&
1998 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1999 goto again;
2000 q->stops++;
2001 }
2002 TXQ_UNLOCK(qs);
2003 t3_write_reg(adap, A_SG_KDOORBELL,
2004 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2005 }
2006
2007
2008 /*
2009 * Send a management message through control queue 0
2010 */
2011 int
2012 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2013 {
2014 return ctrl_xmit(adap, &adap->sge.qs[0], m);
2015 }
2016
/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 *
 *	The queue set's lock (q->lock) is unlocked and destroyed in here, so
 *	it must be held on entry.  On return the whole structure is zeroed.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	/* release every completed Ethernet Tx descriptor (threshold 0) */
	reclaim_completed_tx(q, 0, TXQ_ETH);
	/* software rings and interface queues of each Tx queue */
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].txq_mr != NULL)
			buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
		if (q->txq[i].txq_ifq != NULL) {
			ifq_delete(q->txq[i].txq_ifq);
			free(q->txq[i].txq_ifq, M_DEVBUF);
		}
	}

	/* free lists: disable the HW context, then free ring and buffers */
	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			/*
			 * NOTE(review): entry_tag is destroyed before
			 * free_rx_bufs() runs below; if that routine still
			 * uses the tag for its per-buffer maps this order
			 * should be reversed -- confirm.
			 */
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	/* the caller handed us the lock held; drop and destroy it */
	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	/* Tx queues: disable the egress context, then free the rings */
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	/* response queue: disable the context, free the ring and its lock */
	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#ifdef LRO_SUPPORTED
	tcp_lro_free(&q->lro.ctrl);
#endif

	/* leave no stale pointers behind */
	bzero(q, sizeof(*q));
}
2095
2096 /**
2097 * t3_free_sge_resources - free SGE resources
2098 * @sc: the adapter softc
2099 *
2100 * Frees resources used by the SGE queue sets.
2101 */
2102 void
2103 t3_free_sge_resources(adapter_t *sc)
2104 {
2105 int i, nqsets;
2106
2107 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2108 nqsets += sc->port[i].nqsets;
2109
2110 for (i = 0; i < nqsets; ++i) {
2111 TXQ_LOCK(&sc->sge.qs[i]);
2112 t3_free_qset(sc, &sc->sge.qs[i]);
2113 }
2114
2115 }
2116
2117 /**
2118 * t3_sge_start - enable SGE
2119 * @sc: the controller softc
2120 *
2121 * Enables the SGE for DMAs. This is the last step in starting packet
2122 * transfers.
2123 */
2124 void
2125 t3_sge_start(adapter_t *sc)
2126 {
2127 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2128 }
2129
2130 /**
2131 * t3_sge_stop - disable SGE operation
2132 * @sc: the adapter
2133 *
2134 * Disables the DMA engine. This can be called in emeregencies (e.g.,
2135 * from error interrupts) or from normal process context. In the latter
2136 * case it also disables any pending queue restart tasklets. Note that
2137 * if it is called in interrupt context it cannot disable the restart
2138 * tasklets as it cannot wait, however the tasklets will have no effect
2139 * since the doorbells are disabled and the driver will call this again
2140 * later from process context, at which time the tasklets will be stopped
2141 * if they are still running.
2142 */
2143 void
2144 t3_sge_stop(adapter_t *sc)
2145 {
2146 int i, nqsets;
2147
2148 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2149
2150 if (sc->tq == NULL)
2151 return;
2152
2153 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2154 nqsets += sc->port[i].nqsets;
2155 #ifdef notyet
2156 /*
2157 *
2158 * XXX
2159 */
2160 for (i = 0; i < nqsets; ++i) {
2161 struct sge_qset *qs = &sc->sge.qs[i];
2162
2163 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2164 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2165 }
2166 #endif
2167 }
2168
2169 /**
2170 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2171 * @adapter: the adapter
2172 * @q: the Tx queue to reclaim descriptors from
2173 * @reclaimable: the number of descriptors to reclaim
2174 * @m_vec_size: maximum number of buffers to reclaim
2175 * @desc_reclaimed: returns the number of descriptors reclaimed
2176 *
2177 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2178 * Tx buffers. Called with the Tx queue lock held.
2179 *
2180 * Returns number of buffers of reclaimed
2181 */
2182 void
2183 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2184 {
2185 struct tx_sw_desc *txsd;
2186 unsigned int cidx, mask;
2187 struct sge_txq *q = &qs->txq[queue];
2188
2189 #ifdef T3_TRACE
2190 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2191 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2192 #endif
2193 cidx = q->cidx;
2194 mask = q->size - 1;
2195 txsd = &q->sdesc[cidx];
2196
2197 mtx_assert(&qs->lock, MA_OWNED);
2198 while (reclaimable--) {
2199 prefetch(q->sdesc[(cidx + 1) & mask].m);
2200 prefetch(q->sdesc[(cidx + 2) & mask].m);
2201
2202 if (txsd->m != NULL) {
2203 if (txsd->flags & TX_SW_DESC_MAPPED) {
2204 bus_dmamap_unload(q->entry_tag, txsd->map);
2205 txsd->flags &= ~TX_SW_DESC_MAPPED;
2206 }
2207 m_freem_list(txsd->m);
2208 txsd->m = NULL;
2209 } else
2210 q->txq_skipped++;
2211
2212 ++txsd;
2213 if (++cidx == q->size) {
2214 cidx = 0;
2215 txsd = q->sdesc;
2216 }
2217 }
2218 q->cidx = cidx;
2219
2220 }
2221
2222 /**
2223 * is_new_response - check if a response is newly written
2224 * @r: the response descriptor
2225 * @q: the response queue
2226 *
2227 * Returns true if a response descriptor contains a yet unprocessed
2228 * response.
2229 */
2230 static __inline int
2231 is_new_response(const struct rsp_desc *r,
2232 const struct sge_rspq *q)
2233 {
2234 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2235 }
2236
/* GTS flag bits of Tx queues 0 and 1 as found in a response descriptor. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/*
 * All response-descriptor flag bits that carry Tx queue control information:
 * the GTS bits above plus the full credit fields of Tx queues 0, 1 and 2.
 * process_responses() calls handle_rsp_cntrl_info() when any of them is set.
 */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
2245
2246 /**
2247 * write_ofld_wr - write an offload work request
2248 * @adap: the adapter
2249 * @m: the packet to send
2250 * @q: the Tx queue
2251 * @pidx: index of the first Tx descriptor to write
2252 * @gen: the generation value to use
2253 * @ndesc: number of descriptors the packet will occupy
2254 *
2255 * Write an offload work request to send the supplied packet. The packet
2256 * data already carry the work request with most fields populated.
2257 */
2258 static void
2259 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2260 struct sge_txq *q, unsigned int pidx,
2261 unsigned int gen, unsigned int ndesc,
2262 bus_dma_segment_t *segs, unsigned int nsegs)
2263 {
2264 unsigned int sgl_flits, flits;
2265 struct work_request_hdr *from;
2266 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2267 struct tx_desc *d = &q->desc[pidx];
2268 struct txq_state txqs;
2269
2270 if (immediate(m) && nsegs == 0) {
2271 write_imm(d, m, m->m_len, gen);
2272 return;
2273 }
2274
2275 /* Only TX_DATA builds SGLs */
2276 from = mtod(m, struct work_request_hdr *);
2277 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2278
2279 flits = m->m_len / 8;
2280 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2281
2282 make_sgl(sgp, segs, nsegs);
2283 sgl_flits = sgl_len(nsegs);
2284
2285 txqs.gen = gen;
2286 txqs.pidx = pidx;
2287 txqs.compl = 0;
2288
2289 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2290 from->wrh_hi, from->wrh_lo);
2291 }
2292
2293 /**
2294 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2295 * @m: the packet
2296 *
2297 * Returns the number of Tx descriptors needed for the given offload
2298 * packet. These packets are already fully constructed.
2299 */
2300 static __inline unsigned int
2301 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2302 {
2303 unsigned int flits, cnt = 0;
2304 int ndescs;
2305
2306 if (m->m_len <= WR_LEN && nsegs == 0)
2307 return (1); /* packet fits as immediate data */
2308
2309 /*
2310 * This needs to be re-visited for TOE
2311 */
2312
2313 cnt = nsegs;
2314
2315 /* headers */
2316 flits = m->m_len / 8;
2317
2318 ndescs = flits_to_desc(flits + sgl_len(cnt));
2319
2320 return (ndescs);
2321 }
2322
2323 /**
2324 * ofld_xmit - send a packet through an offload queue
2325 * @adap: the adapter
2326 * @q: the Tx offload queue
2327 * @m: the packet
2328 *
2329 * Send an offload packet through an SGE offload queue.
2330 */
2331 static int
2332 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2333 {
2334 int ret, nsegs;
2335 unsigned int ndesc;
2336 unsigned int pidx, gen;
2337 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2338 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2339 struct tx_sw_desc *stx;
2340
2341 nsegs = m_get_sgllen(m);
2342 vsegs = m_get_sgl(m);
2343 ndesc = calc_tx_descs_ofld(m, nsegs);
2344 busdma_map_sgl(vsegs, segs, nsegs);
2345
2346 stx = &q->sdesc[q->pidx];
2347
2348 TXQ_LOCK(qs);
2349 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2350 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2351 if (__predict_false(ret)) {
2352 if (ret == 1) {
2353 printf("no ofld desc avail\n");
2354
2355 m_set_priority(m, ndesc); /* save for restart */
2356 TXQ_UNLOCK(qs);
2357 return (EINTR);
2358 }
2359 goto again;
2360 }
2361
2362 gen = q->gen;
2363 q->in_use += ndesc;
2364 pidx = q->pidx;
2365 q->pidx += ndesc;
2366 if (q->pidx >= q->size) {
2367 q->pidx -= q->size;
2368 q->gen ^= 1;
2369 }
2370 #ifdef T3_TRACE
2371 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2372 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2373 ndesc, pidx, skb->len, skb->len - skb->data_len,
2374 skb_shinfo(skb)->nr_frags);
2375 #endif
2376 TXQ_UNLOCK(qs);
2377
2378 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2379 check_ring_tx_db(adap, q);
2380 return (0);
2381 }
2382
2383 /**
2384 * restart_offloadq - restart a suspended offload queue
2385 * @qs: the queue set cotaining the offload queue
2386 *
2387 * Resumes transmission on a suspended Tx offload queue.
2388 */
2389 static void
2390 restart_offloadq(void *data, int npending)
2391 {
2392 struct mbuf *m;
2393 struct sge_qset *qs = data;
2394 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2395 adapter_t *adap = qs->port->adapter;
2396 bus_dma_segment_t segs[TX_MAX_SEGS];
2397 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2398 int nsegs, cleaned;
2399
2400 TXQ_LOCK(qs);
2401 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2402
2403 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2404 unsigned int gen, pidx;
2405 unsigned int ndesc = m_get_priority(m);
2406
2407 if (__predict_false(q->size - q->in_use < ndesc)) {
2408 setbit(&qs->txq_stopped, TXQ_OFLD);
2409 if (should_restart_tx(q) &&
2410 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2411 goto again;
2412 q->stops++;
2413 break;
2414 }
2415
2416 gen = q->gen;
2417 q->in_use += ndesc;
2418 pidx = q->pidx;
2419 q->pidx += ndesc;
2420 if (q->pidx >= q->size) {
2421 q->pidx -= q->size;
2422 q->gen ^= 1;
2423 }
2424
2425 (void)mbufq_dequeue(&q->sendq);
2426 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2427 TXQ_UNLOCK(qs);
2428 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2429 TXQ_LOCK(qs);
2430 }
2431 #if USE_GTS
2432 set_bit(TXQ_RUNNING, &q->flags);
2433 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2434 #endif
2435 TXQ_UNLOCK(qs);
2436 wmb();
2437 t3_write_reg(adap, A_SG_KDOORBELL,
2438 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2439 }
2440
/**
 *	queue_set - return the queue set a packet should use
 *	@m: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The queue set
 *	index is the packet's priority with bit 0 shifted out.
 */
static __inline int
queue_set(const struct mbuf *m)
{
	/* Bit 0 is the control/offload selector; drop it. */
	return (m_get_priority(m) >> 1);
}
2453
/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  Returns bit 0 of the packet's priority: 1 for a control
 *	packet, 0 otherwise.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
	if ((m_get_priority(m) & 1) != 0)
		return (1);

	return (0);
}
2466
2467 /**
2468 * t3_offload_tx - send an offload packet
2469 * @tdev: the offload device to send to
2470 * @m: the packet
2471 *
2472 * Sends an offload packet. We use the packet priority to select the
2473 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2474 * should be sent as regular or control, bits 1-3 select the queue set.
2475 */
2476 int
2477 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2478 {
2479 adapter_t *adap = tdev2adap(tdev);
2480 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2481
2482 if (__predict_false(is_ctrl_pkt(m)))
2483 return ctrl_xmit(adap, qs, m);
2484
2485 return ofld_xmit(adap, qs, m);
2486 }
2487
2488 /**
2489 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2490 * @tdev: the offload device that will be receiving the packets
2491 * @q: the SGE response queue that assembled the bundle
2492 * @m: the partial bundle
2493 * @n: the number of packets in the bundle
2494 *
2495 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2496 */
2497 static __inline void
2498 deliver_partial_bundle(struct t3cdev *tdev,
2499 struct sge_rspq *q,
2500 struct mbuf *mbufs[], int n)
2501 {
2502 if (n) {
2503 q->offload_bundles++;
2504 cxgb_ofld_recv(tdev, mbufs, n);
2505 }
2506 }
2507
2508 static __inline int
2509 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2510 struct mbuf *m, struct mbuf *rx_gather[],
2511 unsigned int gather_idx)
2512 {
2513
2514 rq->offload_pkts++;
2515 m->m_pkthdr.header = mtod(m, void *);
2516 rx_gather[gather_idx++] = m;
2517 if (gather_idx == RX_BUNDLE_SIZE) {
2518 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2519 gather_idx = 0;
2520 rq->offload_bundles++;
2521 }
2522 return (gather_idx);
2523 }
2524
2525 static void
2526 restart_tx(struct sge_qset *qs)
2527 {
2528 struct adapter *sc = qs->port->adapter;
2529
2530
2531 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2532 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2533 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2534 qs->txq[TXQ_OFLD].restarts++;
2535 DPRINTF("restarting TXQ_OFLD\n");
2536 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2537 }
2538 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2539 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2540 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2541 qs->txq[TXQ_CTRL].in_use);
2542
2543 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2544 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2545 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2546 qs->txq[TXQ_CTRL].restarts++;
2547 DPRINTF("restarting TXQ_CTRL\n");
2548 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2549 }
2550 }
2551
2552 /**
2553 * t3_sge_alloc_qset - initialize an SGE queue set
2554 * @sc: the controller softc
2555 * @id: the queue set id
2556 * @nports: how many Ethernet ports will be using this queue set
2557 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2558 * @p: configuration parameters for this queue set
2559 * @ntxq: number of Tx queues for the queue set
2560 * @pi: port info for queue set
2561 *
2562 * Allocate resources and initialize an SGE queue set. A queue set
2563 * comprises a response queue, two Rx free-buffer queues, and up to 3
2564 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2565 * queue, offload queue, and control queue.
2566 */
2567 int
2568 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2569 const struct qset_params *p, int ntxq, struct port_info *pi)
2570 {
2571 struct sge_qset *q = &sc->sge.qs[id];
2572 int i, ret = 0;
2573
2574 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2575 q->port = pi;
2576
2577 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2578
2579 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2580 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2581 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2582 goto err;
2583 }
2584 if ((q->txq[i].txq_ifq =
2585 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
2586 == NULL) {
2587 device_printf(sc->dev, "failed to allocate ifq\n");
2588 goto err;
2589 }
2590 ifq_init(q->txq[i].txq_ifq, pi->ifp);
2591 callout_init(&q->txq[i].txq_timer, 1);
2592 callout_init(&q->txq[i].txq_watchdog, 1);
2593 q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
2594 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
2595 }
2596 init_qset_cntxt(q, id);
2597 q->idx = id;
2598 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2599 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2600 &q->fl[0].desc, &q->fl[0].sdesc,
2601 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2602 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2603 printf("error %d from alloc ring fl0\n", ret);
2604 goto err;
2605 }
2606
2607 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2608 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2609 &q->fl[1].desc, &q->fl[1].sdesc,
2610 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2611 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2612 printf("error %d from alloc ring fl1\n", ret);
2613 goto err;
2614 }
2615
2616 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2617 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2618 &q->rspq.desc_tag, &q->rspq.desc_map,
2619 NULL, NULL)) != 0) {
2620 printf("error %d from alloc ring rspq\n", ret);
2621 goto err;
2622 }
2623
2624 for (i = 0; i < ntxq; ++i) {
2625 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2626
2627 if ((ret = alloc_ring(sc, p->txq_size[i],
2628 sizeof(struct tx_desc), sz,
2629 &q->txq[i].phys_addr, &q->txq[i].desc,
2630 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2631 &q->txq[i].desc_map,
2632 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2633 printf("error %d from alloc ring tx %i\n", ret, i);
2634 goto err;
2635 }
2636 mbufq_init(&q->txq[i].sendq);
2637 q->txq[i].gen = 1;
2638 q->txq[i].size = p->txq_size[i];
2639 }
2640
2641 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2642 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2643 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2644 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2645
2646 q->fl[0].gen = q->fl[1].gen = 1;
2647 q->fl[0].size = p->fl_size;
2648 q->fl[1].size = p->jumbo_size;
2649
2650 q->rspq.gen = 1;
2651 q->rspq.cidx = 0;
2652 q->rspq.size = p->rspq_size;
2653
2654 q->txq[TXQ_ETH].stop_thres = nports *
2655 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2656
2657 q->fl[0].buf_size = MCLBYTES;
2658 q->fl[0].zone = zone_pack;
2659 q->fl[0].type = EXT_PACKET;
2660 #if __FreeBSD_version > 800000
2661 if (cxgb_use_16k_clusters) {
2662 q->fl[1].buf_size = MJUM16BYTES;
2663 q->fl[1].zone = zone_jumbo16;
2664 q->fl[1].type = EXT_JUMBO16;
2665 } else {
2666 q->fl[1].buf_size = MJUM9BYTES;
2667 q->fl[1].zone = zone_jumbo9;
2668 q->fl[1].type = EXT_JUMBO9;
2669 }
2670 #else
2671 q->fl[1].buf_size = MJUMPAGESIZE;
2672 q->fl[1].zone = zone_jumbop;
2673 q->fl[1].type = EXT_JUMBOP;
2674 #endif
2675
2676 #ifdef LRO_SUPPORTED
2677 /* Allocate and setup the lro_ctrl structure */
2678 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2679 ret = tcp_lro_init(&q->lro.ctrl);
2680 if (ret) {
2681 printf("error %d from tcp_lro_init\n", ret);
2682 goto err;
2683 }
2684 q->lro.ctrl.ifp = pi->ifp;
2685 #endif
2686
2687 mtx_lock_spin(&sc->sge.reg_lock);
2688 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2689 q->rspq.phys_addr, q->rspq.size,
2690 q->fl[0].buf_size, 1, 0);
2691 if (ret) {
2692 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2693 goto err_unlock;
2694 }
2695
2696 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2697 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2698 q->fl[i].phys_addr, q->fl[i].size,
2699 q->fl[i].buf_size, p->cong_thres, 1,
2700 0);
2701 if (ret) {
2702 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2703 goto err_unlock;
2704 }
2705 }
2706
2707 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2708 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2709 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2710 1, 0);
2711 if (ret) {
2712 printf("error %d from t3_sge_init_ecntxt\n", ret);
2713 goto err_unlock;
2714 }
2715
2716 if (ntxq > 1) {
2717 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2718 USE_GTS, SGE_CNTXT_OFLD, id,
2719 q->txq[TXQ_OFLD].phys_addr,
2720 q->txq[TXQ_OFLD].size, 0, 1, 0);
2721 if (ret) {
2722 printf("error %d from t3_sge_init_ecntxt\n", ret);
2723 goto err_unlock;
2724 }
2725 }
2726
2727 if (ntxq > 2) {
2728 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2729 SGE_CNTXT_CTRL, id,
2730 q->txq[TXQ_CTRL].phys_addr,
2731 q->txq[TXQ_CTRL].size,
2732 q->txq[TXQ_CTRL].token, 1, 0);
2733 if (ret) {
2734 printf("error %d from t3_sge_init_ecntxt\n", ret);
2735 goto err_unlock;
2736 }
2737 }
2738
2739 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2740 device_get_unit(sc->dev), irq_vec_idx);
2741 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2742
2743 mtx_unlock_spin(&sc->sge.reg_lock);
2744 t3_update_qset_coalesce(q, p);
2745 q->port = pi;
2746
2747 refill_fl(sc, &q->fl[0], q->fl[0].size);
2748 refill_fl(sc, &q->fl[1], q->fl[1].size);
2749 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2750
2751 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2752 V_NEWTIMER(q->rspq.holdoff_tmr));
2753
2754 return (0);
2755
2756 err_unlock:
2757 mtx_unlock_spin(&sc->sge.reg_lock);
2758 err:
2759 TXQ_LOCK(q);
2760 t3_free_qset(sc, q);
2761
2762 return (ret);
2763 }
2764
2765 /*
2766 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2767 * ethernet data. Hardware assistance with various checksums and any vlan tag
2768 * will also be taken into account here.
2769 */
2770 void
2771 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2772 {
2773 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2774 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2775 struct ifnet *ifp = pi->ifp;
2776
2777 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2778
2779 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2780 cpl->csum_valid && cpl->csum == 0xffff) {
2781 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2782 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2783 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2784 m->m_pkthdr.csum_data = 0xffff;
2785 }
2786 /*
2787 * XXX need to add VLAN support for 6.x
2788 */
2789 #ifdef VLAN_SUPPORTED
2790 if (__predict_false(cpl->vlan_valid)) {
2791 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2792 m->m_flags |= M_VLANTAG;
2793 }
2794 #endif
2795
2796 m->m_pkthdr.rcvif = ifp;
2797 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2798 /*
2799 * adjust after conversion to mbuf chain
2800 */
2801 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2802 m->m_len -= (sizeof(*cpl) + ethpad);
2803 m->m_data += (sizeof(*cpl) + ethpad);
2804 }
2805
2806 /**
2807 * get_packet - return the next ingress packet buffer from a free list
2808 * @adap: the adapter that received the packet
2809 * @drop_thres: # of remaining buffers before we start dropping packets
2810 * @qs: the qset that the SGE free list holding the packet belongs to
2811 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2812 * @r: response descriptor
2813 *
2814 * Get the next packet from a free list and complete setup of the
2815 * sk_buff. If the packet is small we make a copy and recycle the
2816 * original buffer, otherwise we use the original buffer itself. If a
2817 * positive drop threshold is supplied packets are dropped and their
2818 * buffers recycled if (a) the number of remaining buffers is under the
2819 * threshold and the packet is too big to copy, or (b) the packet should
2820 * be copied but there is no memory for the copy.
2821 */
2822 static int
2823 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2824 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2825 {
2826
2827 unsigned int len_cq = ntohl(r->len_cq);
2828 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2829 int mask, cidx = fl->cidx;
2830 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2831 uint32_t len = G_RSPD_LEN(len_cq);
2832 uint32_t flags = M_EXT;
2833 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2834 caddr_t cl;
2835 struct mbuf *m;
2836 int ret = 0;
2837
2838 mask = fl->size - 1;
2839 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2840 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2841 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2842 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2843
2844 fl->credits--;
2845 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2846
2847 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2848 sopeop == RSPQ_SOP_EOP) {
2849 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2850 goto skip_recycle;
2851 cl = mtod(m, void *);
2852 memcpy(cl, sd->rxsd_cl, len);
2853 recycle_rx_buf(adap, fl, fl->cidx);
2854 m->m_pkthdr.len = m->m_len = len;
2855 m->m_flags = 0;
2856 mh->mh_head = mh->mh_tail = m;
2857 ret = 1;
2858 goto done;
2859 } else {
2860 skip_recycle:
2861 bus_dmamap_unload(fl->entry_tag, sd->map);
2862 cl = sd->rxsd_cl;
2863 m = sd->m;
2864
2865 if ((sopeop == RSPQ_SOP_EOP) ||
2866 (sopeop == RSPQ_SOP))
2867 flags |= M_PKTHDR;
2868 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2869 if (fl->zone == zone_pack) {
2870 /*
2871 * restore clobbered data pointer
2872 */
2873 m->m_data = m->m_ext.ext_buf;
2874 } else {
2875 m_cljset(m, cl, fl->type);
2876 }
2877 m->m_len = len;
2878 }
2879 switch(sopeop) {
2880 case RSPQ_SOP_EOP:
2881 ret = 1;
2882 /* FALLTHROUGH */
2883 case RSPQ_SOP:
2884 mh->mh_head = mh->mh_tail = m;
2885 m->m_pkthdr.len = len;
2886 break;
2887 case RSPQ_EOP:
2888 ret = 1;
2889 /* FALLTHROUGH */
2890 case RSPQ_NSOP_NEOP:
2891 if (mh->mh_tail == NULL) {
2892 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2893 m_freem(m);
2894 break;
2895 }
2896 mh->mh_tail->m_next = m;
2897 mh->mh_tail = m;
2898 mh->mh_head->m_pkthdr.len += len;
2899 break;
2900 }
2901 if (cxgb_debug)
2902 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2903 done:
2904 if (++fl->cidx == fl->size)
2905 fl->cidx = 0;
2906
2907 return (ret);
2908 }
2909
2910 /**
2911 * handle_rsp_cntrl_info - handles control information in a response
2912 * @qs: the queue set corresponding to the response
2913 * @flags: the response control flags
2914 *
2915 * Handles the control information of an SGE response, such as GTS
2916 * indications and completion credits for the queue set's Tx queues.
2917 * HW coalesces credits, we don't do any extra SW coalescing.
2918 */
2919 static __inline void
2920 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2921 {
2922 unsigned int credits;
2923
2924 #if USE_GTS
2925 if (flags & F_RSPD_TXQ0_GTS)
2926 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2927 #endif
2928 credits = G_RSPD_TXQ0_CR(flags);
2929 if (credits)
2930 qs->txq[TXQ_ETH].processed += credits;
2931
2932 credits = G_RSPD_TXQ2_CR(flags);
2933 if (credits)
2934 qs->txq[TXQ_CTRL].processed += credits;
2935
2936 # if USE_GTS
2937 if (flags & F_RSPD_TXQ1_GTS)
2938 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2939 # endif
2940 credits = G_RSPD_TXQ1_CR(flags);
2941 if (credits)
2942 qs->txq[TXQ_OFLD].processed += credits;
2943
2944 }
2945
2946 static void
2947 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2948 unsigned int sleeping)
2949 {
2950 ;
2951 }
2952
/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
 *
 *	Returns @budget minus the budget left over, i.e. the number of
 *	responses accounted for in this call (a response abandoned because of
 *	an mbuf shortage is counted as well).
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
#ifdef LRO_SUPPORTED
	int lro_enabled = qs->lro.enabled;
	int skip_lro;
	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
	int ngathered = 0;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	/* Default holdoff; replaced by NOMEM_INTR_DELAY on mbuf shortage. */
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		/* First 32 bits of the descriptor, passed on as csum_data. */
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			struct mbuf *m;

			if (cxgb_debug)
				printf("async notification\n");

			/*
			 * NOTE(review): when mh_head is already set, the
			 * descriptor is copied into a separately allocated
			 * mbuf that is not linked anywhere afterwards, while
			 * the delivery code below uses mh_head - looks like
			 * a leak / wrong mbuf; confirm intent.
			 */
			if (rspq->rspq_mh.mh_head == NULL) {
				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
				m = rspq->rspq_mh.mh_head;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
			}
			if (m == NULL)
				goto no_mem;

			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
			/* Overwrite the first byte with the async-notif opcode. */
			*mtod(m, char *) = CPL_ASYNC_NOTIF;
			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
			eop = 1;
			rspq->async_notif++;
			goto skip;
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;

			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
			    r->rss_hdr.opcode, rspq->cidx);
			/*
			 * NOTE(review): the mbuf allocated into m when
			 * mh_head is already set is never referenced again
			 * in this branch - possible leak; confirm.
			 */
			if (rspq->rspq_mh.mh_head == NULL)
				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
			else
				m = m_gethdr(M_DONTWAIT, MT_DATA);

			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
		no_mem:
				/*
				 * Out of mbufs (also reached from the async
				 * notification branch): back off the next
				 * interrupt and stop processing.  The
				 * response is not consumed.
				 */
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			/* Packet data lives in a free-list buffer. */
			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
			if (eop) {
				rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
				rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
			}

			/* Free-list packets carry 2 bytes of padding in front. */
			ethpad = 2;
		} else {
			/* No data at all: credits/control information only. */
			rspq->pure_rsps++;
		}
	skip:
		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		/* Advance to the next descriptor; flip generation on wrap. */
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		/* Return credits to the hardware a quarter ring at a time. */
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}
		if (!eth && eop) {
			/* Complete non-Ethernet packet: hand to offload path. */
			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
			/*
			 * XXX size mismatch
			 */
			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);


			ngathered = rx_offload(&adap->tdev, rspq,
			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
			rspq->rspq_mh.mh_head = NULL;
			DPRINTF("received offload packet\n");

		} else if (eth && eop) {
			struct mbuf *m = rspq->rspq_mh.mh_head;

			t3_rx_eth(adap, rspq, m, ethpad);

#ifdef LRO_SUPPORTED
			/*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending packet up
			 * lro_ctrl->ifp's input.  That is incorrect.
			 *
			 * The mbuf's rcvif was derived from the cpl header and
			 * is accurate.  Skip LRO and just use that.
			 */
			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);

			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
				/* successfully queue'd for LRO */
			} else
#endif
			{
				/*
				 * LRO not enabled, packet unsuitable for LRO,
				 * or unable to queue.  Pass it up right now in
				 * either case.
				 */
				struct ifnet *ifp = m->m_pkthdr.rcvif;
				(*ifp->if_input)(ifp, m);
			}
			rspq->rspq_mh.mh_head = NULL;

		}
		/* Light top-up of both free lists after every response. */
		__refill_fl_lt(adap, &qs->fl[0], 32);
		__refill_fl_lt(adap, &qs->fl[1], 32);
		--budget_left;
	}

	/* Push out whatever offload packets were gathered but not yet sent. */
	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);

#ifdef LRO_SUPPORTED
	/* Flush LRO */
	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
		tcp_lro_flush(lro_ctrl, queued);
	}
#endif

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	mb();  /* commit Tx queue processed updates */
	/*
	 * NOTE(review): "> 1" presumably ignores bit 0 of the stopped mask so
	 * that only the offload/control queues trigger restart_tx() - confirm
	 * against the TXQ_* bit numbering.
	 */
	if (__predict_false(qs->txq_stopped > 1)) {
		printf("restarting tx on %p\n", qs);

		restart_tx(qs);
	}

	/* Larger top-up of both free lists before returning. */
	__refill_fl_lt(adap, &qs->fl[0], 512);
	__refill_fl_lt(adap, &qs->fl[1], 512);
	budget -= budget_left;
	return (budget);
}
3147
3148 /*
3149 * A helper function that processes responses and issues GTS.
3150 */
3151 static __inline int
3152 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3153 {
3154 int work;
3155 static int last_holdoff = 0;
3156
3157 work = process_responses(adap, rspq_to_qset(rq), -1);
3158
3159 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3160 printf("next_holdoff=%d\n", rq->next_holdoff);
3161 last_holdoff = rq->next_holdoff;
3162 }
3163 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3164 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3165
3166 return (work);
3167 }
3168
3169
3170 /*
3171 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3172 * Handles data events from SGE response queues as well as error and other
3173 * async events as they all use the same interrupt pin. We use one SGE
3174 * response queue per port in this mode and protect all response queues with
3175 * queue 0's lock.
3176 */
3177 void
3178 t3b_intr(void *data)
3179 {
3180 uint32_t i, map;
3181 adapter_t *adap = data;
3182 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3183
3184 t3_write_reg(adap, A_PL_CLI, 0);
3185 map = t3_read_reg(adap, A_SG_DATA_INTR);
3186
3187 if (!map)
3188 return;
3189
3190 if (__predict_false(map & F_ERRINTR))
3191 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3192
3193 mtx_lock(&q0->lock);
3194 for_each_port(adap, i)
3195 if (map & (1 << i))
3196 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3197 mtx_unlock(&q0->lock);
3198 }
3199
3200 /*
3201 * The MSI interrupt handler. This needs to handle data events from SGE
3202 * response queues as well as error and other async events as they all use
3203 * the same MSI vector. We use one SGE response queue per port in this mode
3204 * and protect all response queues with queue 0's lock.
3205 */
3206 void
3207 t3_intr_msi(void *data)
3208 {
3209 adapter_t *adap = data;
3210 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3211 int i, new_packets = 0;
3212
3213 mtx_lock(&q0->lock);
3214
3215 for_each_port(adap, i)
3216 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3217 new_packets = 1;
3218 mtx_unlock(&q0->lock);
3219 if (new_packets == 0)
3220 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3221 }
3222
3223 void
3224 t3_intr_msix(void *data)
3225 {
3226 struct sge_qset *qs = data;
3227 adapter_t *adap = qs->port->adapter;
3228 struct sge_rspq *rspq = &qs->rspq;
3229
3230 if (process_responses_gts(adap, rspq) == 0)
3231 rspq->unhandled_irqs++;
3232 }
3233
/*
 * Base length of the sbuf used by the queue dump sysctl handlers; they scale
 * it by a multiplier when a dump does not fit.  The expansion is
 * parenthesized so the macro stays a single value in any expression.
 */
#define	QDUMP_SBUF_SIZE	(32 * 400)
3235 static int
3236 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3237 {
3238 struct sge_rspq *rspq;
3239 struct sge_qset *qs;
3240 int i, err, dump_end, idx;
3241 static int multiplier = 1;
3242 struct sbuf *sb;
3243 struct rsp_desc *rspd;
3244 uint32_t data[4];
3245
3246 rspq = arg1;
3247 qs = rspq_to_qset(rspq);
3248 if (rspq->rspq_dump_count == 0)
3249 return (0);
3250 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3251 log(LOG_WARNING,
3252 "dump count is too large %d\n", rspq->rspq_dump_count);
3253 rspq->rspq_dump_count = 0;
3254 return (EINVAL);
3255 }
3256 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3257 log(LOG_WARNING,
3258 "dump start of %d is greater than queue size\n",
3259 rspq->rspq_dump_start);
3260 rspq->rspq_dump_start = 0;
3261 return (EINVAL);
3262 }
3263 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3264 if (err)
3265 return (err);
3266 retry_sbufops:
3267 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3268
3269 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3270 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3271 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3272 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3273 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3274
3275 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3276 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3277
3278 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3279 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3280 idx = i & (RSPQ_Q_SIZE-1);
3281
3282 rspd = &rspq->desc[idx];
3283 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3284 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3285 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3286 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3287 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3288 be32toh(rspd->len_cq), rspd->intr_gen);
3289 }
3290 if (sbuf_overflowed(sb)) {
3291 sbuf_delete(sb);
3292 multiplier++;
3293 goto retry_sbufops;
3294 }
3295 sbuf_finish(sb);
3296 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3297 sbuf_delete(sb);
3298 return (err);
3299 }
3300
3301 static int
3302 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3303 {
3304 struct sge_txq *txq;
3305 struct sge_qset *qs;
3306 int i, j, err, dump_end;
3307 static int multiplier = 1;
3308 struct sbuf *sb;
3309 struct tx_desc *txd;
3310 uint32_t *WR, wr_hi, wr_lo, gen;
3311 uint32_t data[4];
3312
3313 txq = arg1;
3314 qs = txq_to_qset(txq, TXQ_ETH);
3315 if (txq->txq_dump_count == 0) {
3316 return (0);
3317 }
3318 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3319 log(LOG_WARNING,
3320 "dump count is too large %d\n", txq->txq_dump_count);
3321 txq->txq_dump_count = 1;
3322 return (EINVAL);
3323 }
3324 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3325 log(LOG_WARNING,
3326 "dump start of %d is greater than queue size\n",
3327 txq->txq_dump_start);
3328 txq->txq_dump_start = 0;
3329 return (EINVAL);
3330 }
3331 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3332 if (err)
3333 return (err);
3334
3335
3336 retry_sbufops:
3337 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3338
3339 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3340 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3341 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3342 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3343 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3344 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3345 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3346 txq->txq_dump_start,
3347 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3348
3349 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3350 for (i = txq->txq_dump_start; i < dump_end; i++) {
3351 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3352 WR = (uint32_t *)txd->flit;
3353 wr_hi = ntohl(WR[0]);
3354 wr_lo = ntohl(WR[1]);
3355 gen = G_WR_GEN(wr_lo);
3356
3357 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3358 wr_hi, wr_lo, gen);
3359 for (j = 2; j < 30; j += 4)
3360 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3361 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3362
3363 }
3364 if (sbuf_overflowed(sb)) {
3365 sbuf_delete(sb);
3366 multiplier++;
3367 goto retry_sbufops;
3368 }
3369 sbuf_finish(sb);
3370 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3371 sbuf_delete(sb);
3372 return (err);
3373 }
3374
3375 static int
3376 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3377 {
3378 struct sge_txq *txq;
3379 struct sge_qset *qs;
3380 int i, j, err, dump_end;
3381 static int multiplier = 1;
3382 struct sbuf *sb;
3383 struct tx_desc *txd;
3384 uint32_t *WR, wr_hi, wr_lo, gen;
3385
3386 txq = arg1;
3387 qs = txq_to_qset(txq, TXQ_CTRL);
3388 if (txq->txq_dump_count == 0) {
3389 return (0);
3390 }
3391 if (txq->txq_dump_count > 256) {
3392 log(LOG_WARNING,
3393 "dump count is too large %d\n", txq->txq_dump_count);
3394 txq->txq_dump_count = 1;
3395 return (EINVAL);
3396 }
3397 if (txq->txq_dump_start > 255) {
3398 log(LOG_WARNING,
3399 "dump start of %d is greater than queue size\n",
3400 txq->txq_dump_start);
3401 txq->txq_dump_start = 0;
3402 return (EINVAL);
3403 }
3404
3405 retry_sbufops:
3406 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3407 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3408 txq->txq_dump_start,
3409 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3410
3411 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3412 for (i = txq->txq_dump_start; i < dump_end; i++) {
3413 txd = &txq->desc[i & (255)];
3414 WR = (uint32_t *)txd->flit;
3415 wr_hi = ntohl(WR[0]);
3416 wr_lo = ntohl(WR[1]);
3417 gen = G_WR_GEN(wr_lo);
3418
3419 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3420 wr_hi, wr_lo, gen);
3421 for (j = 2; j < 30; j += 4)
3422 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3423 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3424
3425 }
3426 if (sbuf_overflowed(sb)) {
3427 sbuf_delete(sb);
3428 multiplier++;
3429 goto retry_sbufops;
3430 }
3431 sbuf_finish(sb);
3432 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3433 sbuf_delete(sb);
3434 return (err);
3435 }
3436
3437 static int
3438 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3439 {
3440 adapter_t *sc = arg1;
3441 struct qset_params *qsp = &sc->params.sge.qset[0];
3442 int coalesce_usecs;
3443 struct sge_qset *qs;
3444 int i, j, err, nqsets = 0;
3445 struct mtx *lock;
3446
3447 if ((sc->flags & FULL_INIT_DONE) == 0)
3448 return (ENXIO);
3449
3450 coalesce_usecs = qsp->coalesce_usecs;
3451 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3452
3453 if (err != 0) {
3454 return (err);
3455 }
3456 if (coalesce_usecs == qsp->coalesce_usecs)
3457 return (0);
3458
3459 for (i = 0; i < sc->params.nports; i++)
3460 for (j = 0; j < sc->port[i].nqsets; j++)
3461 nqsets++;
3462
3463 coalesce_usecs = max(1, coalesce_usecs);
3464
3465 for (i = 0; i < nqsets; i++) {
3466 qs = &sc->sge.qs[i];
3467 qsp = &sc->params.sge.qset[i];
3468 qsp->coalesce_usecs = coalesce_usecs;
3469
3470 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3471 &sc->sge.qs[0].rspq.lock;
3472
3473 mtx_lock(lock);
3474 t3_update_qset_coalesce(qs, qsp);
3475 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3476 V_NEWTIMER(qs->rspq.holdoff_tmr));
3477 mtx_unlock(lock);
3478 }
3479
3480 return (0);
3481 }
3482
3483
3484 void
3485 t3_add_attach_sysctls(adapter_t *sc)
3486 {
3487 struct sysctl_ctx_list *ctx;
3488 struct sysctl_oid_list *children;
3489
3490 ctx = device_get_sysctl_ctx(sc->dev);
3491 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3492
3493 /* random information */
3494 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3495 "firmware_version",
3496 CTLFLAG_RD, &sc->fw_version,
3497 0, "firmware version");
3498 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3499 "hw_revision",
3500 CTLFLAG_RD, &sc->params.rev,
3501 0, "chip model");
3502 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3503 "port_types",
3504 CTLFLAG_RD, &sc->port_types,
3505 0, "type of ports");
3506 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3507 "enable_debug",
3508 CTLFLAG_RW, &cxgb_debug,
3509 0, "enable verbose debugging output");
3510 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3511 CTLFLAG_RD, &sc->tunq_coalesce,
3512 "#tunneled packets freed");
3513 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3514 "txq_overrun",
3515 CTLFLAG_RD, &txq_fills,
3516 0, "#times txq overrun");
3517 }
3518
3519
/*
 * Sysctl node names for the per-queue-set sub-trees.  Both the strings and
 * the pointers to them are constant.
 */
static const char * const rspq_name = "rspq";
static const char * const txq_names[] =
{
	"txq_eth",
	"txq_ofld",
	"txq_ctrl"
};
3527
3528 static int
3529 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3530 {
3531 struct port_info *p = arg1;
3532 uint64_t *parg;
3533
3534 if (!p)
3535 return (EINVAL);
3536
3537 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3538 PORT_LOCK(p);
3539 t3_mac_update_stats(&p->mac);
3540 PORT_UNLOCK(p);
3541
3542 return (sysctl_handle_quad(oidp, parg, 0, req));
3543 }
3544
3545 void
3546 t3_add_configured_sysctls(adapter_t *sc)
3547 {
3548 struct sysctl_ctx_list *ctx;
3549 struct sysctl_oid_list *children;
3550 int i, j;
3551
3552 ctx = device_get_sysctl_ctx(sc->dev);
3553 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3554
3555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3556 "intr_coal",
3557 CTLTYPE_INT|CTLFLAG_RW, sc,
3558 0, t3_set_coalesce_usecs,
3559 "I", "interrupt coalescing timer (us)");
3560
3561 for (i = 0; i < sc->params.nports; i++) {
3562 struct port_info *pi = &sc->port[i];
3563 struct sysctl_oid *poid;
3564 struct sysctl_oid_list *poidlist;
3565 struct mac_stats *mstats = &pi->mac.stats;
3566
3567 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3568 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3569 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3570 poidlist = SYSCTL_CHILDREN(poid);
3571 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3572 "nqsets", CTLFLAG_RD, &pi->nqsets,
3573 0, "#queue sets");
3574
3575 for (j = 0; j < pi->nqsets; j++) {
3576 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3577 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3578 *ctrlqpoid, *lropoid;
3579 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3580 *txqpoidlist, *ctrlqpoidlist,
3581 *lropoidlist;
3582 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3583
3584 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3585
3586 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3587 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3588 qspoidlist = SYSCTL_CHILDREN(qspoid);
3589
3590 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3591 CTLFLAG_RD, &qs->fl[0].empty, 0,
3592 "freelist #0 empty");
3593 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3594 CTLFLAG_RD, &qs->fl[1].empty, 0,
3595 "freelist #1 empty");
3596
3597 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3598 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3599 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3600
3601 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3602 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3603 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3604
3605 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3606 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3607 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3608
3609 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3610 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3611 lropoidlist = SYSCTL_CHILDREN(lropoid);
3612
3613 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3614 CTLFLAG_RD, &qs->rspq.size,
3615 0, "#entries in response queue");
3616 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3617 CTLFLAG_RD, &qs->rspq.cidx,
3618 0, "consumer index");
3619 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3620 CTLFLAG_RD, &qs->rspq.credits,
3621 0, "#credits");
3622 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3623 CTLFLAG_RD, &qs->rspq.phys_addr,
3624 "physical_address_of the queue");
3625 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3626 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3627 0, "start rspq dump entry");
3628 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3629 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3630 0, "#rspq entries to dump");
3631 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3632 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3633 0, t3_dump_rspq, "A", "dump of the response queue");
3634
3635
3636 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3637 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3638 0, "#tunneled packets dropped");
3639 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3640 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3641 0, "#tunneled packets waiting to be sent");
3642 #if 0
3643 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3644 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3645 0, "#tunneled packets queue producer index");
3646 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3647 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3648 0, "#tunneled packets queue consumer index");
3649 #endif
3650 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3651 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3652 0, "#tunneled packets processed by the card");
3653 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3654 CTLFLAG_RD, &txq->cleaned,
3655 0, "#tunneled packets cleaned");
3656 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3657 CTLFLAG_RD, &txq->in_use,
3658 0, "#tunneled packet slots in use");
3659 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3660 CTLFLAG_RD, &txq->txq_frees,
3661 "#tunneled packets freed");
3662 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3663 CTLFLAG_RD, &txq->txq_skipped,
3664 0, "#tunneled packet descriptors skipped");
3665 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3666 CTLFLAG_RD, &txq->txq_coalesced,
3667 "#tunneled packets coalesced");
3668 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3669 CTLFLAG_RD, &txq->txq_enqueued,
3670 0, "#tunneled packets enqueued to hardware");
3671 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3672 CTLFLAG_RD, &qs->txq_stopped,
3673 0, "tx queues stopped");
3674 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3675 CTLFLAG_RD, &txq->phys_addr,
3676 "physical_address_of the queue");
3677 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3678 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3679 0, "txq generation");
3680 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3681 CTLFLAG_RD, &txq->cidx,
3682 0, "hardware queue cidx");
3683 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3684 CTLFLAG_RD, &txq->pidx,
3685 0, "hardware queue pidx");
3686 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3687 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3688 0, "txq start idx for dump");
3689 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3690 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3691 0, "txq #entries to dump");
3692 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3693 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3694 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3695
3696 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3697 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3698 0, "ctrlq start idx for dump");
3699 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3700 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3701 0, "ctrl #entries to dump");
3702 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3703 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3704 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3705
3706 #ifdef LRO_SUPPORTED
3707 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3708 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3709 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3710 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3711 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3712 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3713 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3714 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3715 #endif
3716 }
3717
3718 /* Now add a node for mac stats. */
3719 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3720 CTLFLAG_RD, NULL, "MAC statistics");
3721 poidlist = SYSCTL_CHILDREN(poid);
3722
3723 /*
3724 * We (ab)use the length argument (arg2) to pass on the offset
3725 * of the data that we are interested in. This is only required
3726 * for the quad counters that are updated from the hardware (we
3727 * make sure that we return the latest value).
3728 * sysctl_handle_macstat first updates *all* the counters from
3729 * the hardware, and then returns the latest value of the
3730 * requested counter. Best would be to update only the
3731 * requested counter from hardware, but t3_mac_update_stats()
3732 * hides all the register details and we don't want to dive into
3733 * all that here.
3734 */
3735 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3736 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3737 sysctl_handle_macstat, "QU", 0)
3738 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3739 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3740 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3741 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3742 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3743 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3744 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3745 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3746 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3747 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3748 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3749 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3750 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3751 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3752 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3753 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3755 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3756 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3757 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3758 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3759 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3760 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3761 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3762 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3763 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3764 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3765 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3766 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3767 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3768 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3769 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3770 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3771 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3772 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3773 CXGB_SYSCTL_ADD_QUAD(rx_short);
3774 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3775 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3776 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3777 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3779 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3780 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3781 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3782 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3783 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3784 #undef CXGB_SYSCTL_ADD_QUAD
3785
3786 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3787 CTLFLAG_RD, &mstats->a, 0)
3788 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3789 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3790 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3791 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3792 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3793 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3794 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3795 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3796 CXGB_SYSCTL_ADD_ULONG(num_resets);
3797 CXGB_SYSCTL_ADD_ULONG(link_faults);
3798 #undef CXGB_SYSCTL_ADD_ULONG
3799 }
3800 }
3801
3802 /**
3803 * t3_get_desc - dump an SGE descriptor for debugging purposes
3804 * @qs: the queue set
3805 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3806 * @idx: the descriptor index in the queue
3807 * @data: where to dump the descriptor contents
3808 *
3809 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3810 * size of the descriptor.
3811 */
3812 int
3813 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3814 unsigned char *data)
3815 {
3816 if (qnum >= 6)
3817 return (EINVAL);
3818
3819 if (qnum < 3) {
3820 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3821 return -EINVAL;
3822 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3823 return sizeof(struct tx_desc);
3824 }
3825
3826 if (qnum == 3) {
3827 if (!qs->rspq.desc || idx >= qs->rspq.size)
3828 return (EINVAL);
3829 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3830 return sizeof(struct rsp_desc);
3831 }
3832
3833 qnum -= 4;
3834 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3835 return (EINVAL);
3836 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3837 return sizeof(struct rx_desc);
3838 }
Cache object: ce46fc52fb7e5fb0709d70cdab9856d6
|