1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/8.4/sys/dev/cxgb/cxgb_sge.c 242370 2012-10-30 19:24:05Z np $");
32
33 #include "opt_inet.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 #include <sys/socket.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66
67 #include <dev/pci/pcireg.h>
68 #include <dev/pci/pcivar.h>
69
70 #include <vm/vm.h>
71 #include <vm/pmap.h>
72
73 #include <cxgb_include.h>
74 #include <sys/mvec.h>
75
76 int txq_fills = 0;
77 int multiq_tx_enable = 1;
78
79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
83 "size of per-queue mbuf ring");
84
85 static int cxgb_tx_coalesce_force = 0;
86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
88 &cxgb_tx_coalesce_force, 0,
89 "coalesce small packets into a single work request regardless of ring state");
90
#define COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE>>1)
#define COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE>>3))
#define COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE>>2)
#define COALESCE_STOP_MIN	(TX_ETH_Q_SIZE>>5)
#define TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE>>5)
#define TX_RECLAIM_MAX		(TX_ETH_Q_SIZE>>2)
#define TX_RECLAIM_MIN		(TX_ETH_Q_SIZE>>6)
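/*
 * For illustration, assuming TX_ETH_Q_SIZE is 1024: coalescing starts by
 * default once 512 descriptors are in use and stops again below 256, and
 * reclaims run in batches of at least 32 descriptors.
 */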
98
99
100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
102 &cxgb_tx_coalesce_enable_start);
103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
104 &cxgb_tx_coalesce_enable_start, 0,
105 "coalesce enable threshold");
106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
109 &cxgb_tx_coalesce_enable_stop, 0,
110 "coalesce disable threshold");
111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
114 &cxgb_tx_reclaim_threshold, 0,
115 "tx cleaning minimum threshold");
116
117 /*
118 * XXX don't re-enable this until TOE stops assuming
119 * we have an m_ext
120 */
121 static int recycle_enable = 0;
122
123 extern int cxgb_use_16k_clusters;
124 extern int nmbjumbop;
125 extern int nmbjumbo9;
126 extern int nmbjumbo16;
127
128 #define USE_GTS 0
129
130 #define SGE_RX_SM_BUF_SIZE 1536
131 #define SGE_RX_DROP_THRES 16
132 #define SGE_RX_COPY_THRES 128
133
134 /*
135 * Period of the Tx buffer reclaim timer. This timer does not need to run
136 * frequently as Tx buffers are usually reclaimed by new Tx packets.
137 */
138 #define TX_RECLAIM_PERIOD (hz >> 1)
139
140 /*
141 * Values for sge_txq.flags
142 */
143 enum {
144 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
145 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
146 };
147
148 struct tx_desc {
149 uint64_t flit[TX_DESC_FLITS];
150 } __packed;
151
152 struct rx_desc {
153 uint32_t addr_lo;
154 uint32_t len_gen;
155 uint32_t gen2;
156 uint32_t addr_hi;
157 } __packed;
158
159 struct rsp_desc { /* response queue descriptor */
160 struct rss_header rss_hdr;
161 uint32_t flags;
162 uint32_t len_cq;
163 uint8_t imm_data[47];
164 uint8_t intr_gen;
165 } __packed;
166
167 #define RX_SW_DESC_MAP_CREATED (1 << 0)
168 #define TX_SW_DESC_MAP_CREATED (1 << 1)
169 #define RX_SW_DESC_INUSE (1 << 3)
170 #define TX_SW_DESC_MAPPED (1 << 4)
171
172 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
173 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
174 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
175 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
176
177 struct tx_sw_desc { /* SW state per Tx descriptor */
178 struct mbuf *m;
179 bus_dmamap_t map;
180 int flags;
181 };
182
183 struct rx_sw_desc { /* SW state per Rx descriptor */
184 caddr_t rxsd_cl;
185 struct mbuf *m;
186 bus_dmamap_t map;
187 int flags;
188 };
189
190 struct txq_state {
191 unsigned int compl;
192 unsigned int gen;
193 unsigned int pidx;
194 };
195
196 struct refill_fl_cb_arg {
197 int error;
198 bus_dma_segment_t seg;
199 int nseg;
200 };
201
202
203 /*
204 * Maps a number of flits to the number of Tx descriptors that can hold them.
205 * The formula is
206 *
207 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
208 *
209 * HW allows up to 4 descriptors to be combined into a WR.
210 */
211 static uint8_t flit_desc_map[] = {
212 0,
213 #if SGE_NUM_GENBITS == 1
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
223 #else
224 # error "SGE_NUM_GENBITS must be 1 or 2"
225 #endif
226 };
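/*
 * Worked example: with SGE_NUM_GENBITS == 2 the table above corresponds
 * to WR_FLITS == 15, so 16 flits need 1 + (16 - 2) / (15 - 1) = 2
 * descriptors and 30 flits need 1 + (30 - 2) / 14 = 3, matching the
 * table entries.
 */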
227
228 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
229 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
230 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
231 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
232 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
233 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
234 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
237 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
238 #define TXQ_RING_DEQUEUE(qs) \
239 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
240
241 int cxgb_debug = 0;
242
243 static void sge_timer_cb(void *arg);
244 static void sge_timer_reclaim(void *arg, int ncount);
245 static void sge_txq_reclaim_handler(void *arg, int ncount);
246 static void cxgb_start_locked(struct sge_qset *qs);
247
/*
 * XXX need to cope with bursty scheduling by looking at a wider window
 * than the current one when determining the need for coalescing.
 */
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
255 {
256 struct adapter *sc;
257 struct sge_txq *txq;
258 uint8_t *fill;
259
260 if (__predict_false(cxgb_tx_coalesce_force))
261 return (1);
262 txq = &qs->txq[TXQ_ETH];
263 sc = qs->port->adapter;
264 fill = &sc->tunq_fill[qs->idx];
265
266 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * Once the hardware transmit queue reaches the enable threshold we
	 * mark it as coalescing; we drop back out of coalescing when usage
	 * falls below the disable threshold and no packets are enqueued.
	 * This provides some degree of hysteresis.
	 */
276 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
278 *fill = 0;
279 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
280 *fill = 1;
281
282 return (sc->tunq_coalesce);
283 }
284
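/*
 * On LP64 platforms the work request header is updated with a single
 * 64-bit store, so the SGE can never observe a half-written header.  On
 * 32-bit platforms the high word is written first, with a write barrier
 * before the low word that carries the generation bit.
 */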
285 #ifdef __LP64__
286 static void
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
288 {
289 uint64_t wr_hilo;
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
291 wr_hilo = wr_hi;
292 wr_hilo |= (((uint64_t)wr_lo)<<32);
293 #else
294 wr_hilo = wr_lo;
295 wr_hilo |= (((uint64_t)wr_hi)<<32);
296 #endif
297 wrp->wrh_hilo = wr_hilo;
298 }
299 #else
300 static void
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
302 {
303
304 wrp->wrh_hi = wr_hi;
305 wmb();
306 wrp->wrh_lo = wr_lo;
307 }
308 #endif
309
310 struct coalesce_info {
311 int count;
312 int nbytes;
313 };
314
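/*
 * Dequeue predicate used with TXQ_RING_DEQUEUE_COND in cxgb_dequeue():
 * the first packet is always accepted; subsequent packets join the
 * batch only if they are single-mbuf and keep the work request within
 * 7 packets and 10500 bytes in total.
 */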
315 static int
316 coalesce_check(struct mbuf *m, void *arg)
317 {
318 struct coalesce_info *ci = arg;
319 int *count = &ci->count;
320 int *nbytes = &ci->nbytes;
321
322 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
323 (*count < 7) && (m->m_next == NULL))) {
324 *count += 1;
325 *nbytes += m->m_len;
326 return (1);
327 }
328 return (0);
329 }
330
331 static struct mbuf *
332 cxgb_dequeue(struct sge_qset *qs)
333 {
334 struct mbuf *m, *m_head, *m_tail;
335 struct coalesce_info ci;
336
337
338 if (check_pkt_coalesce(qs) == 0)
339 return TXQ_RING_DEQUEUE(qs);
340
341 m_head = m_tail = NULL;
342 ci.count = ci.nbytes = 0;
343 do {
344 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
345 if (m_head == NULL) {
346 m_tail = m_head = m;
347 } else if (m != NULL) {
348 m_tail->m_nextpkt = m;
349 m_tail = m;
350 }
351 } while (m != NULL);
352 if (ci.count > 7)
		panic("trying to coalesce %d packets into one WR", ci.count);
354 return (m_head);
355 }
356
357 /**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set whose Tx queue is to be reclaimed
 *	@reclaim_min: don't bother unless this many descriptors can be reclaimed
 *	@queue: the index of the Tx queue within the set
361 *
362 * Reclaims Tx descriptors that the SGE has indicated it has processed,
363 * and frees the associated buffers if possible. Called with the Tx
364 * queue's lock held.
365 */
366 static __inline int
367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
368 {
369 struct sge_txq *q = &qs->txq[queue];
370 int reclaim = desc_reclaimable(q);
371
372 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
373 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
374 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
375
376 if (reclaim < reclaim_min)
377 return (0);
378
379 mtx_assert(&qs->lock, MA_OWNED);
380 if (reclaim > 0) {
381 t3_free_tx_desc(qs, reclaim, queue);
382 q->cleaned += reclaim;
383 q->in_use -= reclaim;
384 }
385 if (isset(&qs->txq_stopped, TXQ_ETH))
386 clrbit(&qs->txq_stopped, TXQ_ETH);
387
388 return (reclaim);
389 }
390
391 /**
392 * should_restart_tx - are there enough resources to restart a Tx queue?
393 * @q: the Tx queue
394 *
395 * Checks if there are enough descriptors to restart a suspended Tx queue.
396 */
397 static __inline int
398 should_restart_tx(const struct sge_txq *q)
399 {
400 unsigned int r = q->processed - q->cleaned;
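	/*
	 * r counts descriptors the hardware has finished with but that we
	 * have not reclaimed yet; the queue can restart once the effective
	 * usage (in_use - r) drops below half its size.
	 */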
401
402 return q->in_use - r < (q->size >> 1);
403 }
404
405 /**
406 * t3_sge_init - initialize SGE
407 * @adap: the adapter
408 * @p: the SGE parameters
409 *
410 * Performs SGE initialization needed every time after a chip reset.
411 * We do not initialize any of the queue sets here, instead the driver
412 * top-level must request those individually. We also do not enable DMA
413 * here, that should be done after the queues have been set up.
414 */
415 void
416 t3_sge_init(adapter_t *adap, struct sge_params *p)
417 {
418 u_int ctrl, ups;
419
420 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
421
422 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
423 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
424 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
425 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
426 #if SGE_NUM_GENBITS == 1
427 ctrl |= F_EGRGENCTRL;
428 #endif
429 if (adap->params.rev > 0) {
430 if (!(adap->flags & (USING_MSIX | USING_MSI)))
431 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
432 }
433 t3_write_reg(adap, A_SG_CONTROL, ctrl);
434 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
435 V_LORCQDRBTHRSH(512));
436 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
437 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
438 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
439 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
440 adap->params.rev < T3_REV_C ? 1000 : 500);
441 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
442 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
443 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
444 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
445 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
446 }
447
448
449 /**
450 * sgl_len - calculates the size of an SGL of the given capacity
451 * @n: the number of SGL entries
452 *
453 * Calculates the number of flits needed for a scatter/gather list that
454 * can hold the given number of entries.
455 */
456 static __inline unsigned int
457 sgl_len(unsigned int n)
458 {
459 return ((3 * n) / 2 + (n & 1));
460 }
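/*
 * Each struct sg_ent packs two address/length pairs into 3 flits, so e.g.
 * sgl_len(3) = (3 * 3) / 2 + (3 & 1) = 5 flits: one full sg_ent plus a
 * half-used one for the odd entry.
 */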
461
462 /**
463 * get_imm_packet - return the next ingress packet buffer from a response
464 * @resp: the response descriptor containing the packet data
465 *
466 * Return a packet containing the immediate data of the given response.
467 */
468 static int
469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
470 {
471
472 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
473 m->m_ext.ext_buf = NULL;
474 m->m_ext.ext_type = 0;
475 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
476 return (0);
477 }
478
479 static __inline u_int
480 flits_to_desc(u_int n)
481 {
482 return (flit_desc_map[n]);
483 }
484
485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
486 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
487 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
488 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
489 F_HIRCQPARITYERROR)
490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
492 F_RSPQDISABLED)
493
494 /**
495 * t3_sge_err_intr_handler - SGE async event interrupt handler
496 * @adapter: the adapter
497 *
498 * Interrupt handler for SGE asynchronous (non-data) events.
499 */
500 void
501 t3_sge_err_intr_handler(adapter_t *adapter)
502 {
503 unsigned int v, status;
504
505 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
506 if (status & SGE_PARERR)
507 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
508 status & SGE_PARERR);
509 if (status & SGE_FRAMINGERR)
510 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
511 status & SGE_FRAMINGERR);
512 if (status & F_RSPQCREDITOVERFOW)
513 CH_ALERT(adapter, "SGE response queue credit overflow\n");
514
515 if (status & F_RSPQDISABLED) {
516 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
517
518 CH_ALERT(adapter,
519 "packet delivered to disabled response queue (0x%x)\n",
520 (v >> S_RSPQ0DISABLED) & 0xff);
521 }
522
523 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
524 if (status & SGE_FATALERR)
525 t3_fatal_err(adapter);
526 }
527
528 void
529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
530 {
531 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
532
533 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
534 nqsets *= adap->params.nports;
535
536 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
537
538 while (!powerof2(fl_q_size))
539 fl_q_size--;
540
541 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
542 is_offload(adap);
543
544 #if __FreeBSD_version >= 700111
545 if (use_16k) {
546 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
547 jumbo_buf_size = MJUM16BYTES;
548 } else {
549 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
550 jumbo_buf_size = MJUM9BYTES;
551 }
552 #else
553 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
554 jumbo_buf_size = MJUMPAGESIZE;
555 #endif
556 while (!powerof2(jumbo_q_size))
557 jumbo_q_size--;
558
559 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
560 device_printf(adap->dev,
561 "Insufficient clusters and/or jumbo buffers.\n");
562
563 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
564
565 for (i = 0; i < SGE_QSETS; ++i) {
566 struct qset_params *q = p->qset + i;
567
568 if (adap->params.nports > 2) {
569 q->coalesce_usecs = 50;
570 } else {
571 #ifdef INVARIANTS
572 q->coalesce_usecs = 10;
573 #else
574 q->coalesce_usecs = 5;
575 #endif
576 }
577 q->polling = 0;
578 q->rspq_size = RSPQ_Q_SIZE;
579 q->fl_size = fl_q_size;
580 q->jumbo_size = jumbo_q_size;
581 q->jumbo_buf_size = jumbo_buf_size;
582 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
583 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
584 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
585 q->cong_thres = 0;
586 }
587 }
588
589 int
590 t3_sge_alloc(adapter_t *sc)
591 {
592
593 /* The parent tag. */
	if (bus_dma_tag_create(bus_get_dma_tag(sc->dev), /* PCI parent */
595 1, 0, /* algnmnt, boundary */
596 BUS_SPACE_MAXADDR, /* lowaddr */
597 BUS_SPACE_MAXADDR, /* highaddr */
598 NULL, NULL, /* filter, filterarg */
599 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
600 BUS_SPACE_UNRESTRICTED, /* nsegments */
601 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
602 0, /* flags */
603 NULL, NULL, /* lock, lockarg */
604 &sc->parent_dmat)) {
605 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
606 return (ENOMEM);
607 }
608
609 /*
610 * DMA tag for normal sized RX frames
611 */
612 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
613 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
614 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
615 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
616 return (ENOMEM);
617 }
618
619 /*
620 * DMA tag for jumbo sized RX frames.
621 */
622 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
623 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
624 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
625 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
626 return (ENOMEM);
627 }
628
629 /*
630 * DMA tag for TX frames.
631 */
632 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
633 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
634 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
635 NULL, NULL, &sc->tx_dmat)) {
636 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
637 return (ENOMEM);
638 }
639
640 return (0);
641 }
642
643 int
644 t3_sge_free(struct adapter * sc)
645 {
646
647 if (sc->tx_dmat != NULL)
648 bus_dma_tag_destroy(sc->tx_dmat);
649
650 if (sc->rx_jumbo_dmat != NULL)
651 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
652
653 if (sc->rx_dmat != NULL)
654 bus_dma_tag_destroy(sc->rx_dmat);
655
656 if (sc->parent_dmat != NULL)
657 bus_dma_tag_destroy(sc->parent_dmat);
658
659 return (0);
660 }
661
662 void
663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
664 {
665
666 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
667 qs->rspq.polling = 0 /* p->polling */;
668 }
669
670 #if !defined(__i386__) && !defined(__amd64__)
671 static void
672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
673 {
674 struct refill_fl_cb_arg *cb_arg = arg;
675
676 cb_arg->error = error;
677 cb_arg->seg = segs[0];
678 cb_arg->nseg = nseg;
679
680 }
681 #endif
682 /**
683 * refill_fl - refill an SGE free-buffer list
684 * @sc: the controller softc
685 * @q: the free-list to refill
686 * @n: the number of new buffers to allocate
687 *
688 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
689 * The caller must assure that @n does not exceed the queue's capacity.
690 */
691 static void
692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
693 {
694 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
695 struct rx_desc *d = &q->desc[q->pidx];
696 struct refill_fl_cb_arg cb_arg;
697 struct mbuf *m;
698 caddr_t cl;
699 int err;
700
701 cb_arg.error = 0;
702 while (n--) {
703 /*
704 * We only allocate a cluster, mbuf allocation happens after rx
705 */
706 if (q->zone == zone_pack) {
707 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
708 break;
709 cl = m->m_ext.ext_buf;
710 } else {
711 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
712 break;
713 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
714 uma_zfree(q->zone, cl);
715 break;
716 }
717 }
718 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
719 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
720 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
721 uma_zfree(q->zone, cl);
722 goto done;
723 }
724 sd->flags |= RX_SW_DESC_MAP_CREATED;
725 }
726 #if !defined(__i386__) && !defined(__amd64__)
727 err = bus_dmamap_load(q->entry_tag, sd->map,
728 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
729
730 if (err != 0 || cb_arg.error) {
731 if (q->zone == zone_pack)
732 uma_zfree(q->zone, cl);
733 m_free(m);
734 goto done;
735 }
736 #else
737 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
738 #endif
739 sd->flags |= RX_SW_DESC_INUSE;
740 sd->rxsd_cl = cl;
741 sd->m = m;
742 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
743 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
744 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
745 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
746
747 d++;
748 sd++;
749
750 if (++q->pidx == q->size) {
751 q->pidx = 0;
752 q->gen ^= 1;
753 sd = q->sdesc;
754 d = q->desc;
755 }
756 q->credits++;
757 q->db_pending++;
758 }
759
760 done:
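	/*
	 * Doorbell writes are batched: tell the hardware about newly posted
	 * free-list buffers only once at least 32 have accumulated.
	 */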
761 if (q->db_pending >= 32) {
762 q->db_pending = 0;
763 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
764 }
765 }
766
767
768 /**
769 * free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
771 * @q: the SGE free list to clean up
772 *
773 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
774 * this queue should be stopped before calling this function.
775 */
776 static void
777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
778 {
779 u_int cidx = q->cidx;
780
781 while (q->credits--) {
782 struct rx_sw_desc *d = &q->sdesc[cidx];
783
784 if (d->flags & RX_SW_DESC_INUSE) {
785 bus_dmamap_unload(q->entry_tag, d->map);
786 bus_dmamap_destroy(q->entry_tag, d->map);
787 if (q->zone == zone_pack) {
788 m_init(d->m, zone_pack, MCLBYTES,
789 M_NOWAIT, MT_DATA, M_EXT);
790 uma_zfree(zone_pack, d->m);
791 } else {
792 m_init(d->m, zone_mbuf, MLEN,
793 M_NOWAIT, MT_DATA, 0);
794 uma_zfree(zone_mbuf, d->m);
795 uma_zfree(q->zone, d->rxsd_cl);
796 }
797 }
798
799 d->rxsd_cl = NULL;
800 d->m = NULL;
801 if (++cidx == q->size)
802 cidx = 0;
803 }
804 }
805
806 static __inline void
807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
808 {
809 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
810 }
811
static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	uint32_t refillable = fl->size - fl->credits;

	if (refillable > 0)
		refill_fl(adap, fl, min(max, refillable));
}
820
821 /**
822 * recycle_rx_buf - recycle a receive buffer
823 * @adapter: the adapter
824 * @q: the SGE free list
825 * @idx: index of buffer to recycle
826 *
827 * Recycles the specified buffer on the given free list by adding it at
828 * the next available slot on the list.
829 */
830 static void
831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
832 {
833 struct rx_desc *from = &q->desc[idx];
834 struct rx_desc *to = &q->desc[q->pidx];
835
836 q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	/* already big endian */
	to->addr_hi = from->addr_hi;	/* likewise */
839 wmb(); /* necessary ? */
840 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
841 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
842 q->credits++;
843
844 if (++q->pidx == q->size) {
845 q->pidx = 0;
846 q->gen ^= 1;
847 }
848 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
849 }
850
851 static void
852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
853 {
	bus_addr_t *addr;
855
856 addr = arg;
857 *addr = segs[0].ds_addr;
858 }
859
860 static int
861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
862 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
863 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
864 {
865 size_t len = nelem * elem_size;
866 void *s = NULL;
867 void *p = NULL;
868 int err;
869
870 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
871 BUS_SPACE_MAXADDR_32BIT,
872 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
873 len, 0, NULL, NULL, tag)) != 0) {
874 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
875 return (ENOMEM);
876 }
877
878 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
879 map)) != 0) {
880 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
881 return (ENOMEM);
882 }
883
884 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
885 bzero(p, len);
886 *(void **)desc = p;
887
888 if (sw_size) {
889 len = nelem * sw_size;
890 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
891 *(void **)sdesc = s;
892 }
893 if (parent_entry_tag == NULL)
894 return (0);
895
896 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
897 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
898 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
899 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
900 NULL, NULL, entry_tag)) != 0) {
901 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
902 return (ENOMEM);
903 }
904 return (0);
905 }
906
907 static void
908 sge_slow_intr_handler(void *arg, int ncount)
909 {
910 adapter_t *sc = arg;
911
912 t3_slow_intr_handler(sc);
913 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
914 (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
915 }
916
917 /**
 *	sge_timer_cb - perform periodic maintenance of the SGE queues
 *	@arg: the adapter
920 *
921 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set. It performs the following tasks:
923 *
924 * a) Cleans up any completed Tx descriptors that may still be pending.
925 * Normal descriptor cleanup happens when new packets are added to a Tx
926 * queue so this timer is relatively infrequent and does any cleanup only
927 * if the Tx queue has not seen any new packets in a while. We make a
928 * best effort attempt to reclaim descriptors, in that we don't wait
929 * around if we cannot get a queue's lock (which most likely is because
930 * someone else is queueing new packets and so will also handle the clean
931 * up). Since control queues use immediate data exclusively we don't
932 * bother cleaning them up here.
933 *
934 * b) Replenishes Rx queues that have run out due to memory shortage.
935 * Normally new Rx buffers are added when existing ones are consumed but
936 * when out of memory a queue can become empty. We try to add only a few
937 * buffers here, the queue will be replenished fully as these new buffers
938 * are used up if memory shortage has subsided.
939 *
940 * c) Return coalesced response queue credits in case a response queue is
941 * starved.
942 *
943 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
944 * fifo overflows and the FW doesn't implement any recovery scheme yet.
945 */
946 static void
947 sge_timer_cb(void *arg)
948 {
949 adapter_t *sc = arg;
950 if ((sc->flags & USING_MSIX) == 0) {
951
952 struct port_info *pi;
953 struct sge_qset *qs;
954 struct sge_txq *txq;
955 int i, j;
956 int reclaim_ofl, refill_rx;
957
958 if (sc->open_device_map == 0)
959 return;
960
961 for (i = 0; i < sc->params.nports; i++) {
962 pi = &sc->port[i];
963 for (j = 0; j < pi->nqsets; j++) {
964 qs = &sc->sge.qs[pi->first_qset + j];
965 txq = &qs->txq[0];
966 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
967 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
968 (qs->fl[1].credits < qs->fl[1].size));
969 if (reclaim_ofl || refill_rx) {
970 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
971 break;
972 }
973 }
974 }
975 }
976
977 if (sc->params.nports > 2) {
978 int i;
979
980 for_each_port(sc, i) {
981 struct port_info *pi = &sc->port[i];
982
983 t3_write_reg(sc, A_SG_KDOORBELL,
984 F_SELEGRCNTX |
985 (FW_TUNNEL_SGEEC_START + pi->first_qset));
986 }
987 }
988 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
989 sc->open_device_map != 0)
990 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
991 }
992
/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
 */
998 int
999 t3_sge_init_adapter(adapter_t *sc)
1000 {
1001 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
1002 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1003 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1004 return (0);
1005 }
1006
1007 int
1008 t3_sge_reset_adapter(adapter_t *sc)
1009 {
1010 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1011 return (0);
1012 }
1013
1014 int
1015 t3_sge_init_port(struct port_info *pi)
1016 {
1017 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1018 return (0);
1019 }
1020
1021 /**
1022 * refill_rspq - replenish an SGE response queue
1023 * @adapter: the adapter
1024 * @q: the response queue to replenish
1025 * @credits: how many new responses to make available
1026 *
1027 * Replenishes a response queue by making the supplied number of responses
1028 * available to HW.
1029 */
1030 static __inline void
1031 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1032 {
1033
1034 /* mbufs are allocated on demand when a rspq entry is processed. */
1035 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1036 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1037 }
1038
1039 static void
1040 sge_txq_reclaim_handler(void *arg, int ncount)
1041 {
1042 struct sge_qset *qs = arg;
1043 int i;
1044
1045 for (i = 0; i < 3; i++)
1046 reclaim_completed_tx(qs, 16, i);
1047 }
1048
1049 static void
1050 sge_timer_reclaim(void *arg, int ncount)
1051 {
1052 struct port_info *pi = arg;
1053 int i, nqsets = pi->nqsets;
1054 adapter_t *sc = pi->adapter;
1055 struct sge_qset *qs;
1056 struct mtx *lock;
1057
1058 KASSERT((sc->flags & USING_MSIX) == 0,
1059 ("can't call timer reclaim for msi-x"));
1060
1061 for (i = 0; i < nqsets; i++) {
1062 qs = &sc->sge.qs[pi->first_qset + i];
1063
1064 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1065 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1066 &sc->sge.qs[0].rspq.lock;
1067
1068 if (mtx_trylock(lock)) {
1069 /* XXX currently assume that we are *NOT* polling */
1070 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1071
1072 if (qs->fl[0].credits < qs->fl[0].size - 16)
1073 __refill_fl(sc, &qs->fl[0]);
1074 if (qs->fl[1].credits < qs->fl[1].size - 16)
1075 __refill_fl(sc, &qs->fl[1]);
1076
1077 if (status & (1 << qs->rspq.cntxt_id)) {
1078 if (qs->rspq.credits) {
1079 refill_rspq(sc, &qs->rspq, 1);
1080 qs->rspq.credits--;
1081 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1082 1 << qs->rspq.cntxt_id);
1083 }
1084 }
1085 mtx_unlock(lock);
1086 }
1087 }
1088 }
1089
1090 /**
1091 * init_qset_cntxt - initialize an SGE queue set context info
1092 * @qs: the queue set
1093 * @id: the queue set id
1094 *
1095 * Initializes the TIDs and context ids for the queues of a queue set.
1096 */
1097 static void
1098 init_qset_cntxt(struct sge_qset *qs, u_int id)
1099 {
1100
1101 qs->rspq.cntxt_id = id;
1102 qs->fl[0].cntxt_id = 2 * id;
1103 qs->fl[1].cntxt_id = 2 * id + 1;
1104 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1105 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1106 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1107 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1108 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1109
1110 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1111 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1112 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1113 }
1114
1115
1116 static void
1117 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1118 {
1119 txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of the queue; presumably start
	 * handles this when we bump against the end.
	 */
1124 txqs->gen = txq->gen;
1125 txq->unacked += ndesc;
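	/*
	 * Request a completion from the SGE once every 32 descriptors:
	 * bit 5 of the unacked count is shifted into the WR_COMPL flag
	 * position and the count is then wrapped back into [0, 31].
	 */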
1126 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1127 txq->unacked &= 31;
1128 txqs->pidx = txq->pidx;
1129 txq->pidx += ndesc;
1130 #ifdef INVARIANTS
1131 if (((txqs->pidx > txq->cidx) &&
1132 (txq->pidx < txqs->pidx) &&
1133 (txq->pidx >= txq->cidx)) ||
1134 ((txqs->pidx < txq->cidx) &&
	    (txq->pidx >= txq->cidx)) ||
1136 ((txqs->pidx < txq->cidx) &&
1137 (txq->cidx < txqs->pidx)))
1138 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1139 txqs->pidx, txq->pidx, txq->cidx);
1140 #endif
1141 if (txq->pidx >= txq->size) {
1142 txq->pidx -= txq->size;
1143 txq->gen ^= 1;
1144 }
1145
1146 }
1147
1148 /**
1149 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1150 * @m: the packet mbufs
1151 * @nsegs: the number of segments
1152 *
1153 * Returns the number of Tx descriptors needed for the given Ethernet
1154 * packet. Ethernet packets require addition of WR and CPL headers.
1155 */
1156 static __inline unsigned int
1157 calc_tx_descs(const struct mbuf *m, int nsegs)
1158 {
1159 unsigned int flits;
1160
1161 if (m->m_pkthdr.len <= PIO_LEN)
1162 return 1;
1163
1164 flits = sgl_len(nsegs) + 2;
1165 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1166 flits++;
1167
1168 return flits_to_desc(flits);
1169 }
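/*
 * Example: a TSO packet larger than PIO_LEN with 4 DMA segments needs
 * sgl_len(4) + 2 + 1 = 9 flits and thus flits_to_desc(9) descriptors,
 * while anything that fits in PIO_LEN is written as immediate data in a
 * single descriptor.
 */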
1170
1171 static unsigned int
1172 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1173 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1174 {
1175 struct mbuf *m0;
1176 int err, pktlen, pass = 0;
1177 bus_dma_tag_t tag = txq->entry_tag;
1178
1179 retry:
1180 err = 0;
1181 m0 = *m;
1182 pktlen = m0->m_pkthdr.len;
1183 #if defined(__i386__) || defined(__amd64__)
1184 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1185 goto done;
1186 } else
1187 #endif
1188 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1189
1190 if (err == 0) {
1191 goto done;
1192 }
1193 if (err == EFBIG && pass == 0) {
1194 pass = 1;
1195 /* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_NOWAIT);
1197 if (m0 == NULL) {
1198 m_freem(*m);
1199 *m = NULL;
1200 return (ENOBUFS);
1201 }
1202 *m = m0;
1203 goto retry;
1204 } else if (err == ENOMEM) {
1205 return (err);
	} else if (err) {
1207 if (cxgb_debug)
1208 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1209 m_freem(m0);
1210 *m = NULL;
1211 return (err);
1212 }
1213 done:
1214 #if !defined(__i386__) && !defined(__amd64__)
1215 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1216 #endif
1217 txsd->flags |= TX_SW_DESC_MAPPED;
1218
1219 return (0);
1220 }
1221
1222 /**
1223 * make_sgl - populate a scatter/gather list for a packet
1224 * @sgp: the SGL to populate
1225 * @segs: the packet dma segments
1226 * @nsegs: the number of segments
1227 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	The caller must size the SGL appropriately; sgl_len() gives the number
 *	of flits required.
1231 */
1232 static __inline void
1233 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1234 {
1235 int i, idx;
1236
1237 for (idx = 0, i = 0; i < nsegs; i++) {
1238 /*
1239 * firmware doesn't like empty segments
1240 */
1241 if (segs[i].ds_len == 0)
1242 continue;
1243 if (i && idx == 0)
1244 ++sgp;
1245
1246 sgp->len[idx] = htobe32(segs[i].ds_len);
1247 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1248 idx ^= 1;
1249 }
1250
1251 if (idx) {
1252 sgp->len[idx] = 0;
1253 sgp->addr[idx] = 0;
1254 }
1255 }
1256
1257 /**
1258 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1259 * @adap: the adapter
1260 * @q: the Tx queue
1261 *
1262 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1263 * where the HW is going to sleep just after we checked, however,
1264 * then the interrupt handler will detect the outstanding TX packet
1265 * and ring the doorbell for us.
1266 *
1267 * When GTS is disabled we unconditionally ring the doorbell.
1268 */
1269 static __inline void
1270 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1271 {
1272 #if USE_GTS
1273 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1274 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1275 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1276 #ifdef T3_TRACE
1277 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1278 q->cntxt_id);
1279 #endif
1280 t3_write_reg(adap, A_SG_KDOORBELL,
1281 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1282 }
1283 #else
1284 if (mustring || ++q->db_pending >= 32) {
1285 wmb(); /* write descriptors before telling HW */
1286 t3_write_reg(adap, A_SG_KDOORBELL,
1287 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1288 q->db_pending = 0;
1289 }
1290 #endif
1291 }
1292
1293 static __inline void
1294 wr_gen2(struct tx_desc *d, unsigned int gen)
1295 {
1296 #if SGE_NUM_GENBITS == 2
1297 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1298 #endif
1299 }
1300
1301 /**
1302 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1303 * @ndesc: number of Tx descriptors spanned by the SGL
1304 * @txd: first Tx descriptor to be written
1305 * @txqs: txq state (generation and producer index)
1306 * @txq: the SGE Tx queue
1307 * @sgl: the SGL
1308 * @flits: number of flits to the start of the SGL in the first descriptor
1309 * @sgl_flits: the SGL size in flits
1310 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1311 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1312 *
1313 * Write a work request header and an associated SGL. If the SGL is
1314 * small enough to fit into one Tx descriptor it has already been written
1315 * and we just need to write the WR header. Otherwise we distribute the
1316 * SGL across the number of descriptors it spans.
1317 */
1318 static void
1319 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1320 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1321 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1322 {
1323
1324 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1325 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1326
1327 if (__predict_true(ndesc == 1)) {
1328 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1329 V_WR_SGLSFLT(flits)) | wr_hi,
1330 htonl(V_WR_LEN(flits + sgl_flits) |
1331 V_WR_GEN(txqs->gen)) | wr_lo);
1332 /* XXX gen? */
1333 wr_gen2(txd, txqs->gen);
1334
1335 } else {
1336 unsigned int ogen = txqs->gen;
1337 const uint64_t *fp = (const uint64_t *)sgl;
1338 struct work_request_hdr *wp = wrp;
1339
1340 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1341 V_WR_SGLSFLT(flits)) | wr_hi;
1342
1343 while (sgl_flits) {
1344 unsigned int avail = WR_FLITS - flits;
1345
1346 if (avail > sgl_flits)
1347 avail = sgl_flits;
1348 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1349 sgl_flits -= avail;
1350 ndesc--;
1351 if (!sgl_flits)
1352 break;
1353
1354 fp += avail;
1355 txd++;
1356 txsd++;
1357 if (++txqs->pidx == txq->size) {
1358 txqs->pidx = 0;
1359 txqs->gen ^= 1;
1360 txd = txq->desc;
1361 txsd = txq->sdesc;
1362 }
1363
			/*
			 * When the head of the mbuf chain is freed,
			 * all clusters will be freed with it.
			 */
1369 wrp = (struct work_request_hdr *)txd;
1370 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1371 V_WR_SGLSFLT(1)) | wr_hi;
1372 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1373 sgl_flits + 1)) |
1374 V_WR_GEN(txqs->gen)) | wr_lo;
1375 wr_gen2(txd, txqs->gen);
1376 flits = 1;
1377 }
1378 wrp->wrh_hi |= htonl(F_WR_EOP);
1379 wmb();
1380 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1381 wr_gen2((struct tx_desc *)wp, ogen);
1382 }
1383 }
1384
1385 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1386 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1387
1388 #define GET_VTAG(cntrl, m) \
1389 do { \
1390 if ((m)->m_flags & M_VLANTAG) \
1391 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1392 } while (0)
1393
1394 static int
1395 t3_encap(struct sge_qset *qs, struct mbuf **m)
1396 {
1397 adapter_t *sc;
1398 struct mbuf *m0;
1399 struct sge_txq *txq;
1400 struct txq_state txqs;
1401 struct port_info *pi;
1402 unsigned int ndesc, flits, cntrl, mlen;
1403 int err, nsegs, tso_info = 0;
1404
1405 struct work_request_hdr *wrp;
1406 struct tx_sw_desc *txsd;
1407 struct sg_ent *sgp, *sgl;
1408 uint32_t wr_hi, wr_lo, sgl_flits;
1409 bus_dma_segment_t segs[TX_MAX_SEGS];
1410
1411 struct tx_desc *txd;
1412
1413 pi = qs->port;
1414 sc = pi->adapter;
1415 txq = &qs->txq[TXQ_ETH];
1416 txd = &txq->desc[txq->pidx];
1417 txsd = &txq->sdesc[txq->pidx];
1418 sgl = txq->txq_sgl;
1419
1420 prefetch(txd);
1421 m0 = *m;
1422
1423 mtx_assert(&qs->lock, MA_OWNED);
1424 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1425 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1426
1427 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1428 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1429 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1430
1431 if (m0->m_nextpkt != NULL) {
1432 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1433 ndesc = 1;
1434 mlen = 0;
1435 } else {
1436 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1437 &m0, segs, &nsegs))) {
1438 if (cxgb_debug)
1439 printf("failed ... err=%d\n", err);
1440 return (err);
1441 }
1442 mlen = m0->m_pkthdr.len;
1443 ndesc = calc_tx_descs(m0, nsegs);
1444 }
1445 txq_prod(txq, ndesc, &txqs);
1446
1447 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1448 txsd->m = m0;
1449
1450 if (m0->m_nextpkt != NULL) {
1451 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1452 int i, fidx;
1453
1454 if (nsegs > 7)
			panic("trying to coalesce %d packets into one WR", nsegs);
1456 txq->txq_coalesced += nsegs;
1457 wrp = (struct work_request_hdr *)txd;
1458 flits = nsegs*2 + 1;
1459
1460 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1461 struct cpl_tx_pkt_batch_entry *cbe;
1462 uint64_t flit;
1463 uint32_t *hflit = (uint32_t *)&flit;
1464 int cflags = m0->m_pkthdr.csum_flags;
1465
1466 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1467 GET_VTAG(cntrl, m0);
1468 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1469 if (__predict_false(!(cflags & CSUM_IP)))
1470 cntrl |= F_TXPKT_IPCSUM_DIS;
1471 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1472 cntrl |= F_TXPKT_L4CSUM_DIS;
1473
1474 hflit[0] = htonl(cntrl);
1475 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1476 flit |= htobe64(1 << 24);
1477 cbe = &cpl_batch->pkt_entry[i];
1478 cbe->cntrl = hflit[0];
1479 cbe->len = hflit[1];
1480 cbe->addr = htobe64(segs[i].ds_addr);
1481 }
1482
1483 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1484 V_WR_SGLSFLT(flits)) |
1485 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1486 wr_lo = htonl(V_WR_LEN(flits) |
1487 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1488 set_wr_hdr(wrp, wr_hi, wr_lo);
1489 wmb();
1490 ETHER_BPF_MTAP(pi->ifp, m0);
1491 wr_gen2(txd, txqs.gen);
1492 check_ring_tx_db(sc, txq, 0);
1493 return (0);
1494 } else if (tso_info) {
1495 int eth_type;
1496 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1497 struct ether_header *eh;
1498 struct ip *ip;
1499 struct tcphdr *tcp;
1500
1501 txd->flit[2] = 0;
1502 GET_VTAG(cntrl, m0);
1503 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1504 hdr->cntrl = htonl(cntrl);
1505 hdr->len = htonl(mlen | 0x80000000);
1506
1507 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1508 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1509 m0, mlen, m0->m_pkthdr.tso_segsz,
1510 m0->m_pkthdr.csum_flags, m0->m_flags);
1511 panic("tx tso packet too small");
1512 }
1513
1514 /* Make sure that ether, ip, tcp headers are all in m0 */
1515 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1516 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1517 if (__predict_false(m0 == NULL)) {
1518 /* XXX panic probably an overreaction */
1519 panic("couldn't fit header into mbuf");
1520 }
1521 }
1522
1523 eh = mtod(m0, struct ether_header *);
1524 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1525 eth_type = CPL_ETH_II_VLAN;
1526 ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
1527 } else {
1528 eth_type = CPL_ETH_II;
1529 ip = (struct ip *)(eh + 1);
1530 }
1531 tcp = (struct tcphdr *)(ip + 1);
1532
1533 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1534 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1535 V_LSO_TCPHDR_WORDS(tcp->th_off);
1536 hdr->lso_info = htonl(tso_info);
1537
1538 if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * The packet is not undersized but still fits in
			 * PIO_LEN, which indicates a TSO bug at the higher
			 * levels.
			 */
1543 txsd->m = NULL;
1544 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1545 flits = (mlen + 7) / 8 + 3;
1546 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1547 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1548 F_WR_SOP | F_WR_EOP | txqs.compl);
1549 wr_lo = htonl(V_WR_LEN(flits) |
1550 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1551 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1552 wmb();
1553 ETHER_BPF_MTAP(pi->ifp, m0);
1554 wr_gen2(txd, txqs.gen);
1555 check_ring_tx_db(sc, txq, 0);
1556 m_freem(m0);
1557 return (0);
1558 }
1559 flits = 3;
1560 } else {
1561 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1562
1563 GET_VTAG(cntrl, m0);
1564 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1565 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1566 cntrl |= F_TXPKT_IPCSUM_DIS;
1567 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1568 cntrl |= F_TXPKT_L4CSUM_DIS;
1569 cpl->cntrl = htonl(cntrl);
1570 cpl->len = htonl(mlen | 0x80000000);
1571
1572 if (mlen <= PIO_LEN) {
1573 txsd->m = NULL;
1574 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1575 flits = (mlen + 7) / 8 + 2;
1576
1577 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1578 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1579 F_WR_SOP | F_WR_EOP | txqs.compl);
1580 wr_lo = htonl(V_WR_LEN(flits) |
1581 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1582 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1583 wmb();
1584 ETHER_BPF_MTAP(pi->ifp, m0);
1585 wr_gen2(txd, txqs.gen);
1586 check_ring_tx_db(sc, txq, 0);
1587 m_freem(m0);
1588 return (0);
1589 }
1590 flits = 2;
1591 }
1592 wrp = (struct work_request_hdr *)txd;
1593 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1594 make_sgl(sgp, segs, nsegs);
1595
1596 sgl_flits = sgl_len(nsegs);
1597
1598 ETHER_BPF_MTAP(pi->ifp, m0);
1599
1600 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1601 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1602 wr_lo = htonl(V_WR_TID(txq->token));
1603 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1604 sgl_flits, wr_hi, wr_lo);
1605 check_ring_tx_db(sc, txq, 0);
1606
1607 return (0);
1608 }
1609
1610 void
1611 cxgb_tx_watchdog(void *arg)
1612 {
1613 struct sge_qset *qs = arg;
1614 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1615
1616 if (qs->coalescing != 0 &&
1617 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1618 TXQ_RING_EMPTY(qs))
1619 qs->coalescing = 0;
1620 else if (qs->coalescing == 0 &&
1621 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1622 qs->coalescing = 1;
1623 if (TXQ_TRYLOCK(qs)) {
1624 qs->qs_flags |= QS_FLUSHING;
1625 cxgb_start_locked(qs);
1626 qs->qs_flags &= ~QS_FLUSHING;
1627 TXQ_UNLOCK(qs);
1628 }
1629 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1630 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1631 qs, txq->txq_watchdog.c_cpu);
1632 }
1633
1634 static void
1635 cxgb_tx_timeout(void *arg)
1636 {
1637 struct sge_qset *qs = arg;
1638 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1639
1640 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1641 qs->coalescing = 1;
1642 if (TXQ_TRYLOCK(qs)) {
1643 qs->qs_flags |= QS_TIMEOUT;
1644 cxgb_start_locked(qs);
1645 qs->qs_flags &= ~QS_TIMEOUT;
1646 TXQ_UNLOCK(qs);
1647 }
1648 }
1649
1650 static void
1651 cxgb_start_locked(struct sge_qset *qs)
1652 {
1653 struct mbuf *m_head = NULL;
1654 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1655 struct port_info *pi = qs->port;
1656 struct ifnet *ifp = pi->ifp;
1657
1658 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1659 reclaim_completed_tx(qs, 0, TXQ_ETH);
1660
1661 if (!pi->link_config.link_ok) {
1662 TXQ_RING_FLUSH(qs);
1663 return;
1664 }
1665 TXQ_LOCK_ASSERT(qs);
1666 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1667 pi->link_config.link_ok) {
1668 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1669
1670 if (txq->size - txq->in_use <= TX_MAX_DESC)
1671 break;
1672
1673 if ((m_head = cxgb_dequeue(qs)) == NULL)
1674 break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
1679 if (t3_encap(qs, &m_head) || m_head == NULL)
1680 break;
1681
1682 m_head = NULL;
1683 }
1684
1685 if (txq->db_pending)
1686 check_ring_tx_db(pi->adapter, txq, 1);
1687
1688 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1689 pi->link_config.link_ok)
1690 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1691 qs, txq->txq_timer.c_cpu);
1692 if (m_head != NULL)
1693 m_freem(m_head);
1694 }
1695
1696 static int
1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1698 {
1699 struct port_info *pi = qs->port;
1700 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1701 struct buf_ring *br = txq->txq_mr;
1702 int error, avail;
1703
1704 avail = txq->size - txq->in_use;
1705 TXQ_LOCK_ASSERT(qs);
1706
1707 /*
1708 * We can only do a direct transmit if the following are true:
1709 * - we aren't coalescing (ring < 3/4 full)
1710 * - the link is up -- checked in caller
1711 * - there are no packets enqueued already
1712 * - there is space in hardware transmit queue
1713 */
1714 if (check_pkt_coalesce(qs) == 0 &&
1715 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1716 if (t3_encap(qs, &m)) {
1717 if (m != NULL &&
1718 (error = drbr_enqueue(ifp, br, m)) != 0)
1719 return (error);
1720 } else {
1721 if (txq->db_pending)
1722 check_ring_tx_db(pi->adapter, txq, 1);
1723
1724 /*
1725 * We've bypassed the buf ring so we need to update
1726 * the stats directly
1727 */
1728 txq->txq_direct_packets++;
1729 txq->txq_direct_bytes += m->m_pkthdr.len;
1730 }
1731 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1732 return (error);
1733
1734 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1735 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1736 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1737 cxgb_start_locked(qs);
1738 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1739 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1740 qs, txq->txq_timer.c_cpu);
1741 return (0);
1742 }
1743
1744 int
1745 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1746 {
1747 struct sge_qset *qs;
1748 struct port_info *pi = ifp->if_softc;
1749 int error, qidx = pi->first_qset;
1750
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    !pi->link_config.link_ok) {
1753 m_freem(m);
1754 return (0);
1755 }
1756
1757 if (m->m_flags & M_FLOWID)
1758 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1759
1760 qs = &pi->adapter->sge.qs[qidx];
1761
1762 if (TXQ_TRYLOCK(qs)) {
1763 /* XXX running */
1764 error = cxgb_transmit_locked(ifp, qs, m);
1765 TXQ_UNLOCK(qs);
1766 } else
1767 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1768 return (error);
1769 }
1770
1771 void
1772 cxgb_qflush(struct ifnet *ifp)
1773 {
	/*
	 * Flush any enqueued mbufs in the buf_rings and in the transmit
	 * queues.  No-op for now.
	 */
1779 return;
1780 }
1781
1782 /**
1783 * write_imm - write a packet into a Tx descriptor as immediate data
1784 * @d: the Tx descriptor to write
1785 * @m: the packet
1786 * @len: the length of packet data to write as immediate data
1787 * @gen: the generation bit value to write
1788 *
1789 * Writes a packet as immediate data into a Tx descriptor. The packet
1790 * contains a work request at its beginning. We must write the packet
1791 * carefully so the SGE doesn't read accidentally before it's written in
1792 * its entirety.
1793 */
1794 static __inline void
1795 write_imm(struct tx_desc *d, struct mbuf *m,
1796 unsigned int len, unsigned int gen)
1797 {
1798 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1799 struct work_request_hdr *to = (struct work_request_hdr *)d;
1800 uint32_t wr_hi, wr_lo;
1801
1802 if (len > WR_LEN)
1803 panic("len too big %d\n", len);
1804 if (len < sizeof(*from))
1805 panic("len too small %d", len);
1806
1807 memcpy(&to[1], &from[1], len - sizeof(*from));
1808 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1809 V_WR_BCNTLFLT(len & 7));
1810 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1811 V_WR_LEN((len + 7) / 8));
1812 set_wr_hdr(to, wr_hi, wr_lo);
1813 wmb();
1814 wr_gen2(d, gen);
1815
	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
	 */
1820 if (m->m_type != MT_DONTFREE)
1821 m_freem(m);
1822
1823 }
1824
1825 /**
1826 * check_desc_avail - check descriptor availability on a send queue
1827 * @adap: the adapter
1828 * @q: the TX queue
1829 * @m: the packet needing the descriptors
1830 * @ndesc: the number of Tx descriptors needed
1831 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1832 *
1833 * Checks if the requested number of Tx descriptors is available on an
1834 * SGE send queue. If the queue is already suspended or not enough
1835 * descriptors are available the packet is queued for later transmission.
1836 * Must be called with the Tx queue locked.
1837 *
1838 * Returns 0 if enough descriptors are available, 1 if there aren't
1839 * enough descriptors and the packet has been queued, and 2 if the caller
1840 * needs to retry because there weren't enough descriptors at the
1841 * beginning of the call but some freed up in the mean time.
1842 */
1843 static __inline int
1844 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1845 struct mbuf *m, unsigned int ndesc,
1846 unsigned int qid)
1847 {
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
1853 if (__predict_false(!mbufq_empty(&q->sendq))) {
1854 addq_exit: mbufq_tail(&q->sendq, m);
1855 return 1;
1856 }
1857 if (__predict_false(q->size - q->in_use < ndesc)) {
1858
1859 struct sge_qset *qs = txq_to_qset(q, qid);
1860
1861 setbit(&qs->txq_stopped, qid);
1862 if (should_restart_tx(q) &&
1863 test_and_clear_bit(qid, &qs->txq_stopped))
1864 return 2;
1865
1866 q->stops++;
1867 goto addq_exit;
1868 }
1869 return 0;
1870 }
1871
1872
1873 /**
1874 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1875 * @q: the SGE control Tx queue
1876 *
1877 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1878 * that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs.
1880 */
1881 static __inline void
1882 reclaim_completed_tx_imm(struct sge_txq *q)
1883 {
1884 unsigned int reclaim = q->processed - q->cleaned;
1885
1886 q->in_use -= reclaim;
1887 q->cleaned += reclaim;
1888 }
1889
1890 static __inline int
1891 immediate(const struct mbuf *m)
1892 {
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1894 }
1895
1896 /**
1897 * ctrl_xmit - send a packet through an SGE control Tx queue
1898 * @adap: the adapter
1899 * @q: the control queue
1900 * @m: the packet
1901 *
1902 * Send a packet through an SGE control Tx queue. Packets sent through
1903 * a control queue must fit entirely as immediate data in a single Tx
1904 * descriptor and have no page fragments.
1905 */
1906 static int
1907 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1908 {
1909 int ret;
1910 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1911 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1912
1913 if (__predict_false(!immediate(m))) {
1914 m_freem(m);
1915 return 0;
1916 }
1917
1918 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1919 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1920
1921 TXQ_LOCK(qs);
1922 again: reclaim_completed_tx_imm(q);
1923
1924 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1925 if (__predict_false(ret)) {
1926 if (ret == 1) {
1927 TXQ_UNLOCK(qs);
1928 return (ENOSPC);
1929 }
1930 goto again;
1931 }
1932 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1933
1934 q->in_use++;
1935 if (++q->pidx >= q->size) {
1936 q->pidx = 0;
1937 q->gen ^= 1;
1938 }
1939 TXQ_UNLOCK(qs);
1940 wmb();
1941 t3_write_reg(adap, A_SG_KDOORBELL,
1942 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1943 return (0);
1944 }
1945
1946
1947 /**
1948 * restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
1950 *
1951 * Resumes transmission on a suspended Tx control queue.
1952 */
1953 static void
1954 restart_ctrlq(void *data, int npending)
1955 {
1956 struct mbuf *m;
1957 struct sge_qset *qs = (struct sge_qset *)data;
1958 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1959 adapter_t *adap = qs->port->adapter;
1960
1961 TXQ_LOCK(qs);
1962 again: reclaim_completed_tx_imm(q);
1963
1964 while (q->in_use < q->size &&
1965 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1966
1967 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1968
1969 if (++q->pidx >= q->size) {
1970 q->pidx = 0;
1971 q->gen ^= 1;
1972 }
1973 q->in_use++;
1974 }
1975 if (!mbufq_empty(&q->sendq)) {
1976 setbit(&qs->txq_stopped, TXQ_CTRL);
1977
1978 if (should_restart_tx(q) &&
1979 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1980 goto again;
1981 q->stops++;
1982 }
1983 TXQ_UNLOCK(qs);
1984 t3_write_reg(adap, A_SG_KDOORBELL,
1985 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1986 }
1987
1988
1989 /*
1990 * Send a management message through control queue 0
1991 */
1992 int
1993 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1994 {
1995 return ctrl_xmit(adap, &adap->sge.qs[0], m);
1996 }
1997
1998 /**
1999 * free_qset - free the resources of an SGE queue set
2000 * @sc: the controller owning the queue set
2001 * @q: the queue set
2002 *
2003 * Release the HW and SW resources associated with an SGE queue set, such
2004 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
2005 * queue set must be quiesced prior to calling this.
2006 */
2007 static void
2008 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2009 {
2010 int i;
2011
2012 reclaim_completed_tx(q, 0, TXQ_ETH);
2013 if (q->txq[TXQ_ETH].txq_mr != NULL)
2014 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2015 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2016 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2017 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2018 }
2019
2020 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2021 if (q->fl[i].desc) {
2022 mtx_lock_spin(&sc->sge.reg_lock);
2023 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2024 mtx_unlock_spin(&sc->sge.reg_lock);
2025 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2026 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2027 q->fl[i].desc_map);
2028 bus_dma_tag_destroy(q->fl[i].desc_tag);
2029 bus_dma_tag_destroy(q->fl[i].entry_tag);
2030 }
2031 if (q->fl[i].sdesc) {
2032 free_rx_bufs(sc, &q->fl[i]);
2033 free(q->fl[i].sdesc, M_DEVBUF);
2034 }
2035 }
2036
2037 mtx_unlock(&q->lock);
2038 MTX_DESTROY(&q->lock);
2039 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2040 if (q->txq[i].desc) {
2041 mtx_lock_spin(&sc->sge.reg_lock);
2042 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2043 mtx_unlock_spin(&sc->sge.reg_lock);
2044 bus_dmamap_unload(q->txq[i].desc_tag,
2045 q->txq[i].desc_map);
2046 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2047 q->txq[i].desc_map);
2048 bus_dma_tag_destroy(q->txq[i].desc_tag);
2049 bus_dma_tag_destroy(q->txq[i].entry_tag);
2050 }
2051 if (q->txq[i].sdesc) {
2052 free(q->txq[i].sdesc, M_DEVBUF);
2053 }
2054 }
2055
2056 if (q->rspq.desc) {
2057 mtx_lock_spin(&sc->sge.reg_lock);
2058 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2059 mtx_unlock_spin(&sc->sge.reg_lock);
2060
2061 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2062 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2063 q->rspq.desc_map);
2064 bus_dma_tag_destroy(q->rspq.desc_tag);
2065 MTX_DESTROY(&q->rspq.lock);
2066 }
2067
2068 #ifdef INET
2069 tcp_lro_free(&q->lro.ctrl);
2070 #endif
2071
2072 bzero(q, sizeof(*q));
2073 }
2074
2075 /**
2076 * t3_free_sge_resources - free SGE resources
2077 * @sc: the adapter softc
2078 *
2079 * Frees resources used by the SGE queue sets.
2080 */
2081 void
2082 t3_free_sge_resources(adapter_t *sc, int nqsets)
2083 {
2084 int i;
2085
2086 for (i = 0; i < nqsets; ++i) {
2087 TXQ_LOCK(&sc->sge.qs[i]);
2088 t3_free_qset(sc, &sc->sge.qs[i]);
2089 }
2090 }
2091
2092 /**
2093 * t3_sge_start - enable SGE
2094 * @sc: the controller softc
2095 *
2096 * Enables the SGE for DMAs. This is the last step in starting packet
2097 * transfers.
2098 */
2099 void
2100 t3_sge_start(adapter_t *sc)
2101 {
2102 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2103 }
2104
2105 /**
2106 * t3_sge_stop - disable SGE operation
2107 * @sc: the adapter
2108 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait; however, the tasklets will have no effect
 *	since the doorbells are disabled.  The driver will call this again
 *	later from process context, at which time the tasklets will be
 *	stopped if they are still running.
2117 */
2118 void
2119 t3_sge_stop(adapter_t *sc)
2120 {
2121 int i, nqsets;
2122
2123 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2124
2125 if (sc->tq == NULL)
2126 return;
2127
	for (nqsets = i = 0; i < sc->params.nports; i++)
2129 nqsets += sc->port[i].nqsets;
2130 #ifdef notyet
	/*
	 * XXX
	 */
2135 for (i = 0; i < nqsets; ++i) {
2136 struct sge_qset *qs = &sc->sge.qs[i];
2137
2138 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2139 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2140 }
2141 #endif
2142 }
2143
/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set that owns the Tx queue
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: the index of the Tx queue within the queue set (TXQ_*)
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
2157 void
2158 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2159 {
2160 struct tx_sw_desc *txsd;
2161 unsigned int cidx, mask;
2162 struct sge_txq *q = &qs->txq[queue];
2163
#ifdef T3_TRACE
	T3_TRACE2(qs->port->adapter->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, q->cidx);
#endif
2168 cidx = q->cidx;
2169 mask = q->size - 1;
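	/* The masking below assumes q->size is a power of two. */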
2170 txsd = &q->sdesc[cidx];
2171
2172 mtx_assert(&qs->lock, MA_OWNED);
2173 while (reclaimable--) {
2174 prefetch(q->sdesc[(cidx + 1) & mask].m);
2175 prefetch(q->sdesc[(cidx + 2) & mask].m);
2176
2177 if (txsd->m != NULL) {
2178 if (txsd->flags & TX_SW_DESC_MAPPED) {
2179 bus_dmamap_unload(q->entry_tag, txsd->map);
2180 txsd->flags &= ~TX_SW_DESC_MAPPED;
2181 }
2182 m_freem_list(txsd->m);
2183 txsd->m = NULL;
2184 } else
2185 q->txq_skipped++;
2186
2187 ++txsd;
2188 if (++cidx == q->size) {
2189 cidx = 0;
2190 txsd = q->sdesc;
2191 }
2192 }
2193 q->cidx = cidx;
2194
2195 }
2196
2197 /**
2198 * is_new_response - check if a response is newly written
2199 * @r: the response descriptor
2200 * @q: the response queue
2201 *
 *	Returns true if the response descriptor contains an as-yet
 *	unprocessed response.
2204 */
2205 static __inline int
2206 is_new_response(const struct rsp_desc *r,
2207 const struct sge_rspq *q)
2208 {
2209 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2210 }
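
/*
 * Example: the queue starts with gen == 1, and the comparison above treats
 * a descriptor as new while its generation bit (F_RSPD_GEN2) matches
 * q->gen.  Each time the consumer index wraps, gen is toggled (see
 * process_responses()), so descriptors left over from the previous pass
 * around the ring no longer compare as new.
 */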
2211
2212 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2213 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2214 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2215 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2216 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2217
/* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
2219 #define NOMEM_INTR_DELAY 2500
2220
2221 /**
2222 * write_ofld_wr - write an offload work request
2223 * @adap: the adapter
2224 * @m: the packet to send
2225 * @q: the Tx queue
2226 * @pidx: index of the first Tx descriptor to write
2227 * @gen: the generation value to use
2228 * @ndesc: number of descriptors the packet will occupy
2229 *
2230 * Write an offload work request to send the supplied packet. The packet
2231 * data already carry the work request with most fields populated.
2232 */
2233 static void
2234 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2235 struct sge_txq *q, unsigned int pidx,
2236 unsigned int gen, unsigned int ndesc,
2237 bus_dma_segment_t *segs, unsigned int nsegs)
2238 {
2239 unsigned int sgl_flits, flits;
2240 struct work_request_hdr *from;
2241 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2242 struct tx_desc *d = &q->desc[pidx];
2243 struct txq_state txqs;
2244
2245 if (immediate(m) && nsegs == 0) {
2246 write_imm(d, m, m->m_len, gen);
2247 return;
2248 }
2249
2250 /* Only TX_DATA builds SGLs */
2251 from = mtod(m, struct work_request_hdr *);
2252 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2253
2254 flits = m->m_len / 8;
2255 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2256
2257 make_sgl(sgp, segs, nsegs);
2258 sgl_flits = sgl_len(nsegs);
2259
2260 txqs.gen = gen;
2261 txqs.pidx = pidx;
2262 txqs.compl = 0;
2263
2264 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2265 from->wrh_hi, from->wrh_lo);
2266 }
2267
2268 /**
2269 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2270 * @m: the packet
2271 *
2272 * Returns the number of Tx descriptors needed for the given offload
2273 * packet. These packets are already fully constructed.
2274 */
2275 static __inline unsigned int
2276 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2277 {
2278 unsigned int flits, cnt = 0;
2279 int ndescs;
2280
2281 if (m->m_len <= WR_LEN && nsegs == 0)
2282 return (1); /* packet fits as immediate data */
2283
	/*
	 * This needs to be revisited for TOE.
	 */
2287
2288 cnt = nsegs;
2289
2290 /* headers */
2291 flits = m->m_len / 8;
2292
2293 ndescs = flits_to_desc(flits + sgl_len(cnt));
2294
2295 return (ndescs);
2296 }
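
/*
 * Worked example for the computation above, assuming the usual T3 SGL
 * packing of two entries per three flits, i.e. sgl_len(n) = 3n/2 + (n & 1):
 * a 40-byte header (5 flits) plus 3 segments (sgl_len(3) == 5 flits) asks
 * flits_to_desc() to map 10 flits onto descriptors.
 */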
2297
2298 /**
2299 * ofld_xmit - send a packet through an offload queue
2300 * @adap: the adapter
2301 * @q: the Tx offload queue
2302 * @m: the packet
2303 *
2304 * Send an offload packet through an SGE offload queue.
2305 */
2306 static int
2307 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2308 {
2309 int ret, nsegs;
2310 unsigned int ndesc;
2311 unsigned int pidx, gen;
2312 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2313 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2314 struct tx_sw_desc *stx;
2315
2316 nsegs = m_get_sgllen(m);
2317 vsegs = m_get_sgl(m);
2318 ndesc = calc_tx_descs_ofld(m, nsegs);
2319 busdma_map_sgl(vsegs, segs, nsegs);
2320
2321 stx = &q->sdesc[q->pidx];
2322
2323 TXQ_LOCK(qs);
2324 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2325 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2326 if (__predict_false(ret)) {
2327 if (ret == 1) {
2328 printf("no ofld desc avail\n");
2329
2330 m_set_priority(m, ndesc); /* save for restart */
2331 TXQ_UNLOCK(qs);
2332 return (EINTR);
2333 }
2334 goto again;
2335 }
2336
2337 gen = q->gen;
2338 q->in_use += ndesc;
2339 pidx = q->pidx;
2340 q->pidx += ndesc;
2341 if (q->pidx >= q->size) {
2342 q->pidx -= q->size;
2343 q->gen ^= 1;
2344 }
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, pktlen %u, hdrlen %u, nsegs %u",
	    ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
#endif
2351 TXQ_UNLOCK(qs);
2352
2353 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2354 check_ring_tx_db(adap, q, 1);
2355 return (0);
2356 }
2357
2358 /**
2359 * restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
2361 *
2362 * Resumes transmission on a suspended Tx offload queue.
2363 */
2364 static void
2365 restart_offloadq(void *data, int npending)
2366 {
2367 struct mbuf *m;
2368 struct sge_qset *qs = data;
2369 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2370 adapter_t *adap = qs->port->adapter;
2371 bus_dma_segment_t segs[TX_MAX_SEGS];
2372 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2373 int nsegs, cleaned;
2374
2375 TXQ_LOCK(qs);
2376 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2377
2378 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2379 unsigned int gen, pidx;
2380 unsigned int ndesc = m_get_priority(m);
2381
2382 if (__predict_false(q->size - q->in_use < ndesc)) {
2383 setbit(&qs->txq_stopped, TXQ_OFLD);
2384 if (should_restart_tx(q) &&
2385 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2386 goto again;
2387 q->stops++;
2388 break;
2389 }
2390
2391 gen = q->gen;
2392 q->in_use += ndesc;
2393 pidx = q->pidx;
2394 q->pidx += ndesc;
2395 if (q->pidx >= q->size) {
2396 q->pidx -= q->size;
2397 q->gen ^= 1;
2398 }
2399
2400 (void)mbufq_dequeue(&q->sendq);
2401 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2402 TXQ_UNLOCK(qs);
2403 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2404 TXQ_LOCK(qs);
2405 }
2406 #if USE_GTS
2407 set_bit(TXQ_RUNNING, &q->flags);
2408 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2409 #endif
2410 TXQ_UNLOCK(qs);
2411 wmb();
2412 t3_write_reg(adap, A_SG_KDOORBELL,
2413 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2414 }
2415
2416 /**
2417 * queue_set - return the queue set a packet should use
2418 * @m: the packet
2419 *
2420 * Maps a packet to the SGE queue set it should use. The desired queue
2421 * set is carried in bits 1-3 in the packet's priority.
2422 */
2423 static __inline int
2424 queue_set(const struct mbuf *m)
2425 {
2426 return m_get_priority(m) >> 1;
2427 }
2428
2429 /**
2430 * is_ctrl_pkt - return whether an offload packet is a control packet
2431 * @m: the packet
2432 *
2433 * Determines whether an offload packet should use an OFLD or a CTRL
2434 * Tx queue. This is indicated by bit 0 in the packet's priority.
2435 */
2436 static __inline int
2437 is_ctrl_pkt(const struct mbuf *m)
2438 {
2439 return m_get_priority(m) & 1;
2440 }
2441
2442 /**
2443 * t3_offload_tx - send an offload packet
2444 * @tdev: the offload device to send to
2445 * @m: the packet
2446 *
2447 * Sends an offload packet. We use the packet priority to select the
2448 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2449 * should be sent as regular or control, bits 1-3 select the queue set.
2450 */
2451 int
2452 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2453 {
2454 adapter_t *adap = tdev2adap(tdev);
2455 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2456
2457 if (__predict_false(is_ctrl_pkt(m)))
2458 return ctrl_xmit(adap, qs, m);
2459
2460 return ofld_xmit(adap, qs, m);
2461 }
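
/*
 * Hypothetical sender-side encoding matching queue_set() and is_ctrl_pkt()
 * above (qset_idx and is_ctrl are illustrative names, not driver symbols):
 * a caller targeting queue set 2 with a control-queue message would set
 * the priority to (2 << 1) | 1 before handing the mbuf over.
 */
#if 0
	m_set_priority(m, (qset_idx << 1) | is_ctrl);
	error = t3_offload_tx(tdev, m);
#endif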
2462
2463 /**
2464 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2465 * @tdev: the offload device that will be receiving the packets
2466 * @q: the SGE response queue that assembled the bundle
2467 * @m: the partial bundle
2468 * @n: the number of packets in the bundle
2469 *
2470 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2471 */
2472 static __inline void
2473 deliver_partial_bundle(struct t3cdev *tdev,
2474 struct sge_rspq *q,
2475 struct mbuf *mbufs[], int n)
2476 {
2477 if (n) {
2478 q->offload_bundles++;
2479 cxgb_ofld_recv(tdev, mbufs, n);
2480 }
2481 }
2482
2483 static __inline int
2484 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2485 struct mbuf *m, struct mbuf *rx_gather[],
2486 unsigned int gather_idx)
2487 {
2488
2489 rq->offload_pkts++;
2490 m->m_pkthdr.header = mtod(m, void *);
2491 rx_gather[gather_idx++] = m;
2492 if (gather_idx == RX_BUNDLE_SIZE) {
2493 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2494 gather_idx = 0;
2495 rq->offload_bundles++;
2496 }
2497 return (gather_idx);
2498 }
2499
2500 static void
2501 restart_tx(struct sge_qset *qs)
2502 {
2503 struct adapter *sc = qs->port->adapter;
2506 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2507 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2508 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2509 qs->txq[TXQ_OFLD].restarts++;
2510 DPRINTF("restarting TXQ_OFLD\n");
2511 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2512 }
2513 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2514 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2515 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2516 qs->txq[TXQ_CTRL].in_use);
2517
2518 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2519 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2520 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2521 qs->txq[TXQ_CTRL].restarts++;
2522 DPRINTF("restarting TXQ_CTRL\n");
2523 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2524 }
2525 }
2526
2527 /**
2528 * t3_sge_alloc_qset - initialize an SGE queue set
2529 * @sc: the controller softc
2530 * @id: the queue set id
2531 * @nports: how many Ethernet ports will be using this queue set
2532 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2533 * @p: configuration parameters for this queue set
2534 * @ntxq: number of Tx queues for the queue set
2535 * @pi: port info for queue set
2536 *
2537 * Allocate resources and initialize an SGE queue set. A queue set
2538 * comprises a response queue, two Rx free-buffer queues, and up to 3
2539 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2540 * queue, offload queue, and control queue.
2541 */
2542 int
2543 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2544 const struct qset_params *p, int ntxq, struct port_info *pi)
2545 {
2546 struct sge_qset *q = &sc->sge.qs[id];
2547 int i, ret = 0;
2548
2549 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2550 q->port = pi;
2551
2552 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2553 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2554 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2555 goto err;
2556 }
2557 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2558 M_NOWAIT | M_ZERO)) == NULL) {
2559 device_printf(sc->dev, "failed to allocate ifq\n");
2560 goto err;
2561 }
2562 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2563 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2564 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2565 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2566 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2567
2568 init_qset_cntxt(q, id);
2569 q->idx = id;
2570 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2571 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2572 &q->fl[0].desc, &q->fl[0].sdesc,
2573 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2574 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2575 printf("error %d from alloc ring fl0\n", ret);
2576 goto err;
2577 }
2578
2579 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2580 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2581 &q->fl[1].desc, &q->fl[1].sdesc,
2582 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2583 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2584 printf("error %d from alloc ring fl1\n", ret);
2585 goto err;
2586 }
2587
2588 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2589 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2590 &q->rspq.desc_tag, &q->rspq.desc_map,
2591 NULL, NULL)) != 0) {
2592 printf("error %d from alloc ring rspq\n", ret);
2593 goto err;
2594 }
2595
2596 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2597 device_get_unit(sc->dev), irq_vec_idx);
2598 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2599
2600 for (i = 0; i < ntxq; ++i) {
2601 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2602
2603 if ((ret = alloc_ring(sc, p->txq_size[i],
2604 sizeof(struct tx_desc), sz,
2605 &q->txq[i].phys_addr, &q->txq[i].desc,
2606 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2607 &q->txq[i].desc_map,
2608 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2609 printf("error %d from alloc ring tx %i\n", ret, i);
2610 goto err;
2611 }
2612 mbufq_init(&q->txq[i].sendq);
2613 q->txq[i].gen = 1;
2614 q->txq[i].size = p->txq_size[i];
2615 }
2616
2617 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2618 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2619 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2620 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2621
2622 q->fl[0].gen = q->fl[1].gen = 1;
2623 q->fl[0].size = p->fl_size;
2624 q->fl[1].size = p->jumbo_size;
2625
2626 q->rspq.gen = 1;
2627 q->rspq.cidx = 0;
2628 q->rspq.size = p->rspq_size;
2629
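	/*
	 * Reserve enough descriptors for one maximally scattered packet
	 * (an SGL of TX_MAX_SEGS + 1 entries plus header flits) per port
	 * before the Ethernet Tx queue is stopped.
	 */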
2630 q->txq[TXQ_ETH].stop_thres = nports *
2631 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2632
2633 q->fl[0].buf_size = MCLBYTES;
2634 q->fl[0].zone = zone_pack;
2635 q->fl[0].type = EXT_PACKET;
2636
2637 if (p->jumbo_buf_size == MJUM16BYTES) {
2638 q->fl[1].zone = zone_jumbo16;
2639 q->fl[1].type = EXT_JUMBO16;
2640 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2641 q->fl[1].zone = zone_jumbo9;
2642 q->fl[1].type = EXT_JUMBO9;
2643 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2644 q->fl[1].zone = zone_jumbop;
2645 q->fl[1].type = EXT_JUMBOP;
2646 } else {
2647 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2648 ret = EDOOFUS;
2649 goto err;
2650 }
2651 q->fl[1].buf_size = p->jumbo_buf_size;
2652
2653 /* Allocate and setup the lro_ctrl structure */
2654 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2655 #ifdef INET
2656 ret = tcp_lro_init(&q->lro.ctrl);
2657 if (ret) {
2658 printf("error %d from tcp_lro_init\n", ret);
2659 goto err;
2660 }
2661 #endif
2662 q->lro.ctrl.ifp = pi->ifp;
2663
2664 mtx_lock_spin(&sc->sge.reg_lock);
2665 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2666 q->rspq.phys_addr, q->rspq.size,
2667 q->fl[0].buf_size, 1, 0);
2668 if (ret) {
2669 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2670 goto err_unlock;
2671 }
2672
2673 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2674 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2675 q->fl[i].phys_addr, q->fl[i].size,
2676 q->fl[i].buf_size, p->cong_thres, 1,
2677 0);
2678 if (ret) {
2679 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2680 goto err_unlock;
2681 }
2682 }
2683
2684 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2685 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2686 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2687 1, 0);
2688 if (ret) {
2689 printf("error %d from t3_sge_init_ecntxt\n", ret);
2690 goto err_unlock;
2691 }
2692
2693 if (ntxq > 1) {
2694 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2695 USE_GTS, SGE_CNTXT_OFLD, id,
2696 q->txq[TXQ_OFLD].phys_addr,
2697 q->txq[TXQ_OFLD].size, 0, 1, 0);
2698 if (ret) {
2699 printf("error %d from t3_sge_init_ecntxt\n", ret);
2700 goto err_unlock;
2701 }
2702 }
2703
2704 if (ntxq > 2) {
2705 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2706 SGE_CNTXT_CTRL, id,
2707 q->txq[TXQ_CTRL].phys_addr,
2708 q->txq[TXQ_CTRL].size,
2709 q->txq[TXQ_CTRL].token, 1, 0);
2710 if (ret) {
2711 printf("error %d from t3_sge_init_ecntxt\n", ret);
2712 goto err_unlock;
2713 }
2714 }
2715
2716 mtx_unlock_spin(&sc->sge.reg_lock);
2717 t3_update_qset_coalesce(q, p);
2718 q->port = pi;
2719
2720 refill_fl(sc, &q->fl[0], q->fl[0].size);
2721 refill_fl(sc, &q->fl[1], q->fl[1].size);
2722 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2723
2724 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2725 V_NEWTIMER(q->rspq.holdoff_tmr));
2726
2727 return (0);
2728
2729 err_unlock:
2730 mtx_unlock_spin(&sc->sge.reg_lock);
2731 err:
2732 TXQ_LOCK(q);
2733 t3_free_qset(sc, q);
2734
2735 return (ret);
2736 }
2737
2738 /*
2739 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2740 * ethernet data. Hardware assistance with various checksums and any vlan tag
2741 * will also be taken into account here.
2742 */
2743 void
2744 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2745 {
2746 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2747 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2748 struct ifnet *ifp = pi->ifp;
2749
2750 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2751
2752 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2753 cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
2758 }
2759
2760 if (cpl->vlan_valid) {
2761 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2762 m->m_flags |= M_VLANTAG;
2763 }
2764
2765 m->m_pkthdr.rcvif = ifp;
2766 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2767 /*
2768 * adjust after conversion to mbuf chain
2769 */
2770 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2771 m->m_len -= (sizeof(*cpl) + ethpad);
2772 m->m_data += (sizeof(*cpl) + ethpad);
2773 }
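
/*
 * Example of the adjustment above: with ethpad == 2 (the value the caller
 * uses for tunneled packets) the mbuf is advanced past the CPL_RX_PKT
 * header plus a 2-byte pad, which keeps the IP header that follows the
 * Ethernet header 32-bit aligned.
 */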
2774
2775 /**
2776 * get_packet - return the next ingress packet buffer from a free list
2777 * @adap: the adapter that received the packet
2778 * @drop_thres: # of remaining buffers before we start dropping packets
2779 * @qs: the qset that the SGE free list holding the packet belongs to
2780 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2781 * @r: response descriptor
2782 *
 *	Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
2785 * original buffer, otherwise we use the original buffer itself. If a
2786 * positive drop threshold is supplied packets are dropped and their
2787 * buffers recycled if (a) the number of remaining buffers is under the
2788 * threshold and the packet is too big to copy, or (b) the packet should
2789 * be copied but there is no memory for the copy.
2790 */
2791 static int
2792 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2793 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2794 {
2795
2796 unsigned int len_cq = ntohl(r->len_cq);
2797 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2798 int mask, cidx = fl->cidx;
2799 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2800 uint32_t len = G_RSPD_LEN(len_cq);
2801 uint32_t flags = M_EXT;
2802 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2803 caddr_t cl;
2804 struct mbuf *m;
2805 int ret = 0;
2806
2807 mask = fl->size - 1;
2808 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2809 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2810 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2811 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2812
2813 fl->credits--;
2814 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2815
2816 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2817 sopeop == RSPQ_SOP_EOP) {
2818 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2819 goto skip_recycle;
2820 cl = mtod(m, void *);
2821 memcpy(cl, sd->rxsd_cl, len);
2822 recycle_rx_buf(adap, fl, fl->cidx);
2823 m->m_pkthdr.len = m->m_len = len;
2824 m->m_flags = 0;
2825 mh->mh_head = mh->mh_tail = m;
2826 ret = 1;
2827 goto done;
2828 } else {
2829 skip_recycle:
2830 bus_dmamap_unload(fl->entry_tag, sd->map);
2831 cl = sd->rxsd_cl;
2832 m = sd->m;
2833
2834 if ((sopeop == RSPQ_SOP_EOP) ||
2835 (sopeop == RSPQ_SOP))
2836 flags |= M_PKTHDR;
2837 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2838 if (fl->zone == zone_pack) {
2839 /*
2840 * restore clobbered data pointer
2841 */
2842 m->m_data = m->m_ext.ext_buf;
2843 } else {
2844 m_cljset(m, cl, fl->type);
2845 }
2846 m->m_len = len;
2847 }
2848 switch(sopeop) {
2849 case RSPQ_SOP_EOP:
2850 ret = 1;
2851 /* FALLTHROUGH */
2852 case RSPQ_SOP:
2853 mh->mh_head = mh->mh_tail = m;
2854 m->m_pkthdr.len = len;
2855 break;
2856 case RSPQ_EOP:
2857 ret = 1;
2858 /* FALLTHROUGH */
2859 case RSPQ_NSOP_NEOP:
2860 if (mh->mh_tail == NULL) {
2861 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2862 m_freem(m);
2863 break;
2864 }
2865 mh->mh_tail->m_next = m;
2866 mh->mh_tail = m;
2867 mh->mh_head->m_pkthdr.len += len;
2868 break;
2869 }
2870 if (cxgb_debug)
2871 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2872 done:
2873 if (++fl->cidx == fl->size)
2874 fl->cidx = 0;
2875
2876 return (ret);
2877 }
2878
2879 /**
2880 * handle_rsp_cntrl_info - handles control information in a response
2881 * @qs: the queue set corresponding to the response
2882 * @flags: the response control flags
2883 *
2884 * Handles the control information of an SGE response, such as GTS
2885 * indications and completion credits for the queue set's Tx queues.
2886 * HW coalesces credits, we don't do any extra SW coalescing.
2887 */
2888 static __inline void
2889 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2890 {
2891 unsigned int credits;
2892
2893 #if USE_GTS
2894 if (flags & F_RSPD_TXQ0_GTS)
2895 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2896 #endif
2897 credits = G_RSPD_TXQ0_CR(flags);
2898 if (credits)
2899 qs->txq[TXQ_ETH].processed += credits;
2900
2901 credits = G_RSPD_TXQ2_CR(flags);
2902 if (credits)
2903 qs->txq[TXQ_CTRL].processed += credits;
2904
2905 # if USE_GTS
2906 if (flags & F_RSPD_TXQ1_GTS)
2907 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2908 # endif
2909 credits = G_RSPD_TXQ1_CR(flags);
2910 if (credits)
2911 qs->txq[TXQ_OFLD].processed += credits;
2912
2913 }
2914
2915 static void
2916 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2917 unsigned int sleeping)
2918 {
	/* Intentionally empty: GTS-based doorbell deferral is not implemented. */
2920 }
2921
2922 /**
2923 * process_responses - process responses from an SGE response queue
2924 * @adap: the adapter
2925 * @qs: the queue set to which the response queue belongs
2926 * @budget: how many responses can be processed in this round
2927 *
2928 * Process responses from an SGE response queue up to the supplied budget.
2929 * Responses include received packets as well as credits and other events
2930 * for the queues that belong to the response queue's queue set.
2931 * A negative budget is effectively unlimited.
2932 *
 *	Additionally, chooses the interrupt holdoff time for the next
 *	interrupt on this queue.  If the system is under memory pressure,
 *	a fairly long delay is used to aid recovery.
2936 */
2937 static int
2938 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2939 {
2940 struct sge_rspq *rspq = &qs->rspq;
2941 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2942 int budget_left = budget;
2943 unsigned int sleeping = 0;
2944 int lro_enabled = qs->lro.enabled;
2945 int skip_lro;
2946 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2947 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2948 int ngathered = 0;
2949 struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
2950 #ifdef DEBUG
2951 static int last_holdoff = 0;
2952 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2953 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2954 last_holdoff = rspq->holdoff_tmr;
2955 }
2956 #endif
2957 rspq->next_holdoff = rspq->holdoff_tmr;
2958
2959 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2960 int eth, eop = 0, ethpad = 0;
2961 uint32_t flags = ntohl(r->flags);
2962 uint32_t rss_csum = *(const uint32_t *)r;
2963 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2964
2965 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2966
2967 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2968 struct mbuf *m;
2969
2970 if (cxgb_debug)
2971 printf("async notification\n");
2972
2973 if (mh->mh_head == NULL) {
2974 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2975 m = mh->mh_head;
2976 } else {
2977 m = m_gethdr(M_DONTWAIT, MT_DATA);
2978 }
2979 if (m == NULL)
2980 goto no_mem;
2981
2982 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2983 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2984 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2985 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2986 eop = 1;
2987 rspq->async_notif++;
2988 goto skip;
2989 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2990 struct mbuf *m = NULL;
2991
2992 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2993 r->rss_hdr.opcode, rspq->cidx);
2994 if (mh->mh_head == NULL)
2995 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2996 else
2997 m = m_gethdr(M_DONTWAIT, MT_DATA);
2998
2999 if (mh->mh_head == NULL && m == NULL) {
3000 no_mem:
3001 rspq->next_holdoff = NOMEM_INTR_DELAY;
3002 budget_left--;
3003 break;
3004 }
3005 get_imm_packet(adap, r, mh->mh_head);
3006 eop = 1;
3007 rspq->imm_data++;
3008 } else if (r->len_cq) {
3009 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3010
3011 eop = get_packet(adap, drop_thresh, qs, mh, r);
3012 if (eop) {
3013 if (r->rss_hdr.hash_type && !adap->timestamp)
3014 mh->mh_head->m_flags |= M_FLOWID;
3015 mh->mh_head->m_pkthdr.flowid = rss_hash;
3016 }
3017
3018 ethpad = 2;
3019 } else {
3020 rspq->pure_rsps++;
3021 }
3022 skip:
3023 if (flags & RSPD_CTRL_MASK) {
3024 sleeping |= flags & RSPD_GTS_MASK;
3025 handle_rsp_cntrl_info(qs, flags);
3026 }
3027
3028 r++;
3029 if (__predict_false(++rspq->cidx == rspq->size)) {
3030 rspq->cidx = 0;
3031 rspq->gen ^= 1;
3032 r = rspq->desc;
3033 }
3034
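		/* Return accumulated credits in batches of 64 to limit register writes. */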
3035 if (++rspq->credits >= 64) {
3036 refill_rspq(adap, rspq, rspq->credits);
3037 rspq->credits = 0;
3038 }
3039 if (!eth && eop) {
3040 mh->mh_head->m_pkthdr.csum_data = rss_csum;
3041 /*
3042 * XXX size mismatch
3043 */
3044 m_set_priority(mh->mh_head, rss_hash);
3045
3046
3047 ngathered = rx_offload(&adap->tdev, rspq,
3048 mh->mh_head, offload_mbufs, ngathered);
3049 mh->mh_head = NULL;
3050 DPRINTF("received offload packet\n");
3051
3052 } else if (eth && eop) {
3053 struct mbuf *m = mh->mh_head;
3054
3055 t3_rx_eth(adap, rspq, m, ethpad);
3056
3057 /*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending the packet up
			 * lro_ctrl->ifp's input path.  That is incorrect.
3061 *
3062 * The mbuf's rcvif was derived from the cpl header and
3063 * is accurate. Skip LRO and just use that.
3064 */
3065 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3066
3067 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
3068 #ifdef INET
3069 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
3070 #endif
3071 ) {
				/* successfully queued for LRO */
3073 } else {
3074 /*
3075 * LRO not enabled, packet unsuitable for LRO,
3076 * or unable to queue. Pass it up right now in
3077 * either case.
3078 */
3079 struct ifnet *ifp = m->m_pkthdr.rcvif;
3080 (*ifp->if_input)(ifp, m);
3081 }
3082 mh->mh_head = NULL;
3083
3084 }
3085 __refill_fl_lt(adap, &qs->fl[0], 32);
3086 __refill_fl_lt(adap, &qs->fl[1], 32);
3087 --budget_left;
3088 }
3089
3090 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
3091
3092 #ifdef INET
3093 /* Flush LRO */
3094 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3095 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3096 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3097 tcp_lro_flush(lro_ctrl, queued);
3098 }
3099 #endif
3100
3101 if (sleeping)
3102 check_ring_db(adap, qs, sleeping);
3103
3104 mb(); /* commit Tx queue processed updates */
3105 if (__predict_false(qs->txq_stopped > 1))
3106 restart_tx(qs);
3107
3108 __refill_fl_lt(adap, &qs->fl[0], 512);
3109 __refill_fl_lt(adap, &qs->fl[1], 512);
3110 budget -= budget_left;
3111 return (budget);
3112 }
3113
3114 /*
3115 * A helper function that processes responses and issues GTS.
3116 */
3117 static __inline int
3118 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3119 {
3120 int work;
3121 static int last_holdoff = 0;
3122
3123 work = process_responses(adap, rspq_to_qset(rq), -1);
3124
3125 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3126 printf("next_holdoff=%d\n", rq->next_holdoff);
3127 last_holdoff = rq->next_holdoff;
3128 }
3129 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3130 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3131
3132 return (work);
3133 }
3134
3135
3136 /*
3137 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3138 * Handles data events from SGE response queues as well as error and other
3139 * async events as they all use the same interrupt pin. We use one SGE
3140 * response queue per port in this mode and protect all response queues with
3141 * queue 0's lock.
3142 */
3143 void
3144 t3b_intr(void *data)
3145 {
3146 uint32_t i, map;
3147 adapter_t *adap = data;
3148 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3149
3150 t3_write_reg(adap, A_PL_CLI, 0);
3151 map = t3_read_reg(adap, A_SG_DATA_INTR);
3152
3153 if (!map)
3154 return;
3155
3156 if (__predict_false(map & F_ERRINTR)) {
3157 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3158 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3159 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3160 }
3161
3162 mtx_lock(&q0->lock);
3163 for_each_port(adap, i)
3164 if (map & (1 << i))
3165 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3166 mtx_unlock(&q0->lock);
3167 }
3168
3169 /*
3170 * The MSI interrupt handler. This needs to handle data events from SGE
3171 * response queues as well as error and other async events as they all use
3172 * the same MSI vector. We use one SGE response queue per port in this mode
3173 * and protect all response queues with queue 0's lock.
3174 */
3175 void
3176 t3_intr_msi(void *data)
3177 {
3178 adapter_t *adap = data;
3179 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3180 int i, new_packets = 0;
3181
3182 mtx_lock(&q0->lock);
3183
3184 for_each_port(adap, i)
3185 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3186 new_packets = 1;
3187 mtx_unlock(&q0->lock);
3188 if (new_packets == 0) {
3189 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3190 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3191 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3192 }
3193 }
3194
3195 void
3196 t3_intr_msix(void *data)
3197 {
3198 struct sge_qset *qs = data;
3199 adapter_t *adap = qs->port->adapter;
3200 struct sge_rspq *rspq = &qs->rspq;
3201
3202 if (process_responses_gts(adap, rspq) == 0)
3203 rspq->unhandled_irqs++;
3204 }
3205
#define QDUMP_SBUF_SIZE (32 * 400)
3207 static int
3208 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3209 {
3210 struct sge_rspq *rspq;
3211 struct sge_qset *qs;
3212 int i, err, dump_end, idx;
3213 struct sbuf *sb;
3214 struct rsp_desc *rspd;
3215 uint32_t data[4];
3216
3217 rspq = arg1;
3218 qs = rspq_to_qset(rspq);
3219 if (rspq->rspq_dump_count == 0)
3220 return (0);
3221 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3222 log(LOG_WARNING,
3223 "dump count is too large %d\n", rspq->rspq_dump_count);
3224 rspq->rspq_dump_count = 0;
3225 return (EINVAL);
3226 }
3227 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3228 log(LOG_WARNING,
3229 "dump start of %d is greater than queue size\n",
3230 rspq->rspq_dump_start);
3231 rspq->rspq_dump_start = 0;
3232 return (EINVAL);
3233 }
3234 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3235 if (err)
3236 return (err);
3237 err = sysctl_wire_old_buffer(req, 0);
3238 if (err)
3239 return (err);
3240 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3241
3242 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3243 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3244 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3245 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3246 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3247
3248 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3249 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3250
3251 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3252 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3253 idx = i & (RSPQ_Q_SIZE-1);
3254
3255 rspd = &rspq->desc[idx];
3256 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3257 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3258 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3259 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3260 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3261 be32toh(rspd->len_cq), rspd->intr_gen);
3262 }
3263
3264 err = sbuf_finish(sb);
3265 /* Output a trailing NUL. */
3266 if (err == 0)
3267 err = SYSCTL_OUT(req, "", 1);
3268 sbuf_delete(sb);
3269 return (err);
3270 }
3271
3272 static int
3273 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3274 {
3275 struct sge_txq *txq;
3276 struct sge_qset *qs;
3277 int i, j, err, dump_end;
3278 struct sbuf *sb;
3279 struct tx_desc *txd;
3280 uint32_t *WR, wr_hi, wr_lo, gen;
3281 uint32_t data[4];
3282
3283 txq = arg1;
3284 qs = txq_to_qset(txq, TXQ_ETH);
3285 if (txq->txq_dump_count == 0) {
3286 return (0);
3287 }
3288 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3289 log(LOG_WARNING,
3290 "dump count is too large %d\n", txq->txq_dump_count);
3291 txq->txq_dump_count = 1;
3292 return (EINVAL);
3293 }
3294 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3295 log(LOG_WARNING,
3296 "dump start of %d is greater than queue size\n",
3297 txq->txq_dump_start);
3298 txq->txq_dump_start = 0;
3299 return (EINVAL);
3300 }
3301 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3302 if (err)
3303 return (err);
3304 err = sysctl_wire_old_buffer(req, 0);
3305 if (err)
3306 return (err);
3307 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3308
3309 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3310 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3311 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3312 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3313 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3314 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3315 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3316 txq->txq_dump_start,
3317 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3318
3319 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3320 for (i = txq->txq_dump_start; i < dump_end; i++) {
3321 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3322 WR = (uint32_t *)txd->flit;
3323 wr_hi = ntohl(WR[0]);
3324 wr_lo = ntohl(WR[1]);
3325 gen = G_WR_GEN(wr_lo);
3326
3327 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3328 wr_hi, wr_lo, gen);
3329 for (j = 2; j < 30; j += 4)
3330 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3331 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3332
3333 }
3334 err = sbuf_finish(sb);
3335 /* Output a trailing NUL. */
3336 if (err == 0)
3337 err = SYSCTL_OUT(req, "", 1);
3338 sbuf_delete(sb);
3339 return (err);
3340 }
3341
3342 static int
3343 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3344 {
3345 struct sge_txq *txq;
3346 struct sge_qset *qs;
3347 int i, j, err, dump_end;
3348 struct sbuf *sb;
3349 struct tx_desc *txd;
3350 uint32_t *WR, wr_hi, wr_lo, gen;
3351
3352 txq = arg1;
3353 qs = txq_to_qset(txq, TXQ_CTRL);
3354 if (txq->txq_dump_count == 0) {
3355 return (0);
3356 }
3357 if (txq->txq_dump_count > 256) {
3358 log(LOG_WARNING,
3359 "dump count is too large %d\n", txq->txq_dump_count);
3360 txq->txq_dump_count = 1;
3361 return (EINVAL);
3362 }
3363 if (txq->txq_dump_start > 255) {
3364 log(LOG_WARNING,
3365 "dump start of %d is greater than queue size\n",
3366 txq->txq_dump_start);
3367 txq->txq_dump_start = 0;
3368 return (EINVAL);
3369 }
3370
3371 err = sysctl_wire_old_buffer(req, 0);
3372 if (err != 0)
3373 return (err);
3374 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3375 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3376 txq->txq_dump_start,
3377 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3378
3379 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3380 for (i = txq->txq_dump_start; i < dump_end; i++) {
3381 txd = &txq->desc[i & (255)];
3382 WR = (uint32_t *)txd->flit;
3383 wr_hi = ntohl(WR[0]);
3384 wr_lo = ntohl(WR[1]);
3385 gen = G_WR_GEN(wr_lo);
3386
3387 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3388 wr_hi, wr_lo, gen);
3389 for (j = 2; j < 30; j += 4)
3390 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3391 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3392
3393 }
3394 err = sbuf_finish(sb);
3395 /* Output a trailing NUL. */
3396 if (err == 0)
3397 err = SYSCTL_OUT(req, "", 1);
3398 sbuf_delete(sb);
3399 return (err);
3400 }
3401
3402 static int
3403 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3404 {
3405 adapter_t *sc = arg1;
3406 struct qset_params *qsp = &sc->params.sge.qset[0];
3407 int coalesce_usecs;
3408 struct sge_qset *qs;
3409 int i, j, err, nqsets = 0;
3410 struct mtx *lock;
3411
3412 if ((sc->flags & FULL_INIT_DONE) == 0)
3413 return (ENXIO);
3414
3415 coalesce_usecs = qsp->coalesce_usecs;
3416 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3417
3418 if (err != 0) {
3419 return (err);
3420 }
3421 if (coalesce_usecs == qsp->coalesce_usecs)
3422 return (0);
3423
3424 for (i = 0; i < sc->params.nports; i++)
3425 for (j = 0; j < sc->port[i].nqsets; j++)
3426 nqsets++;
3427
3428 coalesce_usecs = max(1, coalesce_usecs);
3429
3430 for (i = 0; i < nqsets; i++) {
3431 qs = &sc->sge.qs[i];
3432 qsp = &sc->params.sge.qset[i];
3433 qsp->coalesce_usecs = coalesce_usecs;
3434
3435 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3436 &sc->sge.qs[0].rspq.lock;
3437
3438 mtx_lock(lock);
3439 t3_update_qset_coalesce(qs, qsp);
3440 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3441 V_NEWTIMER(qs->rspq.holdoff_tmr));
3442 mtx_unlock(lock);
3443 }
3444
3445 return (0);
3446 }
3447
3448 static int
3449 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3450 {
3451 adapter_t *sc = arg1;
3452 int rc, timestamp;
3453
3454 if ((sc->flags & FULL_INIT_DONE) == 0)
3455 return (ENXIO);
3456
3457 timestamp = sc->timestamp;
3458 rc = sysctl_handle_int(oidp, ×tamp, arg2, req);
3459
3460 if (rc != 0)
3461 return (rc);
3462
3463 if (timestamp != sc->timestamp) {
3464 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3465 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3466 sc->timestamp = timestamp;
3467 }
3468
3469 return (0);
3470 }
3471
3472 void
3473 t3_add_attach_sysctls(adapter_t *sc)
3474 {
3475 struct sysctl_ctx_list *ctx;
3476 struct sysctl_oid_list *children;
3477
3478 ctx = device_get_sysctl_ctx(sc->dev);
3479 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3480
3481 /* random information */
3482 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3483 "firmware_version",
3484 CTLFLAG_RD, &sc->fw_version,
3485 0, "firmware version");
3486 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3487 "hw_revision",
3488 CTLFLAG_RD, &sc->params.rev,
3489 0, "chip model");
3490 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3491 "port_types",
3492 CTLFLAG_RD, &sc->port_types,
3493 0, "type of ports");
3494 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3495 "enable_debug",
3496 CTLFLAG_RW, &cxgb_debug,
3497 0, "enable verbose debugging output");
3498 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3499 CTLFLAG_RD, &sc->tunq_coalesce,
3500 "#tunneled packets freed");
3501 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3502 "txq_overrun",
3503 CTLFLAG_RD, &txq_fills,
3504 0, "#times txq overrun");
3505 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3506 "core_clock",
3507 CTLFLAG_RD, &sc->params.vpd.cclk,
3508 0, "core clock frequency (in KHz)");
3509 }
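
/*
 * These OIDs live under the controller device's sysctl tree, i.e.
 * dev.<driver>.<unit>.  For example (controller device name and unit
 * number assumed):
 *
 *	sysctl dev.cxgbc.0.core_clock
 */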
3510
3511
3512 static const char *rspq_name = "rspq";
3513 static const char *txq_names[] =
3514 {
3515 "txq_eth",
3516 "txq_ofld",
3517 "txq_ctrl"
3518 };
3519
3520 static int
3521 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3522 {
3523 struct port_info *p = arg1;
3524 uint64_t *parg;
3525
3526 if (!p)
3527 return (EINVAL);
3528
3529 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3530 PORT_LOCK(p);
3531 t3_mac_update_stats(&p->mac);
3532 PORT_UNLOCK(p);
3533
3534 return (sysctl_handle_quad(oidp, parg, 0, req));
3535 }
3536
3537 void
3538 t3_add_configured_sysctls(adapter_t *sc)
3539 {
3540 struct sysctl_ctx_list *ctx;
3541 struct sysctl_oid_list *children;
3542 int i, j;
3543
3544 ctx = device_get_sysctl_ctx(sc->dev);
3545 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3546
3547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3548 "intr_coal",
3549 CTLTYPE_INT|CTLFLAG_RW, sc,
3550 0, t3_set_coalesce_usecs,
3551 "I", "interrupt coalescing timer (us)");
3552
3553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3554 "pkt_timestamp",
3555 CTLTYPE_INT | CTLFLAG_RW, sc,
3556 0, t3_pkt_timestamp,
3557 "I", "provide packet timestamp instead of connection hash");
3558
3559 for (i = 0; i < sc->params.nports; i++) {
3560 struct port_info *pi = &sc->port[i];
3561 struct sysctl_oid *poid;
3562 struct sysctl_oid_list *poidlist;
3563 struct mac_stats *mstats = &pi->mac.stats;
3564
3565 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3566 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3567 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3568 poidlist = SYSCTL_CHILDREN(poid);
3569 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3570 "nqsets", CTLFLAG_RD, &pi->nqsets,
3571 0, "#queue sets");
3572
3573 for (j = 0; j < pi->nqsets; j++) {
3574 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3575 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3576 *ctrlqpoid, *lropoid;
3577 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3578 *txqpoidlist, *ctrlqpoidlist,
3579 *lropoidlist;
3580 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3581
3582 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3583
3584 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3585 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3586 qspoidlist = SYSCTL_CHILDREN(qspoid);
3587
3588 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3589 CTLFLAG_RD, &qs->fl[0].empty, 0,
3590 "freelist #0 empty");
3591 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3592 CTLFLAG_RD, &qs->fl[1].empty, 0,
3593 "freelist #1 empty");
3594
3595 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3596 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3597 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3598
3599 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3600 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3601 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3602
3603 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3604 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3605 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3606
3607 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3608 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3609 lropoidlist = SYSCTL_CHILDREN(lropoid);
3610
3611 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3612 CTLFLAG_RD, &qs->rspq.size,
3613 0, "#entries in response queue");
3614 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3615 CTLFLAG_RD, &qs->rspq.cidx,
3616 0, "consumer index");
3617 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3618 CTLFLAG_RD, &qs->rspq.credits,
3619 0, "#credits");
3620 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3621 CTLFLAG_RD, &qs->rspq.starved,
3622 0, "#times starved");
3623 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3624 CTLFLAG_RD, &qs->rspq.phys_addr,
3625 "physical_address_of the queue");
3626 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3627 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3628 0, "start rspq dump entry");
3629 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3630 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3631 0, "#rspq entries to dump");
3632 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3633 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3634 0, t3_dump_rspq, "A", "dump of the response queue");
3635
3636 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3637 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3638 "#tunneled packets dropped");
3639 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3640 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3641 0, "#tunneled packets waiting to be sent");
3642 #if 0
3643 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3644 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3645 0, "#tunneled packets queue producer index");
3646 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3647 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3648 0, "#tunneled packets queue consumer index");
3649 #endif
3650 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3651 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3652 0, "#tunneled packets processed by the card");
3653 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3654 CTLFLAG_RD, &txq->cleaned,
3655 0, "#tunneled packets cleaned");
3656 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3657 CTLFLAG_RD, &txq->in_use,
3658 0, "#tunneled packet slots in use");
3659 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3660 CTLFLAG_RD, &txq->txq_frees,
3661 "#tunneled packets freed");
3662 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3663 CTLFLAG_RD, &txq->txq_skipped,
3664 0, "#tunneled packet descriptors skipped");
3665 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3666 CTLFLAG_RD, &txq->txq_coalesced,
3667 "#tunneled packets coalesced");
3668 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3669 CTLFLAG_RD, &txq->txq_enqueued,
3670 0, "#tunneled packets enqueued to hardware");
3671 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3672 CTLFLAG_RD, &qs->txq_stopped,
3673 0, "tx queues stopped");
3674 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3675 CTLFLAG_RD, &txq->phys_addr,
3676 "physical_address_of the queue");
3677 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3678 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3679 0, "txq generation");
3680 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3681 CTLFLAG_RD, &txq->cidx,
3682 0, "hardware queue cidx");
3683 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3684 CTLFLAG_RD, &txq->pidx,
3685 0, "hardware queue pidx");
3686 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3687 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3688 0, "txq start idx for dump");
3689 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3690 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3691 0, "txq #entries to dump");
3692 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3693 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3694 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3695
3696 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3697 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3698 0, "ctrlq start idx for dump");
3699 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3700 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3701 0, "ctrl #entries to dump");
3702 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3703 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3704 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3705
3706 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3707 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3708 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3709 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3710 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3711 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3712 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3713 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3714 }
3715
		/* Now add a node for mac stats. */
		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
		    CTLFLAG_RD, NULL, "MAC statistics");
		poidlist = SYSCTL_CHILDREN(poid);

		/*
		 * We (ab)use the length argument (arg2) to pass the offset
		 * of the counter we are interested in within struct mac_stats.
		 * This is only required for the quad counters that are
		 * refreshed from the hardware, where we must return the
		 * latest value.  sysctl_handle_macstat first updates *all*
		 * the counters from the hardware and then returns the
		 * requested one; a sketch of that pattern follows below.
		 * Ideally we would refresh only the requested counter, but
		 * t3_mac_update_stats() hides the register details and we
		 * don't want to dive into all that here.
		 */
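		/*
		 * A minimal sketch of that handler pattern, assuming the cxgb
		 * PORT_LOCK/PORT_UNLOCK macros and the stock
		 * sysctl_handle_quad() helper; the real sysctl_handle_macstat
		 * is defined elsewhere in this file and may differ in detail:
		 *
		 *	static int
		 *	sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
		 *	{
		 *		struct port_info *p = arg1;
		 *		uint64_t *val;
		 *
		 *		if (p == NULL)
		 *			return (EINVAL);
		 *
		 *		// Refresh every counter from the hardware,
		 *		// then pick out the one at byte offset arg2.
		 *		PORT_LOCK(p);
		 *		t3_mac_update_stats(&p->mac);
		 *		PORT_UNLOCK(p);
		 *		val = (uint64_t *)((uintptr_t)&p->mac.stats + arg2);
		 *
		 *		return (sysctl_handle_quad(oidp, val, 0, req));
		 *	}
		 */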
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a)	SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
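
/*
 * A hedged usage sketch for t3_get_desc(); the adapter pointer 'sc' and the
 * buffer sizing are illustrative assumptions, not taken from this file.
 * Callers pass a buffer large enough for the biggest descriptor type and
 * distinguish success by the returned size (errors come back as EINVAL):
 *
 *	unsigned char buf[MAX(sizeof(struct tx_desc),
 *	    MAX(sizeof(struct rsp_desc), sizeof(struct rx_desc)))];
 *	int len;
 *
 *	// Dump the first Ethernet Tx descriptor of qset 0 (qnum 0..2 = Tx).
 *	len = t3_get_desc(&sc->sge.qs[0], TXQ_ETH, 0, buf);
 *	if (len == EINVAL)
 *		printf("no such descriptor\n");
 *	else
 *		printf("copied %d descriptor bytes\n", len);
 */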