1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/8.1/sys/dev/cxgb/cxgb_sge.c 208039 2010-05-13 17:45:32Z np $");
32
33 #include "opt_inet.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 #include <sys/socket.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_vlan_var.h>
61
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66
67 #include <dev/pci/pcireg.h>
68 #include <dev/pci/pcivar.h>
69
70 #include <vm/vm.h>
71 #include <vm/pmap.h>
72
73 #include <cxgb_include.h>
74 #include <sys/mvec.h>
75
76 int txq_fills = 0;
77 int multiq_tx_enable = 1;
78
79 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
83 "size of per-queue mbuf ring");
84
85 static int cxgb_tx_coalesce_force = 0;
86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
88 &cxgb_tx_coalesce_force, 0,
89 "coalesce small packets into a single work request regardless of ring state");
90
#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE>>1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE>>2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE>>5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE>>5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE>>2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE>>6)
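/*
 * The thresholds above are fractions of the Ethernet Tx ring size:
 * coalescing starts by default once half of TX_ETH_Q_SIZE descriptors are
 * in use and stops below a quarter, while the reclaim threshold defaults
 * to 1/32 of the ring and is clamped to the [1/64, 1/4] range.
 */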
98
99
100 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
102 &cxgb_tx_coalesce_enable_start);
103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
104 &cxgb_tx_coalesce_enable_start, 0,
105 "coalesce enable threshold");
106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
109 &cxgb_tx_coalesce_enable_stop, 0,
110 "coalesce disable threshold");
111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
114 &cxgb_tx_reclaim_threshold, 0,
115 "tx cleaning minimum threshold");
116
117 /*
118 * XXX don't re-enable this until TOE stops assuming
119 * we have an m_ext
120 */
121 static int recycle_enable = 0;
122
123 extern int cxgb_use_16k_clusters;
124 extern int nmbjumbop;
125 extern int nmbjumbo9;
126 extern int nmbjumbo16;
127
128 #define USE_GTS 0
129
130 #define SGE_RX_SM_BUF_SIZE 1536
131 #define SGE_RX_DROP_THRES 16
132 #define SGE_RX_COPY_THRES 128
133
134 /*
135 * Period of the Tx buffer reclaim timer. This timer does not need to run
136 * frequently as Tx buffers are usually reclaimed by new Tx packets.
137 */
138 #define TX_RECLAIM_PERIOD (hz >> 1)
139
140 /*
141 * Values for sge_txq.flags
142 */
143 enum {
144 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
145 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
146 };
147
148 struct tx_desc {
149 uint64_t flit[TX_DESC_FLITS];
150 } __packed;
151
152 struct rx_desc {
153 uint32_t addr_lo;
154 uint32_t len_gen;
155 uint32_t gen2;
156 uint32_t addr_hi;
157 } __packed;
158
159 struct rsp_desc { /* response queue descriptor */
160 struct rss_header rss_hdr;
161 uint32_t flags;
162 uint32_t len_cq;
163 uint8_t imm_data[47];
164 uint8_t intr_gen;
165 } __packed;
166
167 #define RX_SW_DESC_MAP_CREATED (1 << 0)
168 #define TX_SW_DESC_MAP_CREATED (1 << 1)
169 #define RX_SW_DESC_INUSE (1 << 3)
170 #define TX_SW_DESC_MAPPED (1 << 4)
171
172 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
173 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
174 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
175 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
176
177 struct tx_sw_desc { /* SW state per Tx descriptor */
178 struct mbuf *m;
179 bus_dmamap_t map;
180 int flags;
181 };
182
183 struct rx_sw_desc { /* SW state per Rx descriptor */
184 caddr_t rxsd_cl;
185 struct mbuf *m;
186 bus_dmamap_t map;
187 int flags;
188 };
189
190 struct txq_state {
191 unsigned int compl;
192 unsigned int gen;
193 unsigned int pidx;
194 };
195
196 struct refill_fl_cb_arg {
197 int error;
198 bus_dma_segment_t seg;
199 int nseg;
200 };
201
202
203 /*
204 * Maps a number of flits to the number of Tx descriptors that can hold them.
205 * The formula is
206 *
207 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
208 *
209 * HW allows up to 4 descriptors to be combined into a WR.
210 */
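/*
 * For example, with SGE_NUM_GENBITS == 2 the table below implies
 * WR_FLITS == 15: a 15-flit work request still fits in one descriptor,
 * while a 16-flit request needs 1 + (16 - 2) / (15 - 1) = 2 descriptors.
 */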
211 static uint8_t flit_desc_map[] = {
212 0,
213 #if SGE_NUM_GENBITS == 1
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
223 #else
224 # error "SGE_NUM_GENBITS must be 1 or 2"
225 #endif
226 };
227
228 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
229 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
230 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
231 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
232 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
233 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
234 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
237 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
238 #define TXQ_RING_DEQUEUE(qs) \
239 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
240
241 int cxgb_debug = 0;
242
243 static void sge_timer_cb(void *arg);
244 static void sge_timer_reclaim(void *arg, int ncount);
245 static void sge_txq_reclaim_handler(void *arg, int ncount);
246 static void cxgb_start_locked(struct sge_qset *qs);
247
248 /*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we do now when determining the need for coalescing
251 *
252 */
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
255 {
256 struct adapter *sc;
257 struct sge_txq *txq;
258 uint8_t *fill;
259
260 if (__predict_false(cxgb_tx_coalesce_force))
261 return (1);
262 txq = &qs->txq[TXQ_ETH];
263 sc = qs->port->adapter;
264 fill = &sc->tunq_fill[qs->idx];
265
266 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue fills beyond the
	 * tx_coalesce_enable_start threshold we mark the queue as
	 * coalescing; we drop back out of coalescing once it drains below
	 * tx_coalesce_enable_stop and there are no packets enqueued.
	 * This provides some degree of hysteresis.
	 */
276 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
278 *fill = 0;
279 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
280 *fill = 1;
281
282 return (sc->tunq_coalesce);
283 }
284
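/*
 * Write a work request header so that the SGE never sees it partially
 * updated: on 64-bit platforms the hi and lo words are combined into a
 * single 64-bit store, while on 32-bit platforms the hi word is written
 * first and a write barrier is issued before the lo word (which carries
 * the generation bit) is stored.
 */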
285 #ifdef __LP64__
286 static void
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
288 {
289 uint64_t wr_hilo;
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
291 wr_hilo = wr_hi;
292 wr_hilo |= (((uint64_t)wr_lo)<<32);
293 #else
294 wr_hilo = wr_lo;
295 wr_hilo |= (((uint64_t)wr_hi)<<32);
296 #endif
297 wrp->wrh_hilo = wr_hilo;
298 }
299 #else
300 static void
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
302 {
303
304 wrp->wrh_hi = wr_hi;
305 wmb();
306 wrp->wrh_lo = wr_lo;
307 }
308 #endif
309
310 struct coalesce_info {
311 int count;
312 int nbytes;
313 };
314
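/*
 * Decide whether another mbuf may be folded into the current coalesced
 * work request.  The limits below mirror the batched CPL_TX_PKT format
 * used by t3_encap(): at most 7 packets, each a single contiguous mbuf,
 * at two flits per packet plus one header flit, together with a
 * conservative overall byte budget.
 */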
315 static int
316 coalesce_check(struct mbuf *m, void *arg)
317 {
318 struct coalesce_info *ci = arg;
319 int *count = &ci->count;
320 int *nbytes = &ci->nbytes;
321
322 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
323 (*count < 7) && (m->m_next == NULL))) {
324 *count += 1;
325 *nbytes += m->m_len;
326 return (1);
327 }
328 return (0);
329 }
330
331 static struct mbuf *
332 cxgb_dequeue(struct sge_qset *qs)
333 {
334 struct mbuf *m, *m_head, *m_tail;
335 struct coalesce_info ci;
336
337
338 if (check_pkt_coalesce(qs) == 0)
339 return TXQ_RING_DEQUEUE(qs);
340
341 m_head = m_tail = NULL;
342 ci.count = ci.nbytes = 0;
343 do {
344 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
345 if (m_head == NULL) {
346 m_tail = m_head = m;
347 } else if (m != NULL) {
348 m_tail->m_nextpkt = m;
349 m_tail = m;
350 }
351 } while (m != NULL);
352 if (ci.count > 7)
353 panic("trying to coalesce %d packets in to one WR", ci.count);
354 return (m_head);
355 }
356
357 /**
358 * reclaim_completed_tx - reclaims completed Tx descriptors
 *	@qs: the queue set that owns the Tx queue
 *	@reclaim_min: do nothing unless at least this many descriptors are reclaimable
 *	@queue: index of the Tx queue to reclaim completed descriptors from
361 *
362 * Reclaims Tx descriptors that the SGE has indicated it has processed,
363 * and frees the associated buffers if possible. Called with the Tx
364 * queue's lock held.
365 */
366 static __inline int
367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
368 {
369 struct sge_txq *q = &qs->txq[queue];
370 int reclaim = desc_reclaimable(q);
371
372 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
373 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
374 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
375
376 if (reclaim < reclaim_min)
377 return (0);
378
379 mtx_assert(&qs->lock, MA_OWNED);
380 if (reclaim > 0) {
381 t3_free_tx_desc(qs, reclaim, queue);
382 q->cleaned += reclaim;
383 q->in_use -= reclaim;
384 }
385 if (isset(&qs->txq_stopped, TXQ_ETH))
386 clrbit(&qs->txq_stopped, TXQ_ETH);
387
388 return (reclaim);
389 }
390
391 /**
392 * should_restart_tx - are there enough resources to restart a Tx queue?
393 * @q: the Tx queue
394 *
395 * Checks if there are enough descriptors to restart a suspended Tx queue.
396 */
397 static __inline int
398 should_restart_tx(const struct sge_txq *q)
399 {
400 unsigned int r = q->processed - q->cleaned;
401
402 return q->in_use - r < (q->size >> 1);
403 }
404
405 /**
406 * t3_sge_init - initialize SGE
407 * @adap: the adapter
408 * @p: the SGE parameters
409 *
410 * Performs SGE initialization needed every time after a chip reset.
411 * We do not initialize any of the queue sets here, instead the driver
412 * top-level must request those individually. We also do not enable DMA
413 * here, that should be done after the queues have been set up.
414 */
415 void
416 t3_sge_init(adapter_t *adap, struct sge_params *p)
417 {
418 u_int ctrl, ups;
419
420 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
421
422 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
423 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
424 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
425 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
426 #if SGE_NUM_GENBITS == 1
427 ctrl |= F_EGRGENCTRL;
428 #endif
429 if (adap->params.rev > 0) {
430 if (!(adap->flags & (USING_MSIX | USING_MSI)))
431 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
432 }
433 t3_write_reg(adap, A_SG_CONTROL, ctrl);
434 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
435 V_LORCQDRBTHRSH(512));
436 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
437 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
438 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
439 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
440 adap->params.rev < T3_REV_C ? 1000 : 500);
441 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
442 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
443 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
444 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
445 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
446 }
447
448
449 /**
450 * sgl_len - calculates the size of an SGL of the given capacity
451 * @n: the number of SGL entries
452 *
453 * Calculates the number of flits needed for a scatter/gather list that
454 * can hold the given number of entries.
455 */
456 static __inline unsigned int
457 sgl_len(unsigned int n)
458 {
459 return ((3 * n) / 2 + (n & 1));
460 }
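/*
 * The arithmetic above reflects the SGL layout used by make_sgl(): each
 * struct sg_ent packs two 32-bit lengths and two 64-bit addresses into
 * three flits, so a pair of segments costs 3 flits and a trailing odd
 * segment costs 2 (e.g. sgl_len(3) == 5).
 */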
461
462 /**
463 * get_imm_packet - return the next ingress packet buffer from a response
464 * @resp: the response descriptor containing the packet data
465 *
 *	Copies the immediate data of the given response into the supplied mbuf.
467 */
468 static int
469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
470 {
471
472 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
473 m->m_ext.ext_buf = NULL;
474 m->m_ext.ext_type = 0;
475 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
476 return (0);
477 }
478
479 static __inline u_int
480 flits_to_desc(u_int n)
481 {
482 return (flit_desc_map[n]);
483 }
484
485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
486 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
487 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
488 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
489 F_HIRCQPARITYERROR)
490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
492 F_RSPQDISABLED)
493
494 /**
495 * t3_sge_err_intr_handler - SGE async event interrupt handler
496 * @adapter: the adapter
497 *
498 * Interrupt handler for SGE asynchronous (non-data) events.
499 */
500 void
501 t3_sge_err_intr_handler(adapter_t *adapter)
502 {
503 unsigned int v, status;
504
505 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
506 if (status & SGE_PARERR)
507 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
508 status & SGE_PARERR);
509 if (status & SGE_FRAMINGERR)
510 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
511 status & SGE_FRAMINGERR);
512 if (status & F_RSPQCREDITOVERFOW)
513 CH_ALERT(adapter, "SGE response queue credit overflow\n");
514
515 if (status & F_RSPQDISABLED) {
516 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
517
518 CH_ALERT(adapter,
519 "packet delivered to disabled response queue (0x%x)\n",
520 (v >> S_RSPQ0DISABLED) & 0xff);
521 }
522
523 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
524 if (status & SGE_FATALERR)
525 t3_fatal_err(adapter);
526 }
527
528 void
529 t3_sge_prep(adapter_t *adap, struct sge_params *p)
530 {
531 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
532
533 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
534 nqsets *= adap->params.nports;
535
536 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
537
538 while (!powerof2(fl_q_size))
539 fl_q_size--;
540
541 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
542 is_offload(adap);
543
544 #if __FreeBSD_version >= 700111
545 if (use_16k) {
546 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
547 jumbo_buf_size = MJUM16BYTES;
548 } else {
549 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
550 jumbo_buf_size = MJUM9BYTES;
551 }
552 #else
553 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
554 jumbo_buf_size = MJUMPAGESIZE;
555 #endif
556 while (!powerof2(jumbo_q_size))
557 jumbo_q_size--;
558
559 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
560 device_printf(adap->dev,
561 "Insufficient clusters and/or jumbo buffers.\n");
562
563 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
564
565 for (i = 0; i < SGE_QSETS; ++i) {
566 struct qset_params *q = p->qset + i;
567
568 if (adap->params.nports > 2) {
569 q->coalesce_usecs = 50;
570 } else {
571 #ifdef INVARIANTS
572 q->coalesce_usecs = 10;
573 #else
574 q->coalesce_usecs = 5;
575 #endif
576 }
577 q->polling = 0;
578 q->rspq_size = RSPQ_Q_SIZE;
579 q->fl_size = fl_q_size;
580 q->jumbo_size = jumbo_q_size;
581 q->jumbo_buf_size = jumbo_buf_size;
582 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
583 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
584 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
585 q->cong_thres = 0;
586 }
587 }
588
589 int
590 t3_sge_alloc(adapter_t *sc)
591 {
592
593 /* The parent tag. */
594 if (bus_dma_tag_create( NULL, /* parent */
595 1, 0, /* algnmnt, boundary */
596 BUS_SPACE_MAXADDR, /* lowaddr */
597 BUS_SPACE_MAXADDR, /* highaddr */
598 NULL, NULL, /* filter, filterarg */
599 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
600 BUS_SPACE_UNRESTRICTED, /* nsegments */
601 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
602 0, /* flags */
603 NULL, NULL, /* lock, lockarg */
604 &sc->parent_dmat)) {
605 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
606 return (ENOMEM);
607 }
608
609 /*
610 * DMA tag for normal sized RX frames
611 */
612 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
613 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
614 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
615 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
616 return (ENOMEM);
617 }
618
619 /*
620 * DMA tag for jumbo sized RX frames.
621 */
622 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
623 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
624 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
625 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
626 return (ENOMEM);
627 }
628
629 /*
630 * DMA tag for TX frames.
631 */
632 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
633 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
634 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
635 NULL, NULL, &sc->tx_dmat)) {
636 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
637 return (ENOMEM);
638 }
639
640 return (0);
641 }
642
643 int
644 t3_sge_free(struct adapter * sc)
645 {
646
647 if (sc->tx_dmat != NULL)
648 bus_dma_tag_destroy(sc->tx_dmat);
649
650 if (sc->rx_jumbo_dmat != NULL)
651 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
652
653 if (sc->rx_dmat != NULL)
654 bus_dma_tag_destroy(sc->rx_dmat);
655
656 if (sc->parent_dmat != NULL)
657 bus_dma_tag_destroy(sc->parent_dmat);
658
659 return (0);
660 }
661
662 void
663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
664 {
665
666 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
667 qs->rspq.polling = 0 /* p->polling */;
668 }
669
670 #if !defined(__i386__) && !defined(__amd64__)
671 static void
672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
673 {
674 struct refill_fl_cb_arg *cb_arg = arg;
675
676 cb_arg->error = error;
677 cb_arg->seg = segs[0];
678 cb_arg->nseg = nseg;
679
680 }
681 #endif
682 /**
683 * refill_fl - refill an SGE free-buffer list
684 * @sc: the controller softc
685 * @q: the free-list to refill
686 * @n: the number of new buffers to allocate
687 *
688 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
689 * The caller must assure that @n does not exceed the queue's capacity.
690 */
691 static void
692 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
693 {
694 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
695 struct rx_desc *d = &q->desc[q->pidx];
696 struct refill_fl_cb_arg cb_arg;
697 struct mbuf *m;
698 caddr_t cl;
699 int err;
700
701 cb_arg.error = 0;
702 while (n--) {
703 /*
		 * We only allocate a cluster here; mbuf allocation happens after rx.
705 */
706 if (q->zone == zone_pack) {
707 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
708 break;
709 cl = m->m_ext.ext_buf;
710 } else {
711 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
712 break;
713 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
714 uma_zfree(q->zone, cl);
715 break;
716 }
717 }
718 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
719 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
720 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
721 uma_zfree(q->zone, cl);
722 goto done;
723 }
724 sd->flags |= RX_SW_DESC_MAP_CREATED;
725 }
726 #if !defined(__i386__) && !defined(__amd64__)
727 err = bus_dmamap_load(q->entry_tag, sd->map,
728 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
729
730 if (err != 0 || cb_arg.error) {
731 if (q->zone == zone_pack)
732 uma_zfree(q->zone, cl);
733 m_free(m);
734 goto done;
735 }
736 #else
737 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
738 #endif
739 sd->flags |= RX_SW_DESC_INUSE;
740 sd->rxsd_cl = cl;
741 sd->m = m;
742 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
743 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
744 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
745 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
746
747 d++;
748 sd++;
749
750 if (++q->pidx == q->size) {
751 q->pidx = 0;
752 q->gen ^= 1;
753 sd = q->sdesc;
754 d = q->desc;
755 }
756 q->credits++;
757 q->db_pending++;
758 }
759
760 done:
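	/*
	 * Batch doorbell notifications: tell the SGE about new free-list
	 * buffers only once at least 32 have accumulated in db_pending,
	 * which limits the number of register writes on the refill path.
	 */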
761 if (q->db_pending >= 32) {
762 q->db_pending = 0;
763 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
764 }
765 }
766
767
768 /**
769 * free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
771 * @q: the SGE free list to clean up
772 *
773 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
774 * this queue should be stopped before calling this function.
775 */
776 static void
777 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
778 {
779 u_int cidx = q->cidx;
780
781 while (q->credits--) {
782 struct rx_sw_desc *d = &q->sdesc[cidx];
783
784 if (d->flags & RX_SW_DESC_INUSE) {
785 bus_dmamap_unload(q->entry_tag, d->map);
786 bus_dmamap_destroy(q->entry_tag, d->map);
787 if (q->zone == zone_pack) {
788 m_init(d->m, zone_pack, MCLBYTES,
789 M_NOWAIT, MT_DATA, M_EXT);
790 uma_zfree(zone_pack, d->m);
791 } else {
792 m_init(d->m, zone_mbuf, MLEN,
793 M_NOWAIT, MT_DATA, 0);
794 uma_zfree(zone_mbuf, d->m);
795 uma_zfree(q->zone, d->rxsd_cl);
796 }
797 }
798
799 d->rxsd_cl = NULL;
800 d->m = NULL;
801 if (++cidx == q->size)
802 cidx = 0;
803 }
804 }
805
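/*
 * Top up a free list by at most 16 buffers at a time; callers use this for
 * opportunistic refills where only a small, bounded amount of work is wanted.
 */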
806 static __inline void
807 __refill_fl(adapter_t *adap, struct sge_fl *fl)
808 {
809 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
810 }
811
812 static __inline void
813 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
814 {
815 uint32_t reclaimable = fl->size - fl->credits;
816
817 if (reclaimable > 0)
818 refill_fl(adap, fl, min(max, reclaimable));
819 }
820
821 /**
822 * recycle_rx_buf - recycle a receive buffer
823 * @adapter: the adapter
824 * @q: the SGE free list
825 * @idx: index of buffer to recycle
826 *
827 * Recycles the specified buffer on the given free list by adding it at
828 * the next available slot on the list.
829 */
830 static void
831 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
832 {
833 struct rx_desc *from = &q->desc[idx];
834 struct rx_desc *to = &q->desc[q->pidx];
835
836 q->sdesc[q->pidx] = q->sdesc[idx];
837 to->addr_lo = from->addr_lo; // already big endian
838 to->addr_hi = from->addr_hi; // likewise
839 wmb(); /* necessary ? */
840 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
841 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
842 q->credits++;
843
844 if (++q->pidx == q->size) {
845 q->pidx = 0;
846 q->gen ^= 1;
847 }
848 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
849 }
850
851 static void
852 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
853 {
854 uint32_t *addr;
855
856 addr = arg;
857 *addr = segs[0].ds_addr;
858 }
859
860 static int
861 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
862 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
863 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
864 {
865 size_t len = nelem * elem_size;
866 void *s = NULL;
867 void *p = NULL;
868 int err;
869
870 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
871 BUS_SPACE_MAXADDR_32BIT,
872 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
873 len, 0, NULL, NULL, tag)) != 0) {
874 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
875 return (ENOMEM);
876 }
877
878 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
879 map)) != 0) {
880 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
881 return (ENOMEM);
882 }
883
884 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
885 bzero(p, len);
886 *(void **)desc = p;
887
888 if (sw_size) {
889 len = nelem * sw_size;
890 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
891 *(void **)sdesc = s;
892 }
893 if (parent_entry_tag == NULL)
894 return (0);
895
896 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
897 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
898 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
899 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
900 NULL, NULL, entry_tag)) != 0) {
901 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
902 return (ENOMEM);
903 }
904 return (0);
905 }
906
907 static void
908 sge_slow_intr_handler(void *arg, int ncount)
909 {
910 adapter_t *sc = arg;
911
912 t3_slow_intr_handler(sc);
913 }
914
915 /**
916 * sge_timer_cb - perform periodic maintenance of an SGE qset
917 * @data: the SGE queue set to maintain
918 *
919 * Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
921 *
922 * a) Cleans up any completed Tx descriptors that may still be pending.
923 * Normal descriptor cleanup happens when new packets are added to a Tx
924 * queue so this timer is relatively infrequent and does any cleanup only
925 * if the Tx queue has not seen any new packets in a while. We make a
926 * best effort attempt to reclaim descriptors, in that we don't wait
927 * around if we cannot get a queue's lock (which most likely is because
928 * someone else is queueing new packets and so will also handle the clean
929 * up). Since control queues use immediate data exclusively we don't
930 * bother cleaning them up here.
931 *
932 * b) Replenishes Rx queues that have run out due to memory shortage.
933 * Normally new Rx buffers are added when existing ones are consumed but
934 * when out of memory a queue can become empty. We try to add only a few
935 * buffers here, the queue will be replenished fully as these new buffers
936 * are used up if memory shortage has subsided.
937 *
938 * c) Return coalesced response queue credits in case a response queue is
939 * starved.
940 *
941 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
942 * fifo overflows and the FW doesn't implement any recovery scheme yet.
943 */
944 static void
945 sge_timer_cb(void *arg)
946 {
947 adapter_t *sc = arg;
948 if ((sc->flags & USING_MSIX) == 0) {
949
950 struct port_info *pi;
951 struct sge_qset *qs;
952 struct sge_txq *txq;
953 int i, j;
954 int reclaim_ofl, refill_rx;
955
956 if (sc->open_device_map == 0)
957 return;
958
959 for (i = 0; i < sc->params.nports; i++) {
960 pi = &sc->port[i];
961 for (j = 0; j < pi->nqsets; j++) {
962 qs = &sc->sge.qs[pi->first_qset + j];
963 txq = &qs->txq[0];
964 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
965 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
966 (qs->fl[1].credits < qs->fl[1].size));
967 if (reclaim_ofl || refill_rx) {
968 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
969 break;
970 }
971 }
972 }
973 }
974
975 if (sc->params.nports > 2) {
976 int i;
977
978 for_each_port(sc, i) {
979 struct port_info *pi = &sc->port[i];
980
981 t3_write_reg(sc, A_SG_KDOORBELL,
982 F_SELEGRCNTX |
983 (FW_TUNNEL_SGEEC_START + pi->first_qset));
984 }
985 }
986 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
987 sc->open_device_map != 0)
988 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
989 }
990
991 /*
992 * This is meant to be a catch-all function to keep sge state private
993 * to sge.c
994 *
995 */
996 int
997 t3_sge_init_adapter(adapter_t *sc)
998 {
999 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
1000 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1001 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1002 return (0);
1003 }
1004
1005 int
1006 t3_sge_reset_adapter(adapter_t *sc)
1007 {
1008 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1009 return (0);
1010 }
1011
1012 int
1013 t3_sge_init_port(struct port_info *pi)
1014 {
1015 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1016 return (0);
1017 }
1018
1019 /**
1020 * refill_rspq - replenish an SGE response queue
1021 * @adapter: the adapter
1022 * @q: the response queue to replenish
1023 * @credits: how many new responses to make available
1024 *
1025 * Replenishes a response queue by making the supplied number of responses
1026 * available to HW.
1027 */
1028 static __inline void
1029 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1030 {
1031
1032 /* mbufs are allocated on demand when a rspq entry is processed. */
1033 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1034 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1035 }
1036
1037 static void
1038 sge_txq_reclaim_handler(void *arg, int ncount)
1039 {
1040 struct sge_qset *qs = arg;
1041 int i;
1042
1043 for (i = 0; i < 3; i++)
1044 reclaim_completed_tx(qs, 16, i);
1045 }
1046
1047 static void
1048 sge_timer_reclaim(void *arg, int ncount)
1049 {
1050 struct port_info *pi = arg;
1051 int i, nqsets = pi->nqsets;
1052 adapter_t *sc = pi->adapter;
1053 struct sge_qset *qs;
1054 struct mtx *lock;
1055
1056 KASSERT((sc->flags & USING_MSIX) == 0,
1057 ("can't call timer reclaim for msi-x"));
1058
1059 for (i = 0; i < nqsets; i++) {
1060 qs = &sc->sge.qs[pi->first_qset + i];
1061
1062 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1063 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1064 &sc->sge.qs[0].rspq.lock;
1065
1066 if (mtx_trylock(lock)) {
1067 /* XXX currently assume that we are *NOT* polling */
1068 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1069
1070 if (qs->fl[0].credits < qs->fl[0].size - 16)
1071 __refill_fl(sc, &qs->fl[0]);
1072 if (qs->fl[1].credits < qs->fl[1].size - 16)
1073 __refill_fl(sc, &qs->fl[1]);
1074
1075 if (status & (1 << qs->rspq.cntxt_id)) {
1076 if (qs->rspq.credits) {
1077 refill_rspq(sc, &qs->rspq, 1);
1078 qs->rspq.credits--;
1079 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1080 1 << qs->rspq.cntxt_id);
1081 }
1082 }
1083 mtx_unlock(lock);
1084 }
1085 }
1086 }
1087
1088 /**
1089 * init_qset_cntxt - initialize an SGE queue set context info
1090 * @qs: the queue set
1091 * @id: the queue set id
1092 *
1093 * Initializes the TIDs and context ids for the queues of a queue set.
1094 */
1095 static void
1096 init_qset_cntxt(struct sge_qset *qs, u_int id)
1097 {
1098
1099 qs->rspq.cntxt_id = id;
1100 qs->fl[0].cntxt_id = 2 * id;
1101 qs->fl[1].cntxt_id = 2 * id + 1;
1102 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1103 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1104 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1105 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1106 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1107
1108 mbufq_init(&qs->txq[TXQ_ETH].sendq);
1109 mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1110 mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1111 }
1112
1113
1114 static void
1115 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1116 {
1117 txq->in_use += ndesc;
1118 /*
	 * XXX we don't handle stopping of the queue;
	 * presumably start handles this when we bump against the end
1121 */
1122 txqs->gen = txq->gen;
1123 txq->unacked += ndesc;
1124 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1125 txq->unacked &= 31;
1126 txqs->pidx = txq->pidx;
1127 txq->pidx += ndesc;
1128 #ifdef INVARIANTS
1129 if (((txqs->pidx > txq->cidx) &&
1130 (txq->pidx < txqs->pidx) &&
1131 (txq->pidx >= txq->cidx)) ||
1132 ((txqs->pidx < txq->cidx) &&
1133 (txq->pidx >= txq-> cidx)) ||
1134 ((txqs->pidx < txq->cidx) &&
1135 (txq->cidx < txqs->pidx)))
1136 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1137 txqs->pidx, txq->pidx, txq->cidx);
1138 #endif
1139 if (txq->pidx >= txq->size) {
1140 txq->pidx -= txq->size;
1141 txq->gen ^= 1;
1142 }
1143
1144 }
1145
1146 /**
1147 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1148 * @m: the packet mbufs
1149 * @nsegs: the number of segments
1150 *
1151 * Returns the number of Tx descriptors needed for the given Ethernet
1152 * packet. Ethernet packets require addition of WR and CPL headers.
1153 */
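/*
 * For example, a non-TSO packet that does not fit in PIO_LEN and maps to
 * three DMA segments needs sgl_len(3) + 2 = 7 flits (WR and CPL headers
 * plus the SGL); flits_to_desc() then converts the flit count into a
 * descriptor count.
 */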
1154 static __inline unsigned int
1155 calc_tx_descs(const struct mbuf *m, int nsegs)
1156 {
1157 unsigned int flits;
1158
1159 if (m->m_pkthdr.len <= PIO_LEN)
1160 return 1;
1161
1162 flits = sgl_len(nsegs) + 2;
1163 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1164 flits++;
1165
1166 return flits_to_desc(flits);
1167 }
1168
1169 static unsigned int
1170 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1171 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1172 {
1173 struct mbuf *m0;
1174 int err, pktlen, pass = 0;
1175 bus_dma_tag_t tag = txq->entry_tag;
1176
1177 retry:
1178 err = 0;
1179 m0 = *m;
1180 pktlen = m0->m_pkthdr.len;
1181 #if defined(__i386__) || defined(__amd64__)
1182 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1183 goto done;
1184 } else
1185 #endif
1186 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1187
1188 if (err == 0) {
1189 goto done;
1190 }
1191 if (err == EFBIG && pass == 0) {
1192 pass = 1;
1193 /* Too many segments, try to defrag */
1194 m0 = m_defrag(m0, M_DONTWAIT);
1195 if (m0 == NULL) {
1196 m_freem(*m);
1197 *m = NULL;
1198 return (ENOBUFS);
1199 }
1200 *m = m0;
1201 goto retry;
	} else if (err == ENOMEM) {
		return (err);
	} else if (err) {
1205 if (cxgb_debug)
1206 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1207 m_freem(m0);
1208 *m = NULL;
1209 return (err);
1210 }
1211 done:
1212 #if !defined(__i386__) && !defined(__amd64__)
1213 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1214 #endif
1215 txsd->flags |= TX_SW_DESC_MAPPED;
1216
1217 return (0);
1218 }
1219
1220 /**
1221 * make_sgl - populate a scatter/gather list for a packet
1222 * @sgp: the SGL to populate
1223 * @segs: the packet dma segments
1224 * @nsegs: the number of segments
1225 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	The SGL's size in 8-byte words is given by sgl_len(); the caller must
 *	size the destination SGL appropriately.
1229 */
1230 static __inline void
1231 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1232 {
1233 int i, idx;
1234
1235 for (idx = 0, i = 0; i < nsegs; i++) {
1236 /*
1237 * firmware doesn't like empty segments
1238 */
1239 if (segs[i].ds_len == 0)
1240 continue;
1241 if (i && idx == 0)
1242 ++sgp;
1243
1244 sgp->len[idx] = htobe32(segs[i].ds_len);
1245 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1246 idx ^= 1;
1247 }
1248
1249 if (idx) {
1250 sgp->len[idx] = 0;
1251 sgp->addr[idx] = 0;
1252 }
1253 }
1254
1255 /**
1256 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1257 * @adap: the adapter
1258 * @q: the Tx queue
1259 *
 *	Ring the doorbell if a Tx queue is asleep. There is a natural race
 *	where the HW may go to sleep just after we check; in that case the
 *	interrupt handler will detect the outstanding TX packet and ring the
 *	doorbell for us.
1264 *
1265 * When GTS is disabled we unconditionally ring the doorbell.
1266 */
1267 static __inline void
1268 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1269 {
1270 #if USE_GTS
1271 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1272 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1273 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1274 #ifdef T3_TRACE
1275 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1276 q->cntxt_id);
1277 #endif
1278 t3_write_reg(adap, A_SG_KDOORBELL,
1279 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1280 }
1281 #else
1282 if (mustring || ++q->db_pending >= 32) {
1283 wmb(); /* write descriptors before telling HW */
1284 t3_write_reg(adap, A_SG_KDOORBELL,
1285 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1286 q->db_pending = 0;
1287 }
1288 #endif
1289 }
1290
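/*
 * Write the generation bit into the last flit of a Tx descriptor (only
 * needed when two generation bits are in use).  The generation value lets
 * the SGE distinguish freshly written descriptors from stale ones after
 * the producer index wraps around the ring.
 */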
1291 static __inline void
1292 wr_gen2(struct tx_desc *d, unsigned int gen)
1293 {
1294 #if SGE_NUM_GENBITS == 2
1295 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1296 #endif
1297 }
1298
1299 /**
1300 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1301 * @ndesc: number of Tx descriptors spanned by the SGL
1302 * @txd: first Tx descriptor to be written
1303 * @txqs: txq state (generation and producer index)
1304 * @txq: the SGE Tx queue
1305 * @sgl: the SGL
1306 * @flits: number of flits to the start of the SGL in the first descriptor
1307 * @sgl_flits: the SGL size in flits
1308 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1309 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1310 *
1311 * Write a work request header and an associated SGL. If the SGL is
1312 * small enough to fit into one Tx descriptor it has already been written
1313 * and we just need to write the WR header. Otherwise we distribute the
1314 * SGL across the number of descriptors it spans.
1315 */
1316 static void
1317 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1318 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1319 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1320 {
1321
1322 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1323 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1324
1325 if (__predict_true(ndesc == 1)) {
1326 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1327 V_WR_SGLSFLT(flits)) | wr_hi,
1328 htonl(V_WR_LEN(flits + sgl_flits) |
1329 V_WR_GEN(txqs->gen)) | wr_lo);
1330 /* XXX gen? */
1331 wr_gen2(txd, txqs->gen);
1332
1333 } else {
1334 unsigned int ogen = txqs->gen;
1335 const uint64_t *fp = (const uint64_t *)sgl;
1336 struct work_request_hdr *wp = wrp;
1337
1338 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1339 V_WR_SGLSFLT(flits)) | wr_hi;
1340
1341 while (sgl_flits) {
1342 unsigned int avail = WR_FLITS - flits;
1343
1344 if (avail > sgl_flits)
1345 avail = sgl_flits;
1346 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1347 sgl_flits -= avail;
1348 ndesc--;
1349 if (!sgl_flits)
1350 break;
1351
1352 fp += avail;
1353 txd++;
1354 txsd++;
1355 if (++txqs->pidx == txq->size) {
1356 txqs->pidx = 0;
1357 txqs->gen ^= 1;
1358 txd = txq->desc;
1359 txsd = txq->sdesc;
1360 }
1361
1362 /*
1363 * when the head of the mbuf chain
1364 * is freed all clusters will be freed
1365 * with it
1366 */
1367 wrp = (struct work_request_hdr *)txd;
1368 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1369 V_WR_SGLSFLT(1)) | wr_hi;
1370 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1371 sgl_flits + 1)) |
1372 V_WR_GEN(txqs->gen)) | wr_lo;
1373 wr_gen2(txd, txqs->gen);
1374 flits = 1;
1375 }
1376 wrp->wrh_hi |= htonl(F_WR_EOP);
1377 wmb();
1378 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1379 wr_gen2((struct tx_desc *)wp, ogen);
1380 }
1381 }
1382
1383 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1384 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1385
1386 #define GET_VTAG(cntrl, m) \
1387 do { \
1388 if ((m)->m_flags & M_VLANTAG) \
1389 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1390 } while (0)
1391
1392 static int
1393 t3_encap(struct sge_qset *qs, struct mbuf **m)
1394 {
1395 adapter_t *sc;
1396 struct mbuf *m0;
1397 struct sge_txq *txq;
1398 struct txq_state txqs;
1399 struct port_info *pi;
1400 unsigned int ndesc, flits, cntrl, mlen;
1401 int err, nsegs, tso_info = 0;
1402
1403 struct work_request_hdr *wrp;
1404 struct tx_sw_desc *txsd;
1405 struct sg_ent *sgp, *sgl;
1406 uint32_t wr_hi, wr_lo, sgl_flits;
1407 bus_dma_segment_t segs[TX_MAX_SEGS];
1408
1409 struct tx_desc *txd;
1410
1411 pi = qs->port;
1412 sc = pi->adapter;
1413 txq = &qs->txq[TXQ_ETH];
1414 txd = &txq->desc[txq->pidx];
1415 txsd = &txq->sdesc[txq->pidx];
1416 sgl = txq->txq_sgl;
1417
1418 prefetch(txd);
1419 m0 = *m;
1420
1421 mtx_assert(&qs->lock, MA_OWNED);
1422 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1423 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1424
1425 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1426 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1427 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1428
1429 if (m0->m_nextpkt != NULL) {
1430 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1431 ndesc = 1;
1432 mlen = 0;
1433 } else {
1434 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1435 &m0, segs, &nsegs))) {
1436 if (cxgb_debug)
1437 printf("failed ... err=%d\n", err);
1438 return (err);
1439 }
1440 mlen = m0->m_pkthdr.len;
1441 ndesc = calc_tx_descs(m0, nsegs);
1442 }
1443 txq_prod(txq, ndesc, &txqs);
1444
1445 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1446 txsd->m = m0;
1447
1448 if (m0->m_nextpkt != NULL) {
1449 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1450 int i, fidx;
1451
1452 if (nsegs > 7)
1453 panic("trying to coalesce %d packets in to one WR", nsegs);
1454 txq->txq_coalesced += nsegs;
1455 wrp = (struct work_request_hdr *)txd;
1456 flits = nsegs*2 + 1;
1457
1458 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1459 struct cpl_tx_pkt_batch_entry *cbe;
1460 uint64_t flit;
1461 uint32_t *hflit = (uint32_t *)&flit;
1462 int cflags = m0->m_pkthdr.csum_flags;
1463
1464 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1465 GET_VTAG(cntrl, m0);
1466 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1467 if (__predict_false(!(cflags & CSUM_IP)))
1468 cntrl |= F_TXPKT_IPCSUM_DIS;
1469 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1470 cntrl |= F_TXPKT_L4CSUM_DIS;
1471
1472 hflit[0] = htonl(cntrl);
1473 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1474 flit |= htobe64(1 << 24);
1475 cbe = &cpl_batch->pkt_entry[i];
1476 cbe->cntrl = hflit[0];
1477 cbe->len = hflit[1];
1478 cbe->addr = htobe64(segs[i].ds_addr);
1479 }
1480
1481 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1482 V_WR_SGLSFLT(flits)) |
1483 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1484 wr_lo = htonl(V_WR_LEN(flits) |
1485 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1486 set_wr_hdr(wrp, wr_hi, wr_lo);
1487 wmb();
1488 ETHER_BPF_MTAP(pi->ifp, m0);
1489 wr_gen2(txd, txqs.gen);
1490 check_ring_tx_db(sc, txq, 0);
1491 return (0);
1492 } else if (tso_info) {
1493 int eth_type;
1494 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1495 struct ether_header *eh;
1496 struct ip *ip;
1497 struct tcphdr *tcp;
1498
1499 txd->flit[2] = 0;
1500 GET_VTAG(cntrl, m0);
1501 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1502 hdr->cntrl = htonl(cntrl);
1503 hdr->len = htonl(mlen | 0x80000000);
1504
1505 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1506 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1507 m0, mlen, m0->m_pkthdr.tso_segsz,
1508 m0->m_pkthdr.csum_flags, m0->m_flags);
1509 panic("tx tso packet too small");
1510 }
1511
1512 /* Make sure that ether, ip, tcp headers are all in m0 */
1513 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1514 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1515 if (__predict_false(m0 == NULL)) {
1516 /* XXX panic probably an overreaction */
1517 panic("couldn't fit header into mbuf");
1518 }
1519 }
1520
1521 eh = mtod(m0, struct ether_header *);
1522 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1523 eth_type = CPL_ETH_II_VLAN;
1524 ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
1525 } else {
1526 eth_type = CPL_ETH_II;
1527 ip = (struct ip *)(eh + 1);
1528 }
1529 tcp = (struct tcphdr *)(ip + 1);
1530
1531 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1532 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1533 V_LSO_TCPHDR_WORDS(tcp->th_off);
1534 hdr->lso_info = htonl(tso_info);
1535
1536 if (__predict_false(mlen <= PIO_LEN)) {
1537 /*
			 * The packet is not undersized yet still fits in PIO_LEN,
			 * which indicates a TSO bug at the higher levels.
1540 */
1541 txsd->m = NULL;
1542 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1543 flits = (mlen + 7) / 8 + 3;
1544 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1545 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1546 F_WR_SOP | F_WR_EOP | txqs.compl);
1547 wr_lo = htonl(V_WR_LEN(flits) |
1548 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1549 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1550 wmb();
1551 ETHER_BPF_MTAP(pi->ifp, m0);
1552 wr_gen2(txd, txqs.gen);
1553 check_ring_tx_db(sc, txq, 0);
1554 m_freem(m0);
1555 return (0);
1556 }
1557 flits = 3;
1558 } else {
1559 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1560
1561 GET_VTAG(cntrl, m0);
1562 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1563 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1564 cntrl |= F_TXPKT_IPCSUM_DIS;
1565 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1566 cntrl |= F_TXPKT_L4CSUM_DIS;
1567 cpl->cntrl = htonl(cntrl);
1568 cpl->len = htonl(mlen | 0x80000000);
1569
1570 if (mlen <= PIO_LEN) {
1571 txsd->m = NULL;
1572 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1573 flits = (mlen + 7) / 8 + 2;
1574
1575 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1576 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1577 F_WR_SOP | F_WR_EOP | txqs.compl);
1578 wr_lo = htonl(V_WR_LEN(flits) |
1579 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1580 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1581 wmb();
1582 ETHER_BPF_MTAP(pi->ifp, m0);
1583 wr_gen2(txd, txqs.gen);
1584 check_ring_tx_db(sc, txq, 0);
1585 m_freem(m0);
1586 return (0);
1587 }
1588 flits = 2;
1589 }
1590 wrp = (struct work_request_hdr *)txd;
1591 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1592 make_sgl(sgp, segs, nsegs);
1593
1594 sgl_flits = sgl_len(nsegs);
1595
1596 ETHER_BPF_MTAP(pi->ifp, m0);
1597
1598 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1599 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1600 wr_lo = htonl(V_WR_TID(txq->token));
1601 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1602 sgl_flits, wr_hi, wr_lo);
1603 check_ring_tx_db(sc, txq, 0);
1604
1605 return (0);
1606 }
1607
1608 void
1609 cxgb_tx_watchdog(void *arg)
1610 {
1611 struct sge_qset *qs = arg;
1612 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1613
1614 if (qs->coalescing != 0 &&
1615 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1616 TXQ_RING_EMPTY(qs))
1617 qs->coalescing = 0;
1618 else if (qs->coalescing == 0 &&
1619 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1620 qs->coalescing = 1;
1621 if (TXQ_TRYLOCK(qs)) {
1622 qs->qs_flags |= QS_FLUSHING;
1623 cxgb_start_locked(qs);
1624 qs->qs_flags &= ~QS_FLUSHING;
1625 TXQ_UNLOCK(qs);
1626 }
1627 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1628 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1629 qs, txq->txq_watchdog.c_cpu);
1630 }
1631
1632 static void
1633 cxgb_tx_timeout(void *arg)
1634 {
1635 struct sge_qset *qs = arg;
1636 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1637
1638 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1639 qs->coalescing = 1;
1640 if (TXQ_TRYLOCK(qs)) {
1641 qs->qs_flags |= QS_TIMEOUT;
1642 cxgb_start_locked(qs);
1643 qs->qs_flags &= ~QS_TIMEOUT;
1644 TXQ_UNLOCK(qs);
1645 }
1646 }
1647
1648 static void
1649 cxgb_start_locked(struct sge_qset *qs)
1650 {
1651 struct mbuf *m_head = NULL;
1652 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1653 struct port_info *pi = qs->port;
1654 struct ifnet *ifp = pi->ifp;
1655
1656 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1657 reclaim_completed_tx(qs, 0, TXQ_ETH);
1658
1659 if (!pi->link_config.link_ok) {
1660 TXQ_RING_FLUSH(qs);
1661 return;
1662 }
1663 TXQ_LOCK_ASSERT(qs);
1664 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1665 pi->link_config.link_ok) {
1666 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1667
1668 if (txq->size - txq->in_use <= TX_MAX_DESC)
1669 break;
1670
1671 if ((m_head = cxgb_dequeue(qs)) == NULL)
1672 break;
1673 /*
		 * Encapsulation can modify our pointer, and/or make it
1675 * NULL on failure. In that event, we can't requeue.
1676 */
1677 if (t3_encap(qs, &m_head) || m_head == NULL)
1678 break;
1679
1680 m_head = NULL;
1681 }
1682
1683 if (txq->db_pending)
1684 check_ring_tx_db(pi->adapter, txq, 1);
1685
1686 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1687 pi->link_config.link_ok)
1688 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1689 qs, txq->txq_timer.c_cpu);
1690 if (m_head != NULL)
1691 m_freem(m_head);
1692 }
1693
1694 static int
1695 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1696 {
1697 struct port_info *pi = qs->port;
1698 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1699 struct buf_ring *br = txq->txq_mr;
1700 int error, avail;
1701
1702 avail = txq->size - txq->in_use;
1703 TXQ_LOCK_ASSERT(qs);
1704
1705 /*
1706 * We can only do a direct transmit if the following are true:
1707 * - we aren't coalescing (ring < 3/4 full)
1708 * - the link is up -- checked in caller
1709 * - there are no packets enqueued already
1710 * - there is space in hardware transmit queue
1711 */
1712 if (check_pkt_coalesce(qs) == 0 &&
1713 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1714 if (t3_encap(qs, &m)) {
1715 if (m != NULL &&
1716 (error = drbr_enqueue(ifp, br, m)) != 0)
1717 return (error);
1718 } else {
1719 if (txq->db_pending)
1720 check_ring_tx_db(pi->adapter, txq, 1);
1721
1722 /*
1723 * We've bypassed the buf ring so we need to update
1724 * the stats directly
1725 */
1726 txq->txq_direct_packets++;
1727 txq->txq_direct_bytes += m->m_pkthdr.len;
1728 }
1729 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1730 return (error);
1731
1732 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1733 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1734 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1735 cxgb_start_locked(qs);
1736 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1737 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1738 qs, txq->txq_timer.c_cpu);
1739 return (0);
1740 }
1741
1742 int
1743 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1744 {
1745 struct sge_qset *qs;
1746 struct port_info *pi = ifp->if_softc;
1747 int error, qidx = pi->first_qset;
1748
1749 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1750 ||(!pi->link_config.link_ok)) {
1751 m_freem(m);
1752 return (0);
1753 }
1754
1755 if (m->m_flags & M_FLOWID)
1756 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1757
1758 qs = &pi->adapter->sge.qs[qidx];
1759
1760 if (TXQ_TRYLOCK(qs)) {
1761 /* XXX running */
1762 error = cxgb_transmit_locked(ifp, qs, m);
1763 TXQ_UNLOCK(qs);
1764 } else
1765 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1766 return (error);
1767 }
1768 void
1769 cxgb_start(struct ifnet *ifp)
1770 {
1771 struct port_info *pi = ifp->if_softc;
1772 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1773
1774 if (!pi->link_config.link_ok)
1775 return;
1776
1777 TXQ_LOCK(qs);
1778 cxgb_start_locked(qs);
1779 TXQ_UNLOCK(qs);
1780 }
1781
1782 void
1783 cxgb_qflush(struct ifnet *ifp)
1784 {
1785 /*
	 * Flush any mbufs enqueued in the buf_rings and in the
	 * transmit queues.  This is a no-op for now.
1789 */
1790 return;
1791 }
1792
1793 /**
1794 * write_imm - write a packet into a Tx descriptor as immediate data
1795 * @d: the Tx descriptor to write
1796 * @m: the packet
1797 * @len: the length of packet data to write as immediate data
1798 * @gen: the generation bit value to write
1799 *
1800 * Writes a packet as immediate data into a Tx descriptor. The packet
1801 * contains a work request at its beginning. We must write the packet
1802 * carefully so the SGE doesn't read accidentally before it's written in
1803 * its entirety.
1804 */
1805 static __inline void
1806 write_imm(struct tx_desc *d, struct mbuf *m,
1807 unsigned int len, unsigned int gen)
1808 {
1809 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1810 struct work_request_hdr *to = (struct work_request_hdr *)d;
1811 uint32_t wr_hi, wr_lo;
1812
1813 if (len > WR_LEN)
1814 panic("len too big %d\n", len);
1815 if (len < sizeof(*from))
1816 panic("len too small %d", len);
1817
1818 memcpy(&to[1], &from[1], len - sizeof(*from));
1819 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1820 V_WR_BCNTLFLT(len & 7));
1821 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1822 V_WR_LEN((len + 7) / 8));
1823 set_wr_hdr(to, wr_hi, wr_lo);
1824 wmb();
1825 wr_gen2(d, gen);
1826
1827 /*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
1830 */
1831 if (m->m_type != MT_DONTFREE)
1832 m_freem(m);
1833
1834 }
1835
1836 /**
1837 * check_desc_avail - check descriptor availability on a send queue
1838 * @adap: the adapter
1839 * @q: the TX queue
1840 * @m: the packet needing the descriptors
1841 * @ndesc: the number of Tx descriptors needed
1842 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1843 *
1844 * Checks if the requested number of Tx descriptors is available on an
1845 * SGE send queue. If the queue is already suspended or not enough
1846 * descriptors are available the packet is queued for later transmission.
1847 * Must be called with the Tx queue locked.
1848 *
1849 * Returns 0 if enough descriptors are available, 1 if there aren't
1850 * enough descriptors and the packet has been queued, and 2 if the caller
1851 * needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the meantime.
1853 */
1854 static __inline int
1855 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1856 struct mbuf *m, unsigned int ndesc,
1857 unsigned int qid)
1858 {
1859 /*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
1863 */
1864 if (__predict_false(!mbufq_empty(&q->sendq))) {
1865 addq_exit: mbufq_tail(&q->sendq, m);
1866 return 1;
1867 }
1868 if (__predict_false(q->size - q->in_use < ndesc)) {
1869
1870 struct sge_qset *qs = txq_to_qset(q, qid);
1871
1872 setbit(&qs->txq_stopped, qid);
1873 if (should_restart_tx(q) &&
1874 test_and_clear_bit(qid, &qs->txq_stopped))
1875 return 2;
1876
1877 q->stops++;
1878 goto addq_exit;
1879 }
1880 return 0;
1881 }
1882
1883
1884 /**
1885 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1886 * @q: the SGE control Tx queue
1887 *
1888 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1889 * that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs.
1891 */
1892 static __inline void
1893 reclaim_completed_tx_imm(struct sge_txq *q)
1894 {
1895 unsigned int reclaim = q->processed - q->cleaned;
1896
1897 q->in_use -= reclaim;
1898 q->cleaned += reclaim;
1899 }
1900
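/*
 * immediate - check whether a packet can be sent entirely as immediate
 * data, i.e. copied into a single work request (at most WR_LEN bytes)
 * without a scatter/gather list.
 */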
1901 static __inline int
1902 immediate(const struct mbuf *m)
1903 {
1904 	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
1905 }
1906
1907 /**
1908 * ctrl_xmit - send a packet through an SGE control Tx queue
1909 * @adap: the adapter
1910 * @q: the control queue
1911 * @m: the packet
1912 *
1913 * Send a packet through an SGE control Tx queue. Packets sent through
1914 * a control queue must fit entirely as immediate data in a single Tx
1915 * descriptor and have no page fragments.
1916 */
1917 static int
1918 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1919 {
1920 int ret;
1921 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1922 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1923
1924 if (__predict_false(!immediate(m))) {
1925 m_freem(m);
1926 return 0;
1927 }
1928
1929 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1930 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1931
1932 TXQ_LOCK(qs);
1933 again: reclaim_completed_tx_imm(q);
1934
1935 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1936 if (__predict_false(ret)) {
1937 if (ret == 1) {
1938 TXQ_UNLOCK(qs);
1939 return (ENOSPC);
1940 }
1941 goto again;
1942 }
1943 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1944
1945 q->in_use++;
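	/*
	 * Advance the producer index; flip the generation bit when the ring
	 * wraps so the SGE can tell new descriptors from stale ones.
	 */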
1946 if (++q->pidx >= q->size) {
1947 q->pidx = 0;
1948 q->gen ^= 1;
1949 }
1950 TXQ_UNLOCK(qs);
1951 wmb();
1952 t3_write_reg(adap, A_SG_KDOORBELL,
1953 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1954 return (0);
1955 }
1956
1957
1958 /**
1959 * restart_ctrlq - restart a suspended control queue
1960 *	@qs: the queue set containing the control queue
1961 *
1962 * Resumes transmission on a suspended Tx control queue.
1963 */
1964 static void
1965 restart_ctrlq(void *data, int npending)
1966 {
1967 struct mbuf *m;
1968 struct sge_qset *qs = (struct sge_qset *)data;
1969 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1970 adapter_t *adap = qs->port->adapter;
1971
1972 TXQ_LOCK(qs);
1973 again: reclaim_completed_tx_imm(q);
1974
1975 while (q->in_use < q->size &&
1976 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1977
1978 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1979
1980 if (++q->pidx >= q->size) {
1981 q->pidx = 0;
1982 q->gen ^= 1;
1983 }
1984 q->in_use++;
1985 }
1986 if (!mbufq_empty(&q->sendq)) {
1987 setbit(&qs->txq_stopped, TXQ_CTRL);
1988
1989 if (should_restart_tx(q) &&
1990 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1991 goto again;
1992 q->stops++;
1993 }
1994 TXQ_UNLOCK(qs);
1995 t3_write_reg(adap, A_SG_KDOORBELL,
1996 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1997 }
1998
1999
2000 /*
2001 * Send a management message through control queue 0
2002 */
2003 int
2004 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2005 {
2006 return ctrl_xmit(adap, &adap->sge.qs[0], m);
2007 }
2008
2009 /**
2010 * free_qset - free the resources of an SGE queue set
2011 * @sc: the controller owning the queue set
2012 * @q: the queue set
2013 *
2014 * Release the HW and SW resources associated with an SGE queue set, such
2015 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
2016 * queue set must be quiesced prior to calling this.
2017 */
2018 static void
2019 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2020 {
2021 int i;
2022
2023 reclaim_completed_tx(q, 0, TXQ_ETH);
2024 if (q->txq[TXQ_ETH].txq_mr != NULL)
2025 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2026 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2027 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2028 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2029 }
2030
2031 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2032 if (q->fl[i].desc) {
2033 mtx_lock_spin(&sc->sge.reg_lock);
2034 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2035 mtx_unlock_spin(&sc->sge.reg_lock);
2036 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2037 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2038 q->fl[i].desc_map);
2039 bus_dma_tag_destroy(q->fl[i].desc_tag);
2040 bus_dma_tag_destroy(q->fl[i].entry_tag);
2041 }
2042 if (q->fl[i].sdesc) {
2043 free_rx_bufs(sc, &q->fl[i]);
2044 free(q->fl[i].sdesc, M_DEVBUF);
2045 }
2046 }
2047
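	/*
	 * The caller holds the queue set lock (see t3_free_sge_resources and
	 * the error path of t3_sge_alloc_qset); release it before destroying.
	 */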
2048 mtx_unlock(&q->lock);
2049 MTX_DESTROY(&q->lock);
2050 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2051 if (q->txq[i].desc) {
2052 mtx_lock_spin(&sc->sge.reg_lock);
2053 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2054 mtx_unlock_spin(&sc->sge.reg_lock);
2055 bus_dmamap_unload(q->txq[i].desc_tag,
2056 q->txq[i].desc_map);
2057 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2058 q->txq[i].desc_map);
2059 bus_dma_tag_destroy(q->txq[i].desc_tag);
2060 bus_dma_tag_destroy(q->txq[i].entry_tag);
2061 }
2062 if (q->txq[i].sdesc) {
2063 free(q->txq[i].sdesc, M_DEVBUF);
2064 }
2065 }
2066
2067 if (q->rspq.desc) {
2068 mtx_lock_spin(&sc->sge.reg_lock);
2069 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2070 mtx_unlock_spin(&sc->sge.reg_lock);
2071
2072 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2073 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2074 q->rspq.desc_map);
2075 bus_dma_tag_destroy(q->rspq.desc_tag);
2076 MTX_DESTROY(&q->rspq.lock);
2077 }
2078
2079 #ifdef INET
2080 tcp_lro_free(&q->lro.ctrl);
2081 #endif
2082
2083 bzero(q, sizeof(*q));
2084 }
2085
2086 /**
2087 * t3_free_sge_resources - free SGE resources
2088 * @sc: the adapter softc
2089 *
2090 * Frees resources used by the SGE queue sets.
2091 */
2092 void
2093 t3_free_sge_resources(adapter_t *sc)
2094 {
2095 int i, nqsets;
2096
2097 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2098 nqsets += sc->port[i].nqsets;
2099
2100 for (i = 0; i < nqsets; ++i) {
2101 TXQ_LOCK(&sc->sge.qs[i]);
2102 t3_free_qset(sc, &sc->sge.qs[i]);
2103 }
2104
2105 }
2106
2107 /**
2108 * t3_sge_start - enable SGE
2109 * @sc: the controller softc
2110 *
2111 * Enables the SGE for DMAs. This is the last step in starting packet
2112 * transfers.
2113 */
2114 void
2115 t3_sge_start(adapter_t *sc)
2116 {
2117 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2118 }
2119
2120 /**
2121 * t3_sge_stop - disable SGE operation
2122 * @sc: the adapter
2123 *
2124 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
2125 * from error interrupts) or from normal process context. In the latter
2126 * case it also disables any pending queue restart tasklets. Note that
2127 * if it is called in interrupt context it cannot disable the restart
2128 *	tasklets as it cannot wait; however, the tasklets will have no effect
2129 * since the doorbells are disabled and the driver will call this again
2130 * later from process context, at which time the tasklets will be stopped
2131 * if they are still running.
2132 */
2133 void
2134 t3_sge_stop(adapter_t *sc)
2135 {
2136 int i, nqsets;
2137
2138 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2139
2140 if (sc->tq == NULL)
2141 return;
2142
2143 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2144 nqsets += sc->port[i].nqsets;
2145 #ifdef notyet
2146 /*
2147 *
2148 * XXX
2149 */
2150 for (i = 0; i < nqsets; ++i) {
2151 struct sge_qset *qs = &sc->sge.qs[i];
2152
2153 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2154 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2155 }
2156 #endif
2157 }
2158
2159 /**
2160 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2161 *	@qs: the queue set owning the Tx queue
2162 *	@reclaimable: the number of descriptors to reclaim
2163 *	@queue: the Tx queue index within the queue set (e.g. TXQ_ETH or TXQ_OFLD)
2164 *
2167 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2168 *	Tx buffers.  Called with the Tx queue lock held.
2171 */
2172 void
2173 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2174 {
2175 struct tx_sw_desc *txsd;
2176 unsigned int cidx, mask;
2177 struct sge_txq *q = &qs->txq[queue];
2178
2179 #ifdef T3_TRACE
2180 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2181 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2182 #endif
2183 cidx = q->cidx;
2184 mask = q->size - 1;
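	/* The index arithmetic below assumes the ring size is a power of two. */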
2185 txsd = &q->sdesc[cidx];
2186
2187 mtx_assert(&qs->lock, MA_OWNED);
2188 while (reclaimable--) {
2189 prefetch(q->sdesc[(cidx + 1) & mask].m);
2190 prefetch(q->sdesc[(cidx + 2) & mask].m);
2191
2192 if (txsd->m != NULL) {
2193 if (txsd->flags & TX_SW_DESC_MAPPED) {
2194 bus_dmamap_unload(q->entry_tag, txsd->map);
2195 txsd->flags &= ~TX_SW_DESC_MAPPED;
2196 }
2197 m_freem_list(txsd->m);
2198 txsd->m = NULL;
2199 } else
2200 q->txq_skipped++;
2201
2202 ++txsd;
2203 if (++cidx == q->size) {
2204 cidx = 0;
2205 txsd = q->sdesc;
2206 }
2207 }
2208 q->cidx = cidx;
2209
2210 }
2211
2212 /**
2213 * is_new_response - check if a response is newly written
2214 * @r: the response descriptor
2215 * @q: the response queue
2216 *
2217 * Returns true if a response descriptor contains a yet unprocessed
2218 * response.
2219 */
2220 static __inline int
2221 is_new_response(const struct rsp_desc *r,
2222 const struct sge_rspq *q)
2223 {
2224 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2225 }
2226
2227 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2228 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2229 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2230 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2231 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2232
2233 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2234 #define NOMEM_INTR_DELAY 2500
2235
2236 /**
2237 * write_ofld_wr - write an offload work request
2238 * @adap: the adapter
2239 * @m: the packet to send
2240 * @q: the Tx queue
2241 * @pidx: index of the first Tx descriptor to write
2242 * @gen: the generation value to use
2243 * @ndesc: number of descriptors the packet will occupy
2244 *
2245 * Write an offload work request to send the supplied packet. The packet
2246 * data already carry the work request with most fields populated.
2247 */
2248 static void
2249 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2250 struct sge_txq *q, unsigned int pidx,
2251 unsigned int gen, unsigned int ndesc,
2252 bus_dma_segment_t *segs, unsigned int nsegs)
2253 {
2254 unsigned int sgl_flits, flits;
2255 struct work_request_hdr *from;
2256 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2257 struct tx_desc *d = &q->desc[pidx];
2258 struct txq_state txqs;
2259
2260 if (immediate(m) && nsegs == 0) {
2261 write_imm(d, m, m->m_len, gen);
2262 return;
2263 }
2264
2265 /* Only TX_DATA builds SGLs */
2266 from = mtod(m, struct work_request_hdr *);
2267 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2268
2269 flits = m->m_len / 8;
2270 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2271
2272 make_sgl(sgp, segs, nsegs);
2273 sgl_flits = sgl_len(nsegs);
2274
2275 txqs.gen = gen;
2276 txqs.pidx = pidx;
2277 txqs.compl = 0;
2278
2279 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2280 from->wrh_hi, from->wrh_lo);
2281 }
2282
2283 /**
2284 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2285 * @m: the packet
2286 *
2287 * Returns the number of Tx descriptors needed for the given offload
2288 * packet. These packets are already fully constructed.
2289 */
2290 static __inline unsigned int
2291 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2292 {
2293 unsigned int flits, cnt = 0;
2294 int ndescs;
2295
2296 if (m->m_len <= WR_LEN && nsegs == 0)
2297 return (1); /* packet fits as immediate data */
2298
2299 /*
2300 * This needs to be re-visited for TOE
2301 */
2302
2303 cnt = nsegs;
2304
2305 /* headers */
2306 flits = m->m_len / 8;
2307
2308 ndescs = flits_to_desc(flits + sgl_len(cnt));
2309
2310 return (ndescs);
2311 }
2312
2313 /**
2314 * ofld_xmit - send a packet through an offload queue
2315 * @adap: the adapter
2316 * @q: the Tx offload queue
2317 * @m: the packet
2318 *
2319 * Send an offload packet through an SGE offload queue.
2320 */
2321 static int
2322 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2323 {
2324 int ret, nsegs;
2325 unsigned int ndesc;
2326 unsigned int pidx, gen;
2327 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2328 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2329 struct tx_sw_desc *stx;
2330
2331 nsegs = m_get_sgllen(m);
2332 vsegs = m_get_sgl(m);
2333 ndesc = calc_tx_descs_ofld(m, nsegs);
2334 busdma_map_sgl(vsegs, segs, nsegs);
2335
2336 stx = &q->sdesc[q->pidx];
2337
2338 TXQ_LOCK(qs);
2339 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2340 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2341 if (__predict_false(ret)) {
2342 if (ret == 1) {
2343 printf("no ofld desc avail\n");
2344
2345 m_set_priority(m, ndesc); /* save for restart */
2346 TXQ_UNLOCK(qs);
2347 return (EINTR);
2348 }
2349 goto again;
2350 }
2351
2352 gen = q->gen;
2353 q->in_use += ndesc;
2354 pidx = q->pidx;
2355 q->pidx += ndesc;
2356 if (q->pidx >= q->size) {
2357 q->pidx -= q->size;
2358 q->gen ^= 1;
2359 }
2360 #ifdef T3_TRACE
2361 T3_TRACE5(adap->tb[q->cntxt_id & 7],
2362 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2363 ndesc, pidx, skb->len, skb->len - skb->data_len,
2364 skb_shinfo(skb)->nr_frags);
2365 #endif
2366 TXQ_UNLOCK(qs);
2367
2368 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2369 check_ring_tx_db(adap, q, 1);
2370 return (0);
2371 }
2372
2373 /**
2374 * restart_offloadq - restart a suspended offload queue
2375 *	@qs: the queue set containing the offload queue
2376 *
2377 * Resumes transmission on a suspended Tx offload queue.
2378 */
2379 static void
2380 restart_offloadq(void *data, int npending)
2381 {
2382 struct mbuf *m;
2383 struct sge_qset *qs = data;
2384 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2385 adapter_t *adap = qs->port->adapter;
2386 bus_dma_segment_t segs[TX_MAX_SEGS];
2387 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2388 int nsegs, cleaned;
2389
2390 TXQ_LOCK(qs);
2391 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2392
2393 while ((m = mbufq_peek(&q->sendq)) != NULL) {
2394 unsigned int gen, pidx;
2395 unsigned int ndesc = m_get_priority(m);
2396
2397 if (__predict_false(q->size - q->in_use < ndesc)) {
2398 setbit(&qs->txq_stopped, TXQ_OFLD);
2399 if (should_restart_tx(q) &&
2400 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2401 goto again;
2402 q->stops++;
2403 break;
2404 }
2405
2406 gen = q->gen;
2407 q->in_use += ndesc;
2408 pidx = q->pidx;
2409 q->pidx += ndesc;
2410 if (q->pidx >= q->size) {
2411 q->pidx -= q->size;
2412 q->gen ^= 1;
2413 }
2414
2415 (void)mbufq_dequeue(&q->sendq);
2416 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
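		/*
		 * The descriptors were reserved above, so the queue lock can
		 * be dropped while the work request is written out.
		 */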
2417 TXQ_UNLOCK(qs);
2418 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2419 TXQ_LOCK(qs);
2420 }
2421 #if USE_GTS
2422 set_bit(TXQ_RUNNING, &q->flags);
2423 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2424 #endif
2425 TXQ_UNLOCK(qs);
2426 wmb();
2427 t3_write_reg(adap, A_SG_KDOORBELL,
2428 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2429 }
2430
2431 /**
2432 * queue_set - return the queue set a packet should use
2433 * @m: the packet
2434 *
2435 * Maps a packet to the SGE queue set it should use. The desired queue
2436 * set is carried in bits 1-3 in the packet's priority.
2437 */
2438 static __inline int
2439 queue_set(const struct mbuf *m)
2440 {
2441 return m_get_priority(m) >> 1;
2442 }
2443
2444 /**
2445 * is_ctrl_pkt - return whether an offload packet is a control packet
2446 * @m: the packet
2447 *
2448 * Determines whether an offload packet should use an OFLD or a CTRL
2449 * Tx queue. This is indicated by bit 0 in the packet's priority.
2450 */
2451 static __inline int
2452 is_ctrl_pkt(const struct mbuf *m)
2453 {
2454 return m_get_priority(m) & 1;
2455 }
2456
2457 /**
2458 * t3_offload_tx - send an offload packet
2459 * @tdev: the offload device to send to
2460 * @m: the packet
2461 *
2462 * Sends an offload packet. We use the packet priority to select the
2463 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2464 * should be sent as regular or control, bits 1-3 select the queue set.
2465 */
2466 int
2467 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2468 {
2469 adapter_t *adap = tdev2adap(tdev);
2470 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2471
2472 if (__predict_false(is_ctrl_pkt(m)))
2473 return ctrl_xmit(adap, qs, m);
2474
2475 return ofld_xmit(adap, qs, m);
2476 }
2477
2478 /**
2479 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2480 * @tdev: the offload device that will be receiving the packets
2481 * @q: the SGE response queue that assembled the bundle
2482 * @m: the partial bundle
2483 * @n: the number of packets in the bundle
2484 *
2485 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2486 */
2487 static __inline void
2488 deliver_partial_bundle(struct t3cdev *tdev,
2489 struct sge_rspq *q,
2490 struct mbuf *mbufs[], int n)
2491 {
2492 if (n) {
2493 q->offload_bundles++;
2494 cxgb_ofld_recv(tdev, mbufs, n);
2495 }
2496 }
2497
2498 static __inline int
2499 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2500 struct mbuf *m, struct mbuf *rx_gather[],
2501 unsigned int gather_idx)
2502 {
2503
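	/*
	 * Gather offload packets into bundles of up to RX_BUNDLE_SIZE before
	 * handing them to the offload layer, amortizing the per-call cost.
	 */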
2504 rq->offload_pkts++;
2505 m->m_pkthdr.header = mtod(m, void *);
2506 rx_gather[gather_idx++] = m;
2507 if (gather_idx == RX_BUNDLE_SIZE) {
2508 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2509 gather_idx = 0;
2510 rq->offload_bundles++;
2511 }
2512 return (gather_idx);
2513 }
2514
2515 static void
2516 restart_tx(struct sge_qset *qs)
2517 {
2518 struct adapter *sc = qs->port->adapter;
2519
2520
2521 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2522 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2523 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2524 qs->txq[TXQ_OFLD].restarts++;
2525 DPRINTF("restarting TXQ_OFLD\n");
2526 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2527 }
2528 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2529 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2530 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2531 qs->txq[TXQ_CTRL].in_use);
2532
2533 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2534 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2535 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2536 qs->txq[TXQ_CTRL].restarts++;
2537 DPRINTF("restarting TXQ_CTRL\n");
2538 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2539 }
2540 }
2541
2542 /**
2543 * t3_sge_alloc_qset - initialize an SGE queue set
2544 * @sc: the controller softc
2545 * @id: the queue set id
2546 * @nports: how many Ethernet ports will be using this queue set
2547 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2548 * @p: configuration parameters for this queue set
2549 * @ntxq: number of Tx queues for the queue set
2550 * @pi: port info for queue set
2551 *
2552 * Allocate resources and initialize an SGE queue set. A queue set
2553 * comprises a response queue, two Rx free-buffer queues, and up to 3
2554 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2555 * queue, offload queue, and control queue.
2556 */
2557 int
2558 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2559 const struct qset_params *p, int ntxq, struct port_info *pi)
2560 {
2561 struct sge_qset *q = &sc->sge.qs[id];
2562 int i, ret = 0;
2563
2564 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2565 q->port = pi;
2566
2567 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2568 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2569 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2570 goto err;
2571 }
2572 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2573 M_NOWAIT | M_ZERO)) == NULL) {
2574 device_printf(sc->dev, "failed to allocate ifq\n");
2575 goto err;
2576 }
2577 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2578 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2579 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2580 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2581 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2582
2583 init_qset_cntxt(q, id);
2584 q->idx = id;
2585 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2586 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2587 &q->fl[0].desc, &q->fl[0].sdesc,
2588 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2589 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2590 printf("error %d from alloc ring fl0\n", ret);
2591 goto err;
2592 }
2593
2594 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2595 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2596 &q->fl[1].desc, &q->fl[1].sdesc,
2597 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2598 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2599 printf("error %d from alloc ring fl1\n", ret);
2600 goto err;
2601 }
2602
2603 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2604 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2605 &q->rspq.desc_tag, &q->rspq.desc_map,
2606 NULL, NULL)) != 0) {
2607 printf("error %d from alloc ring rspq\n", ret);
2608 goto err;
2609 }
2610
2611 for (i = 0; i < ntxq; ++i) {
2612 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2613
2614 if ((ret = alloc_ring(sc, p->txq_size[i],
2615 sizeof(struct tx_desc), sz,
2616 &q->txq[i].phys_addr, &q->txq[i].desc,
2617 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2618 &q->txq[i].desc_map,
2619 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2620 printf("error %d from alloc ring tx %i\n", ret, i);
2621 goto err;
2622 }
2623 mbufq_init(&q->txq[i].sendq);
2624 q->txq[i].gen = 1;
2625 q->txq[i].size = p->txq_size[i];
2626 }
2627
2628 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2629 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2630 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2631 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2632
2633 q->fl[0].gen = q->fl[1].gen = 1;
2634 q->fl[0].size = p->fl_size;
2635 q->fl[1].size = p->jumbo_size;
2636
2637 q->rspq.gen = 1;
2638 q->rspq.cidx = 0;
2639 q->rspq.size = p->rspq_size;
2640
2641 q->txq[TXQ_ETH].stop_thres = nports *
2642 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2643
2644 q->fl[0].buf_size = MCLBYTES;
2645 q->fl[0].zone = zone_pack;
2646 q->fl[0].type = EXT_PACKET;
2647
2648 if (p->jumbo_buf_size == MJUM16BYTES) {
2649 q->fl[1].zone = zone_jumbo16;
2650 q->fl[1].type = EXT_JUMBO16;
2651 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2652 q->fl[1].zone = zone_jumbo9;
2653 q->fl[1].type = EXT_JUMBO9;
2654 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2655 q->fl[1].zone = zone_jumbop;
2656 q->fl[1].type = EXT_JUMBOP;
2657 } else {
2658 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2659 ret = EDOOFUS;
2660 goto err;
2661 }
2662 q->fl[1].buf_size = p->jumbo_buf_size;
2663
2664 /* Allocate and setup the lro_ctrl structure */
2665 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2666 #ifdef INET
2667 ret = tcp_lro_init(&q->lro.ctrl);
2668 if (ret) {
2669 printf("error %d from tcp_lro_init\n", ret);
2670 goto err;
2671 }
2672 #endif
2673 q->lro.ctrl.ifp = pi->ifp;
2674
2675 mtx_lock_spin(&sc->sge.reg_lock);
2676 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2677 q->rspq.phys_addr, q->rspq.size,
2678 q->fl[0].buf_size, 1, 0);
2679 if (ret) {
2680 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2681 goto err_unlock;
2682 }
2683
2684 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2685 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2686 q->fl[i].phys_addr, q->fl[i].size,
2687 q->fl[i].buf_size, p->cong_thres, 1,
2688 0);
2689 if (ret) {
2690 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2691 goto err_unlock;
2692 }
2693 }
2694
2695 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2696 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2697 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2698 1, 0);
2699 if (ret) {
2700 printf("error %d from t3_sge_init_ecntxt\n", ret);
2701 goto err_unlock;
2702 }
2703
2704 if (ntxq > 1) {
2705 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2706 USE_GTS, SGE_CNTXT_OFLD, id,
2707 q->txq[TXQ_OFLD].phys_addr,
2708 q->txq[TXQ_OFLD].size, 0, 1, 0);
2709 if (ret) {
2710 printf("error %d from t3_sge_init_ecntxt\n", ret);
2711 goto err_unlock;
2712 }
2713 }
2714
2715 if (ntxq > 2) {
2716 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2717 SGE_CNTXT_CTRL, id,
2718 q->txq[TXQ_CTRL].phys_addr,
2719 q->txq[TXQ_CTRL].size,
2720 q->txq[TXQ_CTRL].token, 1, 0);
2721 if (ret) {
2722 printf("error %d from t3_sge_init_ecntxt\n", ret);
2723 goto err_unlock;
2724 }
2725 }
2726
2727 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2728 device_get_unit(sc->dev), irq_vec_idx);
2729 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2730
2731 mtx_unlock_spin(&sc->sge.reg_lock);
2732 t3_update_qset_coalesce(q, p);
2733 q->port = pi;
2734
2735 refill_fl(sc, &q->fl[0], q->fl[0].size);
2736 refill_fl(sc, &q->fl[1], q->fl[1].size);
2737 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2738
2739 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2740 V_NEWTIMER(q->rspq.holdoff_tmr));
2741
2742 return (0);
2743
2744 err_unlock:
2745 mtx_unlock_spin(&sc->sge.reg_lock);
2746 err:
2747 TXQ_LOCK(q);
2748 t3_free_qset(sc, q);
2749
2750 return (ret);
2751 }
2752
2753 /*
2754 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2755 * ethernet data. Hardware assistance with various checksums and any vlan tag
2756 * will also be taken into account here.
2757 */
2758 void
2759 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2760 {
2761 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2762 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2763 struct ifnet *ifp = pi->ifp;
2764
2765 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2766
2767 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2768 cpl->csum_valid && cpl->csum == 0xffff) {
2769 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2770 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2771 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2772 m->m_pkthdr.csum_data = 0xffff;
2773 }
2774
2775 if (cpl->vlan_valid) {
2776 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2777 m->m_flags |= M_VLANTAG;
2778 }
2779
2780 m->m_pkthdr.rcvif = ifp;
2781 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2782 /*
2783 * adjust after conversion to mbuf chain
2784 */
2785 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2786 m->m_len -= (sizeof(*cpl) + ethpad);
2787 m->m_data += (sizeof(*cpl) + ethpad);
2788 }
2789
2790 /**
2791 * get_packet - return the next ingress packet buffer from a free list
2792 * @adap: the adapter that received the packet
2793 * @drop_thres: # of remaining buffers before we start dropping packets
2794 * @qs: the qset that the SGE free list holding the packet belongs to
2795 *	@mh: the mbuf header, containing pointers to the head and tail of the mbuf chain
2796 * @r: response descriptor
2797 *
2798 * Get the next packet from a free list and complete setup of the
2799 *	mbuf chain.  If the packet is small we make a copy and recycle the
2800 * original buffer, otherwise we use the original buffer itself. If a
2801 * positive drop threshold is supplied packets are dropped and their
2802 * buffers recycled if (a) the number of remaining buffers is under the
2803 * threshold and the packet is too big to copy, or (b) the packet should
2804 * be copied but there is no memory for the copy.
2805 */
2806 static int
2807 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2808 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2809 {
2810
2811 unsigned int len_cq = ntohl(r->len_cq);
2812 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2813 int mask, cidx = fl->cidx;
2814 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2815 uint32_t len = G_RSPD_LEN(len_cq);
2816 uint32_t flags = M_EXT;
2817 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2818 caddr_t cl;
2819 struct mbuf *m;
2820 int ret = 0;
2821
2822 mask = fl->size - 1;
2823 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2824 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2825 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2826 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2827
2828 fl->credits--;
2829 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2830
2831 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2832 sopeop == RSPQ_SOP_EOP) {
2833 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2834 goto skip_recycle;
2835 cl = mtod(m, void *);
2836 memcpy(cl, sd->rxsd_cl, len);
2837 recycle_rx_buf(adap, fl, fl->cidx);
2838 m->m_pkthdr.len = m->m_len = len;
2839 m->m_flags = 0;
2840 mh->mh_head = mh->mh_tail = m;
2841 ret = 1;
2842 goto done;
2843 } else {
2844 skip_recycle:
2845 bus_dmamap_unload(fl->entry_tag, sd->map);
2846 cl = sd->rxsd_cl;
2847 m = sd->m;
2848
2849 if ((sopeop == RSPQ_SOP_EOP) ||
2850 (sopeop == RSPQ_SOP))
2851 flags |= M_PKTHDR;
2852 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2853 if (fl->zone == zone_pack) {
2854 /*
2855 * restore clobbered data pointer
2856 */
2857 m->m_data = m->m_ext.ext_buf;
2858 } else {
2859 m_cljset(m, cl, fl->type);
2860 }
2861 m->m_len = len;
2862 }
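	/*
	 * Link the buffer into the packet being assembled according to the
	 * SOP/EOP flags in the response.
	 */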
2863 switch(sopeop) {
2864 case RSPQ_SOP_EOP:
2865 ret = 1;
2866 /* FALLTHROUGH */
2867 case RSPQ_SOP:
2868 mh->mh_head = mh->mh_tail = m;
2869 m->m_pkthdr.len = len;
2870 break;
2871 case RSPQ_EOP:
2872 ret = 1;
2873 /* FALLTHROUGH */
2874 case RSPQ_NSOP_NEOP:
2875 if (mh->mh_tail == NULL) {
2876 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2877 m_freem(m);
2878 break;
2879 }
2880 mh->mh_tail->m_next = m;
2881 mh->mh_tail = m;
2882 mh->mh_head->m_pkthdr.len += len;
2883 break;
2884 }
2885 if (cxgb_debug)
2886 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2887 done:
2888 if (++fl->cidx == fl->size)
2889 fl->cidx = 0;
2890
2891 return (ret);
2892 }
2893
2894 /**
2895 * handle_rsp_cntrl_info - handles control information in a response
2896 * @qs: the queue set corresponding to the response
2897 * @flags: the response control flags
2898 *
2899 * Handles the control information of an SGE response, such as GTS
2900 * indications and completion credits for the queue set's Tx queues.
2901 *	HW coalesces credits; we don't do any extra SW coalescing.
2902 */
2903 static __inline void
2904 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2905 {
2906 unsigned int credits;
2907
2908 #if USE_GTS
2909 if (flags & F_RSPD_TXQ0_GTS)
2910 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2911 #endif
2912 credits = G_RSPD_TXQ0_CR(flags);
2913 if (credits)
2914 qs->txq[TXQ_ETH].processed += credits;
2915
2916 credits = G_RSPD_TXQ2_CR(flags);
2917 if (credits)
2918 qs->txq[TXQ_CTRL].processed += credits;
2919
2920 # if USE_GTS
2921 if (flags & F_RSPD_TXQ1_GTS)
2922 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2923 # endif
2924 credits = G_RSPD_TXQ1_CR(flags);
2925 if (credits)
2926 qs->txq[TXQ_OFLD].processed += credits;
2927
2928 }
2929
2930 static void
2931 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2932 unsigned int sleeping)
2933 {
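	/*
	 * Nothing to do here; the response queue's GTS doorbell is written
	 * in process_responses_gts().
	 */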
2934 ;
2935 }
2936
2937 /**
2938 * process_responses - process responses from an SGE response queue
2939 * @adap: the adapter
2940 * @qs: the queue set to which the response queue belongs
2941 * @budget: how many responses can be processed in this round
2942 *
2943 * Process responses from an SGE response queue up to the supplied budget.
2944 * Responses include received packets as well as credits and other events
2945 * for the queues that belong to the response queue's queue set.
2946 * A negative budget is effectively unlimited.
2947 *
2948 * Additionally choose the interrupt holdoff time for the next interrupt
2949 * on this queue. If the system is under memory shortage use a fairly
2950 * long delay to help recovery.
2951 */
2952 static int
2953 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2954 {
2955 struct sge_rspq *rspq = &qs->rspq;
2956 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2957 int budget_left = budget;
2958 unsigned int sleeping = 0;
2959 int lro_enabled = qs->lro.enabled;
2960 int skip_lro;
2961 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2962 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2963 int ngathered = 0;
2964 #ifdef DEBUG
2965 static int last_holdoff = 0;
2966 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2967 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2968 last_holdoff = rspq->holdoff_tmr;
2969 }
2970 #endif
2971 rspq->next_holdoff = rspq->holdoff_tmr;
2972
2973 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2974 int eth, eop = 0, ethpad = 0;
2975 uint32_t flags = ntohl(r->flags);
2976 uint32_t rss_csum = *(const uint32_t *)r;
2977 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2978
2979 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2980
2981 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2982 struct mbuf *m;
2983
2984 if (cxgb_debug)
2985 printf("async notification\n");
2986
2987 if (rspq->rspq_mh.mh_head == NULL) {
2988 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2989 m = rspq->rspq_mh.mh_head;
2990 } else {
2991 m = m_gethdr(M_DONTWAIT, MT_DATA);
2992 }
2993 if (m == NULL)
2994 goto no_mem;
2995
2996 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2997 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2998 *mtod(m, char *) = CPL_ASYNC_NOTIF;
2999 rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
3000 eop = 1;
3001 rspq->async_notif++;
3002 goto skip;
3003 } else if (flags & F_RSPD_IMM_DATA_VALID) {
3004 struct mbuf *m = NULL;
3005
3006 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
3007 r->rss_hdr.opcode, rspq->cidx);
3008 if (rspq->rspq_mh.mh_head == NULL)
3009 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3010 else
3011 m = m_gethdr(M_DONTWAIT, MT_DATA);
3012
3013 if (rspq->rspq_mh.mh_head == NULL && m == NULL) {
3014 no_mem:
3015 rspq->next_holdoff = NOMEM_INTR_DELAY;
3016 budget_left--;
3017 break;
3018 }
3019 get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
3020 eop = 1;
3021 rspq->imm_data++;
3022 } else if (r->len_cq) {
3023 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3024
3025 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
3026 if (eop) {
3027 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
3028 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
3029 }
3030
3031 ethpad = 2;
3032 } else {
3033 rspq->pure_rsps++;
3034 }
3035 skip:
3036 if (flags & RSPD_CTRL_MASK) {
3037 sleeping |= flags & RSPD_GTS_MASK;
3038 handle_rsp_cntrl_info(qs, flags);
3039 }
3040
3041 r++;
3042 if (__predict_false(++rspq->cidx == rspq->size)) {
3043 rspq->cidx = 0;
3044 rspq->gen ^= 1;
3045 r = rspq->desc;
3046 }
3047
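		/* Return processed response-queue entries to the HW 64 at a time. */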
3048 if (++rspq->credits >= 64) {
3049 refill_rspq(adap, rspq, rspq->credits);
3050 rspq->credits = 0;
3051 }
3052 if (!eth && eop) {
3053 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
3054 /*
3055 * XXX size mismatch
3056 */
3057 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
3058
3059
3060 ngathered = rx_offload(&adap->tdev, rspq,
3061 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
3062 rspq->rspq_mh.mh_head = NULL;
3063 DPRINTF("received offload packet\n");
3064
3065 } else if (eth && eop) {
3066 struct mbuf *m = rspq->rspq_mh.mh_head;
3067
3068 t3_rx_eth(adap, rspq, m, ethpad);
3069
3070 /*
3071 * The T304 sends incoming packets on any qset. If LRO
3072 			 * is also enabled, we could end up sending the packet up
3073 * lro_ctrl->ifp's input. That is incorrect.
3074 *
3075 * The mbuf's rcvif was derived from the cpl header and
3076 * is accurate. Skip LRO and just use that.
3077 */
3078 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3079
3080 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
3081 #ifdef INET
3082 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
3083 #endif
3084 ) {
3085 				/* successfully queued for LRO */
3086 } else {
3087 /*
3088 * LRO not enabled, packet unsuitable for LRO,
3089 * or unable to queue. Pass it up right now in
3090 * either case.
3091 */
3092 struct ifnet *ifp = m->m_pkthdr.rcvif;
3093 (*ifp->if_input)(ifp, m);
3094 }
3095 rspq->rspq_mh.mh_head = NULL;
3096
3097 }
3098 __refill_fl_lt(adap, &qs->fl[0], 32);
3099 __refill_fl_lt(adap, &qs->fl[1], 32);
3100 --budget_left;
3101 }
3102
3103 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
3104
3105 #ifdef INET
3106 /* Flush LRO */
3107 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3108 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3109 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3110 tcp_lro_flush(lro_ctrl, queued);
3111 }
3112 #endif
3113
3114 if (sleeping)
3115 check_ring_db(adap, qs, sleeping);
3116
3117 mb(); /* commit Tx queue processed updates */
3118 if (__predict_false(qs->txq_stopped > 1))
3119 restart_tx(qs);
3120
3121 __refill_fl_lt(adap, &qs->fl[0], 512);
3122 __refill_fl_lt(adap, &qs->fl[1], 512);
3123 budget -= budget_left;
3124 return (budget);
3125 }
3126
3127 /*
3128 * A helper function that processes responses and issues GTS.
3129 */
3130 static __inline int
3131 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3132 {
3133 int work;
3134 static int last_holdoff = 0;
3135
3136 work = process_responses(adap, rspq_to_qset(rq), -1);
3137
3138 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3139 printf("next_holdoff=%d\n", rq->next_holdoff);
3140 last_holdoff = rq->next_holdoff;
3141 }
3142 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3143 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3144
3145 return (work);
3146 }
3147
3148
3149 /*
3150 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3151 * Handles data events from SGE response queues as well as error and other
3152 * async events as they all use the same interrupt pin. We use one SGE
3153 * response queue per port in this mode and protect all response queues with
3154 * queue 0's lock.
3155 */
3156 void
3157 t3b_intr(void *data)
3158 {
3159 uint32_t i, map;
3160 adapter_t *adap = data;
3161 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3162
3163 t3_write_reg(adap, A_PL_CLI, 0);
3164 map = t3_read_reg(adap, A_SG_DATA_INTR);
3165
3166 if (!map)
3167 return;
3168
3169 if (__predict_false(map & F_ERRINTR))
3170 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3171
3172 mtx_lock(&q0->lock);
3173 for_each_port(adap, i)
3174 if (map & (1 << i))
3175 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3176 mtx_unlock(&q0->lock);
3177 }
3178
3179 /*
3180 * The MSI interrupt handler. This needs to handle data events from SGE
3181 * response queues as well as error and other async events as they all use
3182 * the same MSI vector. We use one SGE response queue per port in this mode
3183 * and protect all response queues with queue 0's lock.
3184 */
3185 void
3186 t3_intr_msi(void *data)
3187 {
3188 adapter_t *adap = data;
3189 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3190 int i, new_packets = 0;
3191
3192 mtx_lock(&q0->lock);
3193
3194 for_each_port(adap, i)
3195 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3196 new_packets = 1;
3197 mtx_unlock(&q0->lock);
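	/*
	 * No response queue had work; assume the interrupt signaled an error
	 * or other async event and run the slow interrupt task.
	 */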
3198 if (new_packets == 0)
3199 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3200 }
3201
3202 void
3203 t3_intr_msix(void *data)
3204 {
3205 struct sge_qset *qs = data;
3206 adapter_t *adap = qs->port->adapter;
3207 struct sge_rspq *rspq = &qs->rspq;
3208
3209 if (process_responses_gts(adap, rspq) == 0)
3210 rspq->unhandled_irqs++;
3211 }
3212
3213 #define QDUMP_SBUF_SIZE (32 * 400)
3214 static int
3215 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3216 {
3217 struct sge_rspq *rspq;
3218 struct sge_qset *qs;
3219 int i, err, dump_end, idx;
3220 static int multiplier = 1;
3221 struct sbuf *sb;
3222 struct rsp_desc *rspd;
3223 uint32_t data[4];
3224
3225 rspq = arg1;
3226 qs = rspq_to_qset(rspq);
3227 if (rspq->rspq_dump_count == 0)
3228 return (0);
3229 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3230 log(LOG_WARNING,
3231 "dump count is too large %d\n", rspq->rspq_dump_count);
3232 rspq->rspq_dump_count = 0;
3233 return (EINVAL);
3234 }
3235 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3236 log(LOG_WARNING,
3237 "dump start of %d is greater than queue size\n",
3238 rspq->rspq_dump_start);
3239 rspq->rspq_dump_start = 0;
3240 return (EINVAL);
3241 }
3242 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3243 if (err)
3244 return (err);
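	/* If the fixed-size sbuf overflows, enlarge it and redo the dump. */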
3245 retry_sbufops:
3246 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3247
3248 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3249 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3250 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3251 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3252 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3253
3254 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3255 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3256
3257 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3258 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3259 idx = i & (RSPQ_Q_SIZE-1);
3260
3261 rspd = &rspq->desc[idx];
3262 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3263 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3264 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3265 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3266 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3267 be32toh(rspd->len_cq), rspd->intr_gen);
3268 }
3269 if (sbuf_overflowed(sb)) {
3270 sbuf_delete(sb);
3271 multiplier++;
3272 goto retry_sbufops;
3273 }
3274 sbuf_finish(sb);
3275 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3276 sbuf_delete(sb);
3277 return (err);
3278 }
3279
3280 static int
3281 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3282 {
3283 struct sge_txq *txq;
3284 struct sge_qset *qs;
3285 int i, j, err, dump_end;
3286 static int multiplier = 1;
3287 struct sbuf *sb;
3288 struct tx_desc *txd;
3289 uint32_t *WR, wr_hi, wr_lo, gen;
3290 uint32_t data[4];
3291
3292 txq = arg1;
3293 qs = txq_to_qset(txq, TXQ_ETH);
3294 if (txq->txq_dump_count == 0) {
3295 return (0);
3296 }
3297 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3298 log(LOG_WARNING,
3299 "dump count is too large %d\n", txq->txq_dump_count);
3300 txq->txq_dump_count = 1;
3301 return (EINVAL);
3302 }
3303 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3304 log(LOG_WARNING,
3305 "dump start of %d is greater than queue size\n",
3306 txq->txq_dump_start);
3307 txq->txq_dump_start = 0;
3308 return (EINVAL);
3309 }
3310 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3311 if (err)
3312 return (err);
3313
3314
3315 retry_sbufops:
3316 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3317
3318 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3319 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3320 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3321 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3322 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3323 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3324 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3325 txq->txq_dump_start,
3326 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3327
3328 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3329 for (i = txq->txq_dump_start; i < dump_end; i++) {
3330 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3331 WR = (uint32_t *)txd->flit;
3332 wr_hi = ntohl(WR[0]);
3333 wr_lo = ntohl(WR[1]);
3334 gen = G_WR_GEN(wr_lo);
3335
3336 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3337 wr_hi, wr_lo, gen);
3338 for (j = 2; j < 30; j += 4)
3339 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3340 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3341
3342 }
3343 if (sbuf_overflowed(sb)) {
3344 sbuf_delete(sb);
3345 multiplier++;
3346 goto retry_sbufops;
3347 }
3348 sbuf_finish(sb);
3349 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3350 sbuf_delete(sb);
3351 return (err);
3352 }
3353
3354 static int
3355 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3356 {
3357 struct sge_txq *txq;
3358 struct sge_qset *qs;
3359 int i, j, err, dump_end;
3360 static int multiplier = 1;
3361 struct sbuf *sb;
3362 struct tx_desc *txd;
3363 uint32_t *WR, wr_hi, wr_lo, gen;
3364
3365 txq = arg1;
3366 qs = txq_to_qset(txq, TXQ_CTRL);
3367 if (txq->txq_dump_count == 0) {
3368 return (0);
3369 }
3370 if (txq->txq_dump_count > 256) {
3371 log(LOG_WARNING,
3372 "dump count is too large %d\n", txq->txq_dump_count);
3373 txq->txq_dump_count = 1;
3374 return (EINVAL);
3375 }
3376 if (txq->txq_dump_start > 255) {
3377 log(LOG_WARNING,
3378 "dump start of %d is greater than queue size\n",
3379 txq->txq_dump_start);
3380 txq->txq_dump_start = 0;
3381 return (EINVAL);
3382 }
3383
3384 retry_sbufops:
3385 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3386 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3387 txq->txq_dump_start,
3388 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3389
3390 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3391 for (i = txq->txq_dump_start; i < dump_end; i++) {
3392 txd = &txq->desc[i & (255)];
3393 WR = (uint32_t *)txd->flit;
3394 wr_hi = ntohl(WR[0]);
3395 wr_lo = ntohl(WR[1]);
3396 gen = G_WR_GEN(wr_lo);
3397
3398 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3399 wr_hi, wr_lo, gen);
3400 for (j = 2; j < 30; j += 4)
3401 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3402 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3403
3404 }
3405 if (sbuf_overflowed(sb)) {
3406 sbuf_delete(sb);
3407 multiplier++;
3408 goto retry_sbufops;
3409 }
3410 sbuf_finish(sb);
3411 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3412 sbuf_delete(sb);
3413 return (err);
3414 }
3415
3416 static int
3417 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3418 {
3419 adapter_t *sc = arg1;
3420 struct qset_params *qsp = &sc->params.sge.qset[0];
3421 int coalesce_usecs;
3422 struct sge_qset *qs;
3423 int i, j, err, nqsets = 0;
3424 struct mtx *lock;
3425
3426 if ((sc->flags & FULL_INIT_DONE) == 0)
3427 return (ENXIO);
3428
3429 coalesce_usecs = qsp->coalesce_usecs;
3430 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3431
3432 if (err != 0) {
3433 return (err);
3434 }
3435 if (coalesce_usecs == qsp->coalesce_usecs)
3436 return (0);
3437
3438 for (i = 0; i < sc->params.nports; i++)
3439 for (j = 0; j < sc->port[i].nqsets; j++)
3440 nqsets++;
3441
3442 coalesce_usecs = max(1, coalesce_usecs);
3443
3444 for (i = 0; i < nqsets; i++) {
3445 qs = &sc->sge.qs[i];
3446 qsp = &sc->params.sge.qset[i];
3447 qsp->coalesce_usecs = coalesce_usecs;
3448
3449 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3450 &sc->sge.qs[0].rspq.lock;
3451
3452 mtx_lock(lock);
3453 t3_update_qset_coalesce(qs, qsp);
3454 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3455 V_NEWTIMER(qs->rspq.holdoff_tmr));
3456 mtx_unlock(lock);
3457 }
3458
3459 return (0);
3460 }
3461
3462
3463 void
3464 t3_add_attach_sysctls(adapter_t *sc)
3465 {
3466 struct sysctl_ctx_list *ctx;
3467 struct sysctl_oid_list *children;
3468
3469 ctx = device_get_sysctl_ctx(sc->dev);
3470 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3471
3472 /* random information */
3473 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3474 "firmware_version",
3475 CTLFLAG_RD, &sc->fw_version,
3476 0, "firmware version");
3477 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3478 "hw_revision",
3479 CTLFLAG_RD, &sc->params.rev,
3480 0, "chip model");
3481 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3482 "port_types",
3483 CTLFLAG_RD, &sc->port_types,
3484 0, "type of ports");
3485 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3486 "enable_debug",
3487 CTLFLAG_RW, &cxgb_debug,
3488 0, "enable verbose debugging output");
3489 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3490 CTLFLAG_RD, &sc->tunq_coalesce,
3491 "#tunneled packets freed");
3492 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3493 "txq_overrun",
3494 CTLFLAG_RD, &txq_fills,
3495 0, "#times txq overrun");
3496 }
3497
3498
3499 static const char *rspq_name = "rspq";
3500 static const char *txq_names[] =
3501 {
3502 "txq_eth",
3503 "txq_ofld",
3504 "txq_ctrl"
3505 };
3506
3507 static int
3508 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3509 {
3510 struct port_info *p = arg1;
3511 uint64_t *parg;
3512
3513 if (!p)
3514 return (EINVAL);
3515
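	/* arg2 carries the byte offset of the requested counter within struct mac_stats. */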
3516 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3517 PORT_LOCK(p);
3518 t3_mac_update_stats(&p->mac);
3519 PORT_UNLOCK(p);
3520
3521 return (sysctl_handle_quad(oidp, parg, 0, req));
3522 }
3523
3524 void
3525 t3_add_configured_sysctls(adapter_t *sc)
3526 {
3527 struct sysctl_ctx_list *ctx;
3528 struct sysctl_oid_list *children;
3529 int i, j;
3530
3531 ctx = device_get_sysctl_ctx(sc->dev);
3532 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3533
3534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3535 "intr_coal",
3536 CTLTYPE_INT|CTLFLAG_RW, sc,
3537 0, t3_set_coalesce_usecs,
3538 "I", "interrupt coalescing timer (us)");
3539
3540 for (i = 0; i < sc->params.nports; i++) {
3541 struct port_info *pi = &sc->port[i];
3542 struct sysctl_oid *poid;
3543 struct sysctl_oid_list *poidlist;
3544 struct mac_stats *mstats = &pi->mac.stats;
3545
3546 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3547 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3548 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3549 poidlist = SYSCTL_CHILDREN(poid);
3550 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3551 "nqsets", CTLFLAG_RD, &pi->nqsets,
3552 0, "#queue sets");
3553
3554 for (j = 0; j < pi->nqsets; j++) {
3555 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3556 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3557 *ctrlqpoid, *lropoid;
3558 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3559 *txqpoidlist, *ctrlqpoidlist,
3560 *lropoidlist;
3561 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3562
3563 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3564
3565 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3566 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3567 qspoidlist = SYSCTL_CHILDREN(qspoid);
3568
3569 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3570 CTLFLAG_RD, &qs->fl[0].empty, 0,
3571 "freelist #0 empty");
3572 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3573 CTLFLAG_RD, &qs->fl[1].empty, 0,
3574 "freelist #1 empty");
3575
3576 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3577 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3578 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3579
3580 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3581 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3582 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3583
3584 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3585 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3586 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3587
3588 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3589 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3590 lropoidlist = SYSCTL_CHILDREN(lropoid);
3591
3592 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3593 CTLFLAG_RD, &qs->rspq.size,
3594 0, "#entries in response queue");
3595 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3596 CTLFLAG_RD, &qs->rspq.cidx,
3597 0, "consumer index");
3598 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3599 CTLFLAG_RD, &qs->rspq.credits,
3600 0, "#credits");
3601 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3602 CTLFLAG_RD, &qs->rspq.starved,
3603 0, "#times starved");
3604 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3605 CTLFLAG_RD, &qs->rspq.phys_addr,
3606 	    "physical address of the queue");
3607 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3608 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3609 0, "start rspq dump entry");
3610 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3611 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3612 0, "#rspq entries to dump");
3613 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3614 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3615 0, t3_dump_rspq, "A", "dump of the response queue");
3616
3617 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3618 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3619 "#tunneled packets dropped");
3620 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3621 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3622 0, "#tunneled packets waiting to be sent");
3623 #if 0
3624 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3625 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3626 0, "#tunneled packets queue producer index");
3627 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3628 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3629 0, "#tunneled packets queue consumer index");
3630 #endif
3631 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3632 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3633 0, "#tunneled packets processed by the card");
3634 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3635 CTLFLAG_RD, &txq->cleaned,
3636 0, "#tunneled packets cleaned");
3637 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3638 CTLFLAG_RD, &txq->in_use,
3639 0, "#tunneled packet slots in use");
3640 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3641 CTLFLAG_RD, &txq->txq_frees,
3642 "#tunneled packets freed");
3643 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3644 CTLFLAG_RD, &txq->txq_skipped,
3645 0, "#tunneled packet descriptors skipped");
3646 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3647 CTLFLAG_RD, &txq->txq_coalesced,
3648 "#tunneled packets coalesced");
3649 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3650 CTLFLAG_RD, &txq->txq_enqueued,
3651 0, "#tunneled packets enqueued to hardware");
3652 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3653 CTLFLAG_RD, &qs->txq_stopped,
3654 0, "tx queues stopped");
3655 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3656 CTLFLAG_RD, &txq->phys_addr,
3657 	    "physical address of the queue");
3658 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3659 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3660 0, "txq generation");
3661 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3662 CTLFLAG_RD, &txq->cidx,
3663 0, "hardware queue cidx");
3664 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3665 CTLFLAG_RD, &txq->pidx,
3666 0, "hardware queue pidx");
3667 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3668 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3669 0, "txq start idx for dump");
3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3671 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3672 0, "txq #entries to dump");
3673 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3674 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3675 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3676
3677 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3678 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3679 0, "ctrlq start idx for dump");
3680 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3681 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3682 0, "ctrl #entries to dump");
3683 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3684 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3685 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3686
3687 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3688 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3689 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3690 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3691 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3692 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3693 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3694 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3695 }
3696
3697 /* Now add a node for mac stats. */
3698 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3699 CTLFLAG_RD, NULL, "MAC statistics");
3700 poidlist = SYSCTL_CHILDREN(poid);
3701
3702 /*
3703 * We (ab)use the length argument (arg2) to pass on the offset
3704 * of the data that we are interested in. This is only required
3705 * for the quad counters that are updated from the hardware (we
3706 * make sure that we return the latest value).
3707 * sysctl_handle_macstat first updates *all* the counters from
3708 * the hardware, and then returns the latest value of the
3709 * requested counter. Best would be to update only the
3710 * requested counter from hardware, but t3_mac_update_stats()
3711 * hides all the register details and we don't want to dive into
3712 * all that here.
3713 */
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
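		/*
		 * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) below expands
		 * (roughly) to:
		 *
		 *	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
		 *	    (CTLTYPE_QUAD | CTLFLAG_RD), pi,
		 *	    offsetof(struct mac_stats, tx_octets),
		 *	    sysctl_handle_macstat, "QU", 0);
		 *
		 * i.e. each OID carries the offset of its counter in arg2.
		 */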
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return -EINVAL;
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
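
/*
 * Usage sketch (hypothetical caller, for illustration only): copy the first
 * Tx descriptor of a queue set into a local buffer.  "qs" is assumed to be a
 * pointer to a valid, initialized struct sge_qset.
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len;
 *
 *	len = t3_get_desc(qs, 0, 0, buf);
 *	if (len == sizeof(struct tx_desc))
 *		... buf now holds the raw descriptor contents ...
 *
 * Note that the caller must size the buffer for the largest descriptor type
 * the requested queue can return.
 */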