1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Chelsio Corporation nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28
29 ***************************************************************************/
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: releng/12.0/sys/dev/cxgb/cxgb_sge.c 333288 2018-05-06 00:48:43Z markj $");
33
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <sys/bus.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/rman.h>
46 #include <sys/queue.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
49
50 #include <sys/proc.h>
51 #include <sys/sbuf.h>
52 #include <sys/sched.h>
53 #include <sys/smp.h>
54 #include <sys/systm.h>
55 #include <sys/syslog.h>
56 #include <sys/socket.h>
57 #include <sys/sglist.h>
58
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if_vlan_var.h>
64
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcivar.h>
73
74 #include <vm/vm.h>
75 #include <vm/pmap.h>
76
77 #include <cxgb_include.h>
78 #include <sys/mvec.h>
79
/* NOTE(review): appears to count tx-queue-full events -- confirm at use sites. */
int txq_fills = 0;
/* NOTE(review): presumably gates use of multiple hw tx queues -- confirm. */
int multiq_tx_enable = 1;

#ifdef TCP_OFFLOAD
/* The CPL dispatch table must be able to hold every CPL command. */
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
/* Size of the per-queue software mbuf ring (hw.cxgb.txq_mr_size, boot-time). */
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

/* When set, always coalesce small packets regardless of tx ring occupancy. */
static int cxgb_tx_coalesce_force = 0;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");
96
/*
 * Thresholds (in tx descriptors) for enabling/disabling coalescing and for
 * tx reclaim, expressed as fractions of TX_ETH_Q_SIZE.  Each expansion is
 * parenthesized so the macros are safe inside larger expressions.
 */
#define COALESCE_START_DEFAULT (TX_ETH_Q_SIZE>>1)
#define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
#define COALESCE_STOP_DEFAULT (TX_ETH_Q_SIZE>>2)
#define COALESCE_STOP_MIN (TX_ETH_Q_SIZE>>5)
#define TX_RECLAIM_DEFAULT (TX_ETH_Q_SIZE>>5)
#define TX_RECLAIM_MAX (TX_ETH_Q_SIZE>>2)
#define TX_RECLAIM_MIN (TX_ETH_Q_SIZE>>6)
104
105
106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
108 &cxgb_tx_coalesce_enable_start, 0,
109 "coalesce enable threshold");
110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
112 &cxgb_tx_coalesce_enable_stop, 0,
113 "coalesce disable threshold");
114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
116 &cxgb_tx_reclaim_threshold, 0,
117 "tx cleaning minimum threshold");
118
119 /*
120 * XXX don't re-enable this until TOE stops assuming
121 * we have an m_ext
122 */
123 static int recycle_enable = 0;
124
125 extern int cxgb_use_16k_clusters;
126 extern int nmbjumbop;
127 extern int nmbjumbo9;
128 extern int nmbjumbo16;
129
130 #define USE_GTS 0
131
132 #define SGE_RX_SM_BUF_SIZE 1536
133 #define SGE_RX_DROP_THRES 16
134 #define SGE_RX_COPY_THRES 128
135
136 /*
137 * Period of the Tx buffer reclaim timer. This timer does not need to run
138 * frequently as Tx buffers are usually reclaimed by new Tx packets.
139 */
140 #define TX_RECLAIM_PERIOD (hz >> 1)
141
142 /*
143 * Values for sge_txq.flags
144 */
145 enum {
146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
148 };
149
/* Hardware tx descriptor: an array of 64-bit flits. */
struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

/* Hardware free-list (rx) descriptor. */
struct rx_desc {
	uint32_t	addr_lo;	/* buffer physical address, low 32 bits */
	uint32_t	len_gen;	/* length and generation bit 1 */
	uint32_t	gen2;		/* generation bit 2 */
	uint32_t	addr_hi;	/* buffer physical address, high 32 bits */
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];	/* immediate packet data */
	uint8_t			intr_gen;
} __packed;

/* Values for tx_sw_desc/rx_sw_desc flags. */
#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

/* Shorthand for the SOP/EOP bit combinations found in rsp_desc flags. */
#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	caddr_t		rxsd_cl;	/* cluster backing this rx buffer */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

/* Snapshot of tx queue producer state taken while filling a WR. */
struct txq_state {
	unsigned int	compl;	/* completion flag to set in the WR */
	unsigned int	gen;	/* generation bit */
	unsigned int	pidx;	/* producer index */
};

/* Out-parameter bundle for refill_fl_cb (bus_dmamap_load callback). */
struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};
203
204
205 /*
206 * Maps a number of flits to the number of Tx descriptors that can hold them.
207 * The formula is
208 *
209 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
210 *
211 * HW allows up to 4 descriptors to be combined into a WR.
212 */
213 static uint8_t flit_desc_map[] = {
214 0,
215 #if SGE_NUM_GENBITS == 1
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
218 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
219 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
220 #elif SGE_NUM_GENBITS == 2
221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
222 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
223 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
224 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
225 #else
226 # error "SGE_NUM_GENBITS must be 1 or 2"
227 #endif
228 };
229
230 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
231 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
232 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
233 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
234 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
236 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
237 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
239 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
240 #define TXQ_RING_DEQUEUE(qs) \
241 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
242
243 int cxgb_debug = 0;
244
245 static void sge_timer_cb(void *arg);
246 static void sge_timer_reclaim(void *arg, int ncount);
247 static void sge_txq_reclaim_handler(void *arg, int ncount);
248 static void cxgb_start_locked(struct sge_qset *qs);
249
250 /*
251 * XXX need to cope with bursty scheduling by looking at a wider
252 * window than we are now for determining the need for coalescing
253 *
254 */
255 static __inline uint64_t
256 check_pkt_coalesce(struct sge_qset *qs)
257 {
258 struct adapter *sc;
259 struct sge_txq *txq;
260 uint8_t *fill;
261
262 if (__predict_false(cxgb_tx_coalesce_force))
263 return (1);
264 txq = &qs->txq[TXQ_ETH];
265 sc = qs->port->adapter;
266 fill = &sc->tunq_fill[qs->idx];
267
268 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
269 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
270 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
271 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
272 /*
273 * if the hardware transmit queue is more than 1/8 full
274 * we mark it as coalescing - we drop back from coalescing
275 * when we go below 1/32 full and there are no packets enqueued,
276 * this provides us with some degree of hysteresis
277 */
278 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
279 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
280 *fill = 0;
281 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
282 *fill = 1;
283
284 return (sc->tunq_coalesce);
285 }
286
#ifdef __LP64__
/*
 * Write a work request header's hi and lo words with a single 64-bit store.
 * The words are packed according to host byte order so the in-memory layout
 * matches the two 32-bit stores done by the 32-bit variant below.
 */
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
/*
 * 32-bit variant: write hi first, then a write barrier, then lo, so the
 * hardware never observes a new lo word paired with a stale hi word.
 */
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif
311
312 struct coalesce_info {
313 int count;
314 int nbytes;
315 };
316
317 static int
318 coalesce_check(struct mbuf *m, void *arg)
319 {
320 struct coalesce_info *ci = arg;
321 int *count = &ci->count;
322 int *nbytes = &ci->nbytes;
323
324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
325 (*count < 7) && (m->m_next == NULL))) {
326 *count += 1;
327 *nbytes += m->m_len;
328 return (1);
329 }
330 return (0);
331 }
332
/*
 * Dequeue the next packet (or coalesced chain of packets) for transmission
 * from the qset's buf_ring.  When coalescing is off, a single packet is
 * dequeued; otherwise up to 7 small packets are linked via m_nextpkt and
 * returned as one chain to be placed in a single work request.
 */
static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;


	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		/* coalesce_check enforces the per-batch count/byte limits */
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (m_head);
}
358
359 /**
360 * reclaim_completed_tx - reclaims completed Tx descriptors
361 * @adapter: the adapter
362 * @q: the Tx queue to reclaim completed descriptors from
363 *
364 * Reclaims Tx descriptors that the SGE has indicated it has processed,
365 * and frees the associated buffers if possible. Called with the Tx
366 * queue's lock held.
367 */
368 static __inline int
369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
370 {
371 struct sge_txq *q = &qs->txq[queue];
372 int reclaim = desc_reclaimable(q);
373
374 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
375 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
376 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
377
378 if (reclaim < reclaim_min)
379 return (0);
380
381 mtx_assert(&qs->lock, MA_OWNED);
382 if (reclaim > 0) {
383 t3_free_tx_desc(qs, reclaim, queue);
384 q->cleaned += reclaim;
385 q->in_use -= reclaim;
386 }
387 if (isset(&qs->txq_stopped, TXQ_ETH))
388 clrbit(&qs->txq_stopped, TXQ_ETH);
389
390 return (reclaim);
391 }
392
#ifdef NETDUMP
/*
 * Netdump tx poller: aggressively reclaim completed tx descriptors so the
 * dump path can keep queueing packets.  Returns the number reclaimed.
 */
int
cxgb_netdump_poll_tx(struct sge_qset *qs)
{

	return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
}
#endif
401
402 /**
403 * should_restart_tx - are there enough resources to restart a Tx queue?
404 * @q: the Tx queue
405 *
406 * Checks if there are enough descriptors to restart a suspended Tx queue.
407 */
408 static __inline int
409 should_restart_tx(const struct sge_txq *q)
410 {
411 unsigned int r = q->processed - q->cleaned;
412
413 return q->in_use - r < (q->size >> 1);
414 }
415
416 /**
417 * t3_sge_init - initialize SGE
418 * @adap: the adapter
419 * @p: the SGE parameters
420 *
421 * Performs SGE initialization needed every time after a chip reset.
422 * We do not initialize any of the queue sets here, instead the driver
423 * top-level must request those individually. We also do not enable DMA
424 * here, that should be done after the queues have been set up.
425 */
426 void
427 t3_sge_init(adapter_t *adap, struct sge_params *p)
428 {
429 u_int ctrl, ups;
430
431 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
432
433 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
434 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
435 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
436 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
437 #if SGE_NUM_GENBITS == 1
438 ctrl |= F_EGRGENCTRL;
439 #endif
440 if (adap->params.rev > 0) {
441 if (!(adap->flags & (USING_MSIX | USING_MSI)))
442 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
443 }
444 t3_write_reg(adap, A_SG_CONTROL, ctrl);
445 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
446 V_LORCQDRBTHRSH(512));
447 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
448 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
449 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
450 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
451 adap->params.rev < T3_REV_C ? 1000 : 500);
452 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
453 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
454 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
455 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
456 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
457 }
458
459
460 /**
461 * sgl_len - calculates the size of an SGL of the given capacity
462 * @n: the number of SGL entries
463 *
464 * Calculates the number of flits needed for a scatter/gather list that
465 * can hold the given number of entries.
466 */
467 static __inline unsigned int
468 sgl_len(unsigned int n)
469 {
470 return ((3 * n) / 2 + (n & 1));
471 }
472
473 /**
474 * get_imm_packet - return the next ingress packet buffer from a response
475 * @resp: the response descriptor containing the packet data
476 *
477 * Return a packet containing the immediate data of the given response.
478 */
479 static int
480 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
481 {
482
483 if (resp->rss_hdr.opcode == CPL_RX_DATA) {
484 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
485 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
486 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
487 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
488 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
489 } else
490 m->m_len = IMMED_PKT_SIZE;
491 m->m_ext.ext_buf = NULL;
492 m->m_ext.ext_type = 0;
493 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
494 return (0);
495 }
496
/* Map a flit count to the number of tx descriptors needed (table lookup). */
static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}
502
/* Groups of SGE interrupt-cause bits: parity, framing, and fatal errors. */
#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.  Logs the
 *	cause, acknowledges it, and escalates fatal conditions via
 *	t3_fatal_err().
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	/* Ack the interrupt cause before potentially resetting the chip. */
	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}
545
/*
 * Compute default queue-set parameters (ring sizes, buffer sizes, interrupt
 * coalescing) based on available cluster pools and the number of ports/CPUs,
 * and record them in @p before any queue sets are created.
 */
void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;

	/* One queue set per CPU per port, capped by the hardware limit. */
	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
	nqsets *= adap->params.nports;

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	/* Ring sizes must be powers of two; round down. */
	while (!powerof2(fl_q_size))
		fl_q_size--;

	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
	    is_offload(adap);

#if __FreeBSD_version >= 700111
	if (use_16k) {
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM16BYTES;
	} else {
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM9BYTES;
	}
#else
	jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
	jumbo_buf_size = MJUMPAGESIZE;
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->polling = 0;
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->jumbo_buf_size = jumbo_buf_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		/* Offload tx queue is tiny when offload is unsupported. */
		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
		q->cong_thres = 0;
	}
}
606
/*
 * Create the adapter-wide busdma tags: a parent tag plus child tags for
 * normal rx buffers, jumbo rx buffers and tx frames.  Returns 0 on success
 * or ENOMEM on failure (already-created tags are freed later by
 * t3_sge_free()).
 */
int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}
660
661 int
662 t3_sge_free(struct adapter * sc)
663 {
664
665 if (sc->tx_dmat != NULL)
666 bus_dma_tag_destroy(sc->tx_dmat);
667
668 if (sc->rx_jumbo_dmat != NULL)
669 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
670
671 if (sc->rx_dmat != NULL)
672 bus_dma_tag_destroy(sc->rx_dmat);
673
674 if (sc->parent_dmat != NULL)
675 bus_dma_tag_destroy(sc->parent_dmat);
676
677 return (0);
678 }
679
/*
 * Apply interrupt-coalescing parameters to a queue set: the holdoff timer
 * is expressed in SGE timer ticks (p->coalesce_usecs * 10, minimum 1) and
 * polling mode is forced off.
 */
void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}
687
#if !defined(__i386__) && !defined(__amd64__)
/*
 * bus_dmamap_load() callback for free-list buffers: records the first DMA
 * segment and the error/segment count into a struct refill_fl_cb_arg.
 * (x86 skips the load entirely and uses pmap_kextract instead.)
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
700 /**
701 * refill_fl - refill an SGE free-buffer list
702 * @sc: the controller softc
703 * @q: the free-list to refill
704 * @n: the number of new buffers to allocate
705 *
706 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
707 * The caller must assure that @n does not exceed the queue's capacity.
708 */
709 static void
710 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
711 {
712 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
713 struct rx_desc *d = &q->desc[q->pidx];
714 struct refill_fl_cb_arg cb_arg;
715 struct mbuf *m;
716 caddr_t cl;
717 int err;
718
719 cb_arg.error = 0;
720 while (n--) {
721 /*
722 * We allocate an uninitialized mbuf + cluster, mbuf is
723 * initialized after rx.
724 */
725 if (q->zone == zone_pack) {
726 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
727 break;
728 cl = m->m_ext.ext_buf;
729 } else {
730 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
731 break;
732 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
733 uma_zfree(q->zone, cl);
734 break;
735 }
736 }
737 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
738 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
739 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
740 uma_zfree(q->zone, cl);
741 goto done;
742 }
743 sd->flags |= RX_SW_DESC_MAP_CREATED;
744 }
745 #if !defined(__i386__) && !defined(__amd64__)
746 err = bus_dmamap_load(q->entry_tag, sd->map,
747 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
748
749 if (err != 0 || cb_arg.error) {
750 if (q->zone != zone_pack)
751 uma_zfree(q->zone, cl);
752 m_free(m);
753 goto done;
754 }
755 #else
756 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
757 #endif
758 sd->flags |= RX_SW_DESC_INUSE;
759 sd->rxsd_cl = cl;
760 sd->m = m;
761 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
762 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
763 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
764 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
765
766 d++;
767 sd++;
768
769 if (++q->pidx == q->size) {
770 q->pidx = 0;
771 q->gen ^= 1;
772 sd = q->sdesc;
773 d = q->desc;
774 }
775 q->credits++;
776 q->db_pending++;
777 }
778
779 done:
780 if (q->db_pending >= 32) {
781 q->db_pending = 0;
782 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
783 }
784 }
785
786
787 /**
788 * free_rx_bufs - free the Rx buffers on an SGE free list
789 * @sc: the controle softc
790 * @q: the SGE free list to clean up
791 *
792 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
793 * this queue should be stopped before calling this function.
794 */
795 static void
796 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
797 {
798 u_int cidx = q->cidx;
799
800 while (q->credits--) {
801 struct rx_sw_desc *d = &q->sdesc[cidx];
802
803 if (d->flags & RX_SW_DESC_INUSE) {
804 bus_dmamap_unload(q->entry_tag, d->map);
805 bus_dmamap_destroy(q->entry_tag, d->map);
806 if (q->zone == zone_pack) {
807 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
808 uma_zfree(zone_pack, d->m);
809 } else {
810 m_init(d->m, M_NOWAIT, MT_DATA, 0);
811 uma_zfree(zone_mbuf, d->m);
812 uma_zfree(q->zone, d->rxsd_cl);
813 }
814 }
815
816 d->rxsd_cl = NULL;
817 d->m = NULL;
818 if (++cidx == q->size)
819 cidx = 0;
820 }
821 }
822
/* Top up a free list by at most 16 buffers per call. */
static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}
828
829 static __inline void
830 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
831 {
832 uint32_t reclaimable = fl->size - fl->credits;
833
834 if (reclaimable > 0)
835 refill_fl(adap, fl, min(max, reclaimable));
836 }
837
838 /**
839 * recycle_rx_buf - recycle a receive buffer
840 * @adapter: the adapter
841 * @q: the SGE free list
842 * @idx: index of buffer to recycle
843 *
844 * Recycles the specified buffer on the given free list by adding it at
845 * the next available slot on the list.
846 */
847 static void
848 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
849 {
850 struct rx_desc *from = &q->desc[idx];
851 struct rx_desc *to = &q->desc[q->pidx];
852
853 q->sdesc[q->pidx] = q->sdesc[idx];
854 to->addr_lo = from->addr_lo; // already big endian
855 to->addr_hi = from->addr_hi; // likewise
856 wmb(); /* necessary ? */
857 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
858 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
859 q->credits++;
860
861 if (++q->pidx == q->size) {
862 q->pidx = 0;
863 q->gen ^= 1;
864 }
865 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
866 }
867
868 static void
869 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
870 {
871 uint32_t *addr;
872
873 addr = arg;
874 *addr = segs[0].ds_addr;
875 }
876
/*
 * Allocate a hardware descriptor ring and its parallel software state.
 *
 * @nelem/@elem_size: descriptor count and size; the ring is allocated as
 *	one physically-contiguous, page-aligned, zeroed region.
 * @sw_size: per-entry software descriptor size (0 = none); returned in
 *	*sdesc via malloc(M_WAITOK|M_ZERO).
 * @phys: receives the ring's bus address (via alloc_ring_cb).
 * @desc/@sdesc: receive the kva of the hw ring / sw descriptor array.
 * @tag/@map: receive the ring's dma tag and map.
 * @parent_entry_tag/@entry_tag: when non-NULL, a per-buffer dma tag is
 *	also created for the queue's individual packet buffers.
 *
 * Returns 0 or ENOMEM.
 * NOTE(review): on failure the partially-created tag/memory is not freed
 * here -- presumably the caller's teardown path handles it; confirm.
 */
static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		                      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}
923
/*
 * Taskqueue handler for slow (non-data) interrupts: runs the common slow
 * interrupt code, then re-enables the slow interrupt sources (the read
 * back flushes the write).
 */
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}
933
934 /**
935 * sge_timer_cb - perform periodic maintenance of an SGE qset
936 * @data: the SGE queue set to maintain
937 *
938 * Runs periodically from a timer to perform maintenance of an SGE queue
939 * set. It performs two tasks:
940 *
941 * a) Cleans up any completed Tx descriptors that may still be pending.
942 * Normal descriptor cleanup happens when new packets are added to a Tx
943 * queue so this timer is relatively infrequent and does any cleanup only
944 * if the Tx queue has not seen any new packets in a while. We make a
945 * best effort attempt to reclaim descriptors, in that we don't wait
946 * around if we cannot get a queue's lock (which most likely is because
947 * someone else is queueing new packets and so will also handle the clean
948 * up). Since control queues use immediate data exclusively we don't
949 * bother cleaning them up here.
950 *
951 * b) Replenishes Rx queues that have run out due to memory shortage.
952 * Normally new Rx buffers are added when existing ones are consumed but
953 * when out of memory a queue can become empty. We try to add only a few
954 * buffers here, the queue will be replenished fully as these new buffers
955 * are used up if memory shortage has subsided.
956 *
957 * c) Return coalesced response queue credits in case a response queue is
958 * starved.
959 *
960 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
961 * fifo overflows and the FW doesn't implement any recovery scheme yet.
962 */
963 static void
964 sge_timer_cb(void *arg)
965 {
966 adapter_t *sc = arg;
967 if ((sc->flags & USING_MSIX) == 0) {
968
969 struct port_info *pi;
970 struct sge_qset *qs;
971 struct sge_txq *txq;
972 int i, j;
973 int reclaim_ofl, refill_rx;
974
975 if (sc->open_device_map == 0)
976 return;
977
978 for (i = 0; i < sc->params.nports; i++) {
979 pi = &sc->port[i];
980 for (j = 0; j < pi->nqsets; j++) {
981 qs = &sc->sge.qs[pi->first_qset + j];
982 txq = &qs->txq[0];
983 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
984 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
985 (qs->fl[1].credits < qs->fl[1].size));
986 if (reclaim_ofl || refill_rx) {
987 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
988 break;
989 }
990 }
991 }
992 }
993
994 if (sc->params.nports > 2) {
995 int i;
996
997 for_each_port(sc, i) {
998 struct port_info *pi = &sc->port[i];
999
1000 t3_write_reg(sc, A_SG_KDOORBELL,
1001 F_SELEGRCNTX |
1002 (FW_TUNNEL_SGEEC_START + pi->first_qset));
1003 }
1004 }
1005 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
1006 sc->open_device_map != 0)
1007 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1008 }
1009
1010 /*
1011 * This is meant to be a catch-all function to keep sge state private
1012 * to sge.c
1013 *
1014 */
1015 int
1016 t3_sge_init_adapter(adapter_t *sc)
1017 {
1018 callout_init(&sc->sge_timer_ch, 1);
1019 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1020 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1021 return (0);
1022 }
1023
/* Restart the periodic reclaim timer after an adapter reset. */
int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}
1030
/* Per-port SGE init: set up the timer-driven reclaim task. */
int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}
1037
1038 /**
1039 * refill_rspq - replenish an SGE response queue
1040 * @adapter: the adapter
1041 * @q: the response queue to replenish
1042 * @credits: how many new responses to make available
1043 *
1044 * Replenishes a response queue by making the supplied number of responses
1045 * available to HW.
1046 */
1047 static __inline void
1048 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1049 {
1050
1051 /* mbufs are allocated on demand when a rspq entry is processed. */
1052 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1053 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1054 }
1055
/*
 * Taskqueue handler: reclaim completed tx descriptors on each of the three
 * tx queues (ETH, OFLD, CTRL) of a queue set, with a minimum batch of 16.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int qidx = 0;

	while (qidx < 3) {
		reclaim_completed_tx(qs, 16, qidx);
		qidx++;
	}
}
1065
/*
 * Taskqueue handler scheduled by sge_timer_cb (INTx/MSI only): reclaims
 * offload tx descriptors, tops up depleted free lists, and returns a
 * response-queue credit for any starved response queue.
 */
static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		/* With INTx/MSI all qsets share qs[0]'s response queue lock. */
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
			    &sc->sge.qs[0].rspq.lock;

		/* Best effort: skip the qset if someone else holds the lock. */
		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			/* Starved response queue: return one credit and ack. */
			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}
1106
1107 /**
1108 * init_qset_cntxt - initialize an SGE queue set context info
1109 * @qs: the queue set
1110 * @id: the queue set id
1111 *
1112 * Initializes the TIDs and context ids for the queues of a queue set.
1113 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	/* One rspq per qset; each qset owns two free lists. */
	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	/* Egress context ids / TIDs are carved out of per-type FW ranges. */
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	/* XXX: a sane limit is needed instead of INT_MAX */
	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
}
1132
1133
/*
 * Advance the producer state of a Tx queue by ndesc descriptors and
 * snapshot the pre-advance state (generation, completion flag, pidx)
 * into *txqs for the caller to build the work request with.
 */
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	/* Request a completion every 32 unacked descriptors. */
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	/*
	 * Sanity-check that the new pidx did not pass cidx.
	 * NOTE(review): the third clause (txqs->pidx < txq->cidx &&
	 * txq->cidx < txqs->pidx) can never be true — likely a typo for a
	 * wrap-around check; confirm intent before relying on it.
	 */
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq-> cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	/* Wrap the ring and flip the generation bit. */
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}
1165
1166 /**
1167 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1168 * @m: the packet mbufs
1169 * @nsegs: the number of segments
1170 *
1171 * Returns the number of Tx descriptors needed for the given Ethernet
1172 * packet. Ethernet packets require addition of WR and CPL headers.
1173 */
1174 static __inline unsigned int
1175 calc_tx_descs(const struct mbuf *m, int nsegs)
1176 {
1177 unsigned int flits;
1178
1179 if (m->m_pkthdr.len <= PIO_LEN)
1180 return 1;
1181
1182 flits = sgl_len(nsegs) + 2;
1183 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1184 flits++;
1185
1186 return flits_to_desc(flits);
1187 }
1188
1189 /**
1190 * make_sgl - populate a scatter/gather list for a packet
1191 * @sgp: the SGL to populate
1192 * @segs: the packet dma segments
1193 * @nsegs: the number of segments
1194 *
1195 * Generates a scatter/gather list for the buffers that make up a packet
1196 * and returns the SGL size in 8-byte words. The caller must size the SGL
1197 * appropriately.
1198 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	/* Each sg_ent packs two (len, addr) pairs; idx toggles 0/1. */
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		/*
		 * Advance to the next sg_ent once both slots of the
		 * current one are filled.  NOTE(review): the advance is
		 * keyed on the segment index i rather than the number of
		 * entries actually written, so if segs[0] is empty the
		 * first sg_ent is skipped unwritten — confirm this is
		 * intended (busdma may never produce a leading empty seg).
		 */
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	/* Terminate a half-filled final sg_ent with a zero entry. */
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}
1223
1224 /**
1225 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1226 * @adap: the adapter
1227 * @q: the Tx queue
1228 *
1229 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1230 * where the HW is going to sleep just after we checked, however,
1231 * then the interrupt handler will detect the outstanding TX packet
1232 * and ring the doorbell for us.
1233 *
1234 * When GTS is disabled we unconditionally ring the doorbell.
1235 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
{
#if USE_GTS
	/* GTS mode: ring only if the queue went to sleep. */
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	/*
	 * No GTS: coalesce doorbells — ring when forced by the caller or
	 * after 32 pending descriptors have accumulated.
	 */
	if (mustring || ++q->db_pending >= 32) {
		wmb();			/* write descriptors before telling HW */
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
		q->db_pending = 0;
	}
#endif
}
1259
/*
 * Write the generation bit into the last flit of a Tx descriptor (only
 * needed when the SGE uses two generation bits per descriptor).
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
1267
1268 /**
1269 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1270 * @ndesc: number of Tx descriptors spanned by the SGL
1271 * @txd: first Tx descriptor to be written
1272 * @txqs: txq state (generation and producer index)
1273 * @txq: the SGE Tx queue
1274 * @sgl: the SGL
1275 * @flits: number of flits to the start of the SGL in the first descriptor
1276 * @sgl_flits: the SGL size in flits
1277 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1278 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1279 *
1280 * Write a work request header and an associated SGL. If the SGL is
1281 * small enough to fit into one Tx descriptor it has already been written
1282 * and we just need to write the WR header. Otherwise we distribute the
1283 * SGL across the number of descriptors it spans.
1284 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		/* Single-descriptor WR: SGL already in place, write header. */
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
		    wr_lo);

		wr_gen2(txd, txqs->gen);

	} else {
		/*
		 * Multi-descriptor WR: copy the SGL in WR_FLITS-sized
		 * chunks, writing each continuation descriptor's header.
		 * The first descriptor's low word (with the original
		 * generation 'ogen') is written LAST so the hardware does
		 * not see a partially built WR.
		 */
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			/* Wrap the ring; flip the working generation bit. */
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		/* Mark the last descriptor as end-of-packet. */
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		/* Publish the WR by writing the first header's low word. */
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}
1351
1352 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1353 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1354
1355 #define GET_VTAG(cntrl, m) \
1356 do { \
1357 if ((m)->m_flags & M_VLANTAG) \
1358 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1359 } while (0)
1360
/*
 * Map an outbound packet (or coalesced packet chain) for DMA and write
 * its work request into the Ethernet Tx ring.  Three paths:
 *   1. m_nextpkt chain  -> single batched CPL_TX_PKT WR (up to 7 pkts),
 *   2. TSO packet       -> CPL_TX_PKT_LSO WR,
 *   3. ordinary packet  -> CPL_TX_PKT WR.
 * Small packets (<= PIO_LEN) are copied inline as immediate data and
 * freed here; larger ones are attached to the sw descriptor and freed
 * at reclaim time.  Called with the qset lock held.  Returns 0 or a
 * busdma error; on error the caller still owns *m.
 */
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;
		
	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
	
	/* TSO only applies to single, multi-mbuf packets. */
	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		/* Coalesced chain: one DMA segment per packet, one WR. */
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		/* Path 1: batched CPL_TX_PKT work request. */
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			/*
			 * NOTE(review): punning flit through a uint32_t*
			 * violates strict aliasing; kernel builds disable
			 * it, but a union would be cleaner.
			 */
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq, 0);
		return (0);		
	} else if (tso_info) {
		/* Path 2: TSO via CPL_TX_PKT_LSO. */
		uint16_t eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		void *l3hdr;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		/* Parse L2 (optionally VLAN-tagged) to locate the L3 hdr. */
		eh = mtod(m0, struct ether_header *);
		eth_type = eh->ether_type;
		if (eth_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *evh = (void *)eh;

			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
			l3hdr = evh + 1;
			eth_type = evh->evl_proto;
		} else {
			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
			l3hdr = eh + 1;
		}

		/* Fill in IP/IPv6 and TCP header sizes for the LSO engine. */
		if (eth_type == htons(ETHERTYPE_IP)) {
			struct ip *ip = l3hdr;

			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
			tcp = (struct tcphdr *)(ip + 1);
		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
			struct ip6_hdr *ip6 = l3hdr;

			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
			    ("%s: CSUM_TSO with ip6_nxt %d",
			    __func__, ip6->ip6_nxt));

			tso_info |= F_LSO_IPV6;
			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
			tcp = (struct tcphdr *)(ip6 + 1);
		} else
			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);

		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 3;	
	} else {
		/* Path 3: ordinary packet via CPL_TX_PKT. */
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			/* Small packet: copy inline, free the mbuf now. */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 2;
	}
	/* Common tail for large TSO/plain packets: build SGL + WR header. */
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq, 0);

	return (0);
}
1597
1598 #ifdef NETDUMP
/*
 * Netdump transmit hook: encapsulate one packet and ring the doorbell
 * unconditionally (mustring=1) since interrupts may not be running.
 * On failure the mbuf is freed and *m cleared so the caller does not
 * retry with a stale pointer.
 */
int
cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
{
	int error;

	error = t3_encap(qs, m);
	if (error == 0)
		check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
	else if (*m != NULL) {
		m_freem(*m);
		*m = NULL;
	}
	return (error);
}
1613 #endif
1614
/*
 * Periodic (hz/4) Tx watchdog: toggles packet-coalescing mode based on
 * ring occupancy and flushes any packets stranded in the buf_ring.
 */
void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	/* Hysteresis: leave coalescing when drained, enter when busy. */
        if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
                qs->coalescing = 0; 
        else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
                qs->coalescing = 1;
	/* Best effort flush; skip if the queue is busy elsewhere. */
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	/* Re-arm while the interface is running. */
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}
1638
/*
 * One-shot Tx timer: kicks a queue that has pending packets but saw no
 * transmit activity, entering coalescing mode if the ring is >1/8 full.
 */
static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

        if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
                qs->coalescing = 1;	
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}
1654
/*
 * Drain the qset's software ring into the hardware Tx queue.  Called
 * with the qset lock held.  Stops when the ring is empty, the interface
 * goes down, the link drops, or the hardware queue is nearly full.
 */
static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;

	/* Flush/timeout callers want a full reclaim first. */
	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		/* Leave room for a maximum-sized work request. */
		if (txq->size - txq->in_use <= TX_MAX_DESC)
			break;

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;
		
		m_head = NULL;
	}

	/* Flush any doorbells t3_encap deferred. */
	if (txq->db_pending)
		check_ring_tx_db(pi->adapter, txq, 1);

	/* Work remains: arm the one-shot timer to retry soon. */
	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}
1700
/*
 * Transmit one packet with the qset lock held: either push it straight
 * to hardware (fast path) or enqueue it on the buf_ring and let
 * cxgb_start_locked / the Tx timer drain it.
 */
static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue 
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
		if (t3_encap(qs, &m)) {
			/* Encap failed but left us the mbuf: requeue it. */
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0) 
				return (error);
		} else {
			/* Flush any deferred doorbell immediately. */
			if (txq->db_pending)
				check_ring_tx_db(pi->adapter, txq, 1);

			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	/* Drain now if not coalescing, or once enough packets built up. */
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}
1748
/*
 * if_transmit entry point: pick a queue set by flowid (RSS) and either
 * transmit under the qset lock or enqueue on its buf_ring if contended.
 * Packets are silently dropped while the interface or link is down.
 */
int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    ||(!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)	
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];
	
	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}
1776
/*
 * if_qflush entry point.  Intentionally a no-op for now: mbufs queued
 * in the per-qset buf_rings and hardware transmit queues are left in
 * place rather than being flushed.
 */
void
cxgb_qflush(struct ifnet *ifp)
{
}
1787
1788 /**
1789 * write_imm - write a packet into a Tx descriptor as immediate data
1790 * @d: the Tx descriptor to write
1791 * @m: the packet
1792 * @len: the length of packet data to write as immediate data
1793 * @gen: the generation bit value to write
1794 *
1795 * Writes a packet as immediate data into a Tx descriptor. The packet
1796 * contains a work request at its beginning. We must write the packet
1797 * carefully so the SGE doesn't read accidentally before it's written in
1798 * its entirety.
1799 */
static __inline void
write_imm(struct tx_desc *d, caddr_t src,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = (struct work_request_hdr *)src;
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	KASSERT(len <= WR_LEN && len >= sizeof(*from),
	    ("%s: invalid len %d", __func__, len));
	
	/* Copy the payload first; the header is published last. */
	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	/* Barrier before the generation bit so HW never sees a torn WR. */
	wmb();
	wr_gen2(d, gen);
}
1819
1820 /**
1821 * check_desc_avail - check descriptor availability on a send queue
1822 * @adap: the adapter
1823 * @q: the TX queue
1824 * @m: the packet needing the descriptors
1825 * @ndesc: the number of Tx descriptors needed
1826 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1827 *
1828 * Checks if the requested number of Tx descriptors is available on an
1829 * SGE send queue. If the queue is already suspended or not enough
1830 * descriptors are available the packet is queued for later transmission.
1831 * Must be called with the Tx queue locked.
1832 *
1833 * Returns 0 if enough descriptors are available, 1 if there aren't
1834 * enough descriptors and the packet has been queued, and 2 if the caller
1835 * needs to retry because there weren't enough descriptors at the
1836 * beginning of the call but some freed up in the mean time.
1837 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 unsigned int qid)
{
	/* 
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
	/* Queue already suspended: append and report "queued" (1). */
	if (__predict_false(mbufq_len(&q->sendq))) {
addq_exit:	(void )mbufq_enqueue(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		/*
		 * Mark the queue stopped; if a reclaim raced in and made
		 * room, clear the bit and tell the caller to retry (2).
		 */
		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}
1866
1867
1868 /**
1869 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1870 * @q: the SGE control Tx queue
1871 *
1872 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1873 * that send only immediate data (presently just the control queues) and
1874 * thus do not have any mbufs
1875 */
1876 static __inline void
1877 reclaim_completed_tx_imm(struct sge_txq *q)
1878 {
1879 unsigned int reclaim = q->processed - q->cleaned;
1880
1881 q->in_use -= reclaim;
1882 q->cleaned += reclaim;
1883 }
1884
1885 /**
1886 * ctrl_xmit - send a packet through an SGE control Tx queue
1887 * @adap: the adapter
1888 * @q: the control queue
1889 * @m: the packet
1890 *
1891 * Send a packet through an SGE control Tx queue. Packets sent through
1892 * a control queue must fit entirely as immediate data in a single Tx
1893 * descriptor and have no page fragments.
1894 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	
	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));

	/* Single-descriptor WR addressed by the control queue's TID. */
	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			/* Queued for later by check_desc_avail. */
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		/* ret == 2: space freed up meanwhile, retry. */
		goto again;
	}
	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
	
	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	/* Data was copied as immediate; the mbuf is ours to free. */
	m_free(m);
	return (0);
}
1933
1934
1935 /**
1936 * restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
1938 *
1939 * Resumes transmission on a suspended Tx control queue.
1940 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	/* Drain the backlog while descriptors are available. */
	while (q->in_use < q->size &&
	    (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
		m_free(m);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (mbufq_len(&q->sendq)) {
		/* Still backlogged: re-suspend, or retry if space freed. */
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
1976
1977
1978 /*
1979 * Send a management message through control queue 0
1980 */
1981 int
1982 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1983 {
1984 return ctrl_xmit(adap, &adap->sge.qs[0], m);
1985 }
1986
1987 /**
 *	t3_free_qset - free the resources of an SGE queue set
1989 * @sc: the controller owning the queue set
1990 * @q: the queue set
1991 *
1992 * Release the HW and SW resources associated with an SGE queue set, such
1993 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1994 * queue set must be quiesced prior to calling this.
1995 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;
	
	reclaim_completed_tx(q, 0, TXQ_ETH);
	/* Software transmit ring and legacy ifq. */
	if (q->txq[TXQ_ETH].txq_mr != NULL) 
		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
	}

	/* Free lists: disable HW context, then release DMA resources. */
	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	/*
	 * NOTE(review): unlock without a visible lock acquisition — the
	 * caller (t3_free_sge_resources) enters with q->lock held via
	 * TXQ_LOCK; confirm all callers follow that protocol.
	 */
	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	/* Tx queues: disable egress context, then release DMA resources. */
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	/* Response queue. */
	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);
		
		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
			        q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#if defined(INET6) || defined(INET)
	tcp_lro_free(&q->lro.ctrl);
#endif

	bzero(q, sizeof(*q));
}
2063
2064 /**
2065 * t3_free_sge_resources - free SGE resources
2066 * @sc: the adapter softc
2067 *
2068 * Frees resources used by the SGE queue sets.
2069 */
void
t3_free_sge_resources(adapter_t *sc, int nqsets)
{
	int i;

	for (i = 0; i < nqsets; ++i) {
		/* t3_free_qset expects the qset lock held; it unlocks
		 * and destroys the lock itself. */
		TXQ_LOCK(&sc->sge.qs[i]);
		t3_free_qset(sc, &sc->sge.qs[i]);
	}
}
2080
2081 /**
2082 * t3_sge_start - enable SGE
2083 * @sc: the controller softc
2084 *
2085 * Enables the SGE for DMAs. This is the last step in starting packet
2086 * transfers.
2087 */
void
t3_sge_start(adapter_t *sc)
{
	/* Global enable bit gates all SGE DMA activity. */
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
2093
2094 /**
2095 * t3_sge_stop - disable SGE operation
2096 * @sc: the adapter
2097 *
2098 * Disables the DMA engine. This can be called in emeregencies (e.g.,
2099 * from error interrupts) or from normal process context. In the latter
2100 * case it also disables any pending queue restart tasklets. Note that
2101 * if it is called in interrupt context it cannot disable the restart
2102 * tasklets as it cannot wait, however the tasklets will have no effect
2103 * since the doorbells are disabled and the driver will call this again
2104 * later from process context, at which time the tasklets will be stopped
2105 * if they are still running.
2106 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;
	
	/* Clearing the global enable bit halts all SGE DMA. */
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	/* No taskqueue yet: nothing to drain. */
	if (sc->tq == NULL)
		return;
	
	for (nqsets = i = 0; i < (sc)->params.nports; i++) 
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 * 
	 * XXX
	 */
	/* Draining qresume tasks is disabled pending a safe way to do
	 * it from both interrupt and process context (see header doc). */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];
		
		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}
2132
2133 /**
2134 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2135 * @adapter: the adapter
2136 * @q: the Tx queue to reclaim descriptors from
2137 * @reclaimable: the number of descriptors to reclaim
2138 * @m_vec_size: maximum number of buffers to reclaim
2139 * @desc_reclaimed: returns the number of descriptors reclaimed
2140 *
2141 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2142 * Tx buffers. Called with the Tx queue lock held.
2143 *
2144 * Returns number of buffers of reclaimed
2145 */
2146 void
2147 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2148 {
2149 struct tx_sw_desc *txsd;
2150 unsigned int cidx, mask;
2151 struct sge_txq *q = &qs->txq[queue];
2152
2153 #ifdef T3_TRACE
2154 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2155 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2156 #endif
2157 cidx = q->cidx;
2158 mask = q->size - 1;
2159 txsd = &q->sdesc[cidx];
2160
2161 mtx_assert(&qs->lock, MA_OWNED);
2162 while (reclaimable--) {
2163 prefetch(q->sdesc[(cidx + 1) & mask].m);
2164 prefetch(q->sdesc[(cidx + 2) & mask].m);
2165
2166 if (txsd->m != NULL) {
2167 if (txsd->flags & TX_SW_DESC_MAPPED) {
2168 bus_dmamap_unload(q->entry_tag, txsd->map);
2169 txsd->flags &= ~TX_SW_DESC_MAPPED;
2170 }
2171 m_freem_list(txsd->m);
2172 txsd->m = NULL;
2173 } else
2174 q->txq_skipped++;
2175
2176 ++txsd;
2177 if (++cidx == q->size) {
2178 cidx = 0;
2179 txsd = q->sdesc;
2180 }
2181 }
2182 q->cidx = cidx;
2183
2184 }
2185
2186 /**
2187 * is_new_response - check if a response is newly written
2188 * @r: the response descriptor
2189 * @q: the response queue
2190 *
2191 * Returns true if a response descriptor contains a yet unprocessed
2192 * response.
2193 */
2194 static __inline int
2195 is_new_response(const struct rsp_desc *r,
2196 const struct sge_rspq *q)
2197 {
2198 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2199 }
2200
2201 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2202 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2203 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2204 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2205 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2206
2207 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2208 #define NOMEM_INTR_DELAY 2500
2209
2210 #ifdef TCP_OFFLOAD
2211 /**
2212 * write_ofld_wr - write an offload work request
2213 * @adap: the adapter
2214 * @m: the packet to send
2215 * @q: the Tx queue
2216 * @pidx: index of the first Tx descriptor to write
2217 * @gen: the generation value to use
2218 * @ndesc: number of descriptors the packet will occupy
2219 *
2220 * Write an offload work request to send the supplied packet. The packet
2221 * data already carry the work request with most fields populated.
2222 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
    unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
	unsigned int sgl_flits, flits;
	int i, idx, nsegs, wrlen;
	struct work_request_hdr *from;
	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;
	struct sglist_seg *segs;
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
	struct sglist *sgl;

	from = (void *)(oh + 1);	/* Start of WR within mbuf */
	wrlen = m->m_len - sizeof(*oh);

	if (!(oh->flags & F_HDR_SGL)) {
		/* No SGL: the whole WR is immediate data. */
		write_imm(d, (caddr_t)from, wrlen, gen);

		/*
		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
		 * t3_push_frames and freed in wr_ack. Others, like those sent
		 * down by close_conn, t3_send_reset, etc. should be freed here.
		 */
		if (!(oh->flags & F_HDR_DF))
			m_free(m);
		return;
	}

	/* Copy the WR body (everything past the header flit) in place. */
	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));

	sgl = oh->sgl;
	flits = wrlen / 8;
	/*
	 * Single-descriptor WRs build the SGL directly after the WR body;
	 * otherwise build it in t3sgl and let write_wr_hdr_sgl() split it
	 * across descriptors.
	 */
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;

	/* Pack sglist segments into hardware SG entries, two per entry. */
	nsegs = sgl->sg_nseg;
	segs = sgl->sg_segs;
	for (idx = 0, i = 0; i < nsegs; i++) {
		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
		if (i && idx == 0)
			++sgp;
		sgp->len[idx] = htobe32(segs[i].ss_len);
		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
		idx ^= 1;
	}
	if (idx) {
		/* Odd segment count: zero the unused half of the last entry. */
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}

	sgl_flits = sgl_len(nsegs);
	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
	    from->wrh_hi, from->wrh_lo);
}
2282
2283 /**
2284 * ofld_xmit - send a packet through an offload queue
2285 * @adap: the adapter
2286 * @q: the Tx offload queue
2287 * @m: the packet
2288 *
2289 * Send an offload packet through an SGE offload queue.
2290 */
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	unsigned int ndesc;
	unsigned int pidx, gen;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);

	/* The descriptor count was pre-computed and stashed in the header. */
	ndesc = G_HDR_NDESC(oh->flags);

	TXQ_LOCK(qs);
again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			/*
			 * NOTE(review): presumably the mbuf was queued on the
			 * sendq by check_desc_avail, so it must not be freed
			 * by the caller — confirm against check_desc_avail.
			 */
			TXQ_UNLOCK(qs);
			return (EINTR);
		}
		/* Descriptors may have been freed in the meantime; retry. */
		goto again;
	}

	/* Claim ndesc descriptors; toggle the generation bit on wrap. */
	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}

	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
	check_ring_tx_db(adap, q, 1);
	TXQ_UNLOCK(qs);

	return (0);
}
2328
2329 /**
2330 * restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
2332 *
2333 * Resumes transmission on a suspended Tx offload queue.
2334 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	int cleaned;

	TXQ_LOCK(qs);
again:	cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);

	/* Drain the sendq for as long as descriptors are available. */
	while ((m = mbufq_first(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
		unsigned int ndesc = G_HDR_NDESC(oh->flags);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			/*
			 * Not enough room: mark the queue stopped, then
			 * re-check to close the race with a completion
			 * that may have just freed descriptors.
			 */
			setbit(&qs->txq_stopped, TXQ_OFLD);
			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		/* Claim ndesc descriptors; toggle gen when pidx wraps. */
		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		/* Drop the lock while writing the WR into the ring. */
		(void)mbufq_dequeue(&q->sendq);
		TXQ_UNLOCK(qs);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
		TXQ_LOCK(qs);
	}
#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	TXQ_UNLOCK(qs);
	wmb();
	/* Ring the doorbell so the SGE picks up the new descriptors. */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
2384
2385 /**
2386 * t3_offload_tx - send an offload packet
2387 * @m: the packet
2388 *
2389 * Sends an offload packet. We use the packet priority to select the
2390 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2391 * should be sent as regular or control, bits 1-3 select the queue set.
2392 */
2393 int
2394 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2395 {
2396 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2397 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2398
2399 if (oh->flags & F_HDR_CTRL) {
2400 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
2401 return (ctrl_xmit(sc, qs, m));
2402 } else
2403 return (ofld_xmit(sc, qs, m));
2404 }
2405 #endif
2406
2407 static void
2408 restart_tx(struct sge_qset *qs)
2409 {
2410 struct adapter *sc = qs->port->adapter;
2411
2412 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2413 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2414 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2415 qs->txq[TXQ_OFLD].restarts++;
2416 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2417 }
2418
2419 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2420 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2422 qs->txq[TXQ_CTRL].restarts++;
2423 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2424 }
2425 }
2426
2427 /**
2428 * t3_sge_alloc_qset - initialize an SGE queue set
2429 * @sc: the controller softc
2430 * @id: the queue set id
2431 * @nports: how many Ethernet ports will be using this queue set
2432 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2433 * @p: configuration parameters for this queue set
2434 * @ntxq: number of Tx queues for the queue set
2435 * @pi: port info for queue set
2436 *
2437 * Allocate resources and initialize an SGE queue set. A queue set
2438 * comprises a response queue, two Rx free-buffer queues, and up to 3
2439 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2440 * queue, offload queue, and control queue.
2441 */
2442 int
2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2444 const struct qset_params *p, int ntxq, struct port_info *pi)
2445 {
2446 struct sge_qset *q = &sc->sge.qs[id];
2447 int i, ret = 0;
2448
2449 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2450 q->port = pi;
2451 q->adap = sc;
2452
2453 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2454 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2455 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2456 goto err;
2457 }
2458 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2459 M_NOWAIT | M_ZERO)) == NULL) {
2460 device_printf(sc->dev, "failed to allocate ifq\n");
2461 goto err;
2462 }
2463 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2464 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2465 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2466 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2467 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2468
2469 init_qset_cntxt(q, id);
2470 q->idx = id;
2471 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2472 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2473 &q->fl[0].desc, &q->fl[0].sdesc,
2474 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2475 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2476 printf("error %d from alloc ring fl0\n", ret);
2477 goto err;
2478 }
2479
2480 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2481 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2482 &q->fl[1].desc, &q->fl[1].sdesc,
2483 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2484 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2485 printf("error %d from alloc ring fl1\n", ret);
2486 goto err;
2487 }
2488
2489 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2490 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2491 &q->rspq.desc_tag, &q->rspq.desc_map,
2492 NULL, NULL)) != 0) {
2493 printf("error %d from alloc ring rspq\n", ret);
2494 goto err;
2495 }
2496
2497 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2498 device_get_unit(sc->dev), irq_vec_idx);
2499 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2500
2501 for (i = 0; i < ntxq; ++i) {
2502 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2503
2504 if ((ret = alloc_ring(sc, p->txq_size[i],
2505 sizeof(struct tx_desc), sz,
2506 &q->txq[i].phys_addr, &q->txq[i].desc,
2507 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2508 &q->txq[i].desc_map,
2509 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2510 printf("error %d from alloc ring tx %i\n", ret, i);
2511 goto err;
2512 }
2513 mbufq_init(&q->txq[i].sendq, INT_MAX);
2514 q->txq[i].gen = 1;
2515 q->txq[i].size = p->txq_size[i];
2516 }
2517
2518 #ifdef TCP_OFFLOAD
2519 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2520 #endif
2521 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2522 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2523 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2524
2525 q->fl[0].gen = q->fl[1].gen = 1;
2526 q->fl[0].size = p->fl_size;
2527 q->fl[1].size = p->jumbo_size;
2528
2529 q->rspq.gen = 1;
2530 q->rspq.cidx = 0;
2531 q->rspq.size = p->rspq_size;
2532
2533 q->txq[TXQ_ETH].stop_thres = nports *
2534 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2535
2536 q->fl[0].buf_size = MCLBYTES;
2537 q->fl[0].zone = zone_pack;
2538 q->fl[0].type = EXT_PACKET;
2539
2540 if (p->jumbo_buf_size == MJUM16BYTES) {
2541 q->fl[1].zone = zone_jumbo16;
2542 q->fl[1].type = EXT_JUMBO16;
2543 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2544 q->fl[1].zone = zone_jumbo9;
2545 q->fl[1].type = EXT_JUMBO9;
2546 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2547 q->fl[1].zone = zone_jumbop;
2548 q->fl[1].type = EXT_JUMBOP;
2549 } else {
2550 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2551 ret = EDOOFUS;
2552 goto err;
2553 }
2554 q->fl[1].buf_size = p->jumbo_buf_size;
2555
2556 /* Allocate and setup the lro_ctrl structure */
2557 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2558 #if defined(INET6) || defined(INET)
2559 ret = tcp_lro_init(&q->lro.ctrl);
2560 if (ret) {
2561 printf("error %d from tcp_lro_init\n", ret);
2562 goto err;
2563 }
2564 #endif
2565 q->lro.ctrl.ifp = pi->ifp;
2566
2567 mtx_lock_spin(&sc->sge.reg_lock);
2568 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2569 q->rspq.phys_addr, q->rspq.size,
2570 q->fl[0].buf_size, 1, 0);
2571 if (ret) {
2572 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2573 goto err_unlock;
2574 }
2575
2576 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2577 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2578 q->fl[i].phys_addr, q->fl[i].size,
2579 q->fl[i].buf_size, p->cong_thres, 1,
2580 0);
2581 if (ret) {
2582 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2583 goto err_unlock;
2584 }
2585 }
2586
2587 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2588 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2589 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2590 1, 0);
2591 if (ret) {
2592 printf("error %d from t3_sge_init_ecntxt\n", ret);
2593 goto err_unlock;
2594 }
2595
2596 if (ntxq > 1) {
2597 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2598 USE_GTS, SGE_CNTXT_OFLD, id,
2599 q->txq[TXQ_OFLD].phys_addr,
2600 q->txq[TXQ_OFLD].size, 0, 1, 0);
2601 if (ret) {
2602 printf("error %d from t3_sge_init_ecntxt\n", ret);
2603 goto err_unlock;
2604 }
2605 }
2606
2607 if (ntxq > 2) {
2608 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2609 SGE_CNTXT_CTRL, id,
2610 q->txq[TXQ_CTRL].phys_addr,
2611 q->txq[TXQ_CTRL].size,
2612 q->txq[TXQ_CTRL].token, 1, 0);
2613 if (ret) {
2614 printf("error %d from t3_sge_init_ecntxt\n", ret);
2615 goto err_unlock;
2616 }
2617 }
2618
2619 mtx_unlock_spin(&sc->sge.reg_lock);
2620 t3_update_qset_coalesce(q, p);
2621
2622 refill_fl(sc, &q->fl[0], q->fl[0].size);
2623 refill_fl(sc, &q->fl[1], q->fl[1].size);
2624 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2625
2626 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2627 V_NEWTIMER(q->rspq.holdoff_tmr));
2628
2629 return (0);
2630
2631 err_unlock:
2632 mtx_unlock_spin(&sc->sge.reg_lock);
2633 err:
2634 TXQ_LOCK(q);
2635 t3_free_qset(sc, q);
2636
2637 return (ret);
2638 }
2639
2640 /*
2641 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2642 * ethernet data. Hardware assistance with various checksums and any vlan tag
2643 * will also be taken into account here.
2644 */
void
t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
	struct ifnet *ifp = pi->ifp;

	/* Propagate the hardware-extracted VLAN tag, if any. */
	if (cpl->vlan_valid) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}

	m->m_pkthdr.rcvif = ifp;
	/*
	 * adjust after conversion to mbuf chain: trim the CPL header and
	 * the alignment pad so m_data points at the Ethernet header.
	 */
	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
	m->m_len -= (sizeof(*cpl) + ethpad);
	m->m_data += (sizeof(*cpl) + ethpad);

	/* csum == 0xffff is the hardware's "checksum verified" value. */
	if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
		struct ether_header *eh = mtod(m, void *);
		uint16_t eh_type;

		/* Look through a VLAN header for the real ethertype. */
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *evh = mtod(m, void *);

			eh_type = evh->evl_proto;
		} else
			eh_type = eh->ether_type;

		if (ifp->if_capenable & IFCAP_RXCSUM &&
		    eh_type == htons(ETHERTYPE_IP)) {
			m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m->m_pkthdr.csum_data = 0xffff;
		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
		    eh_type == htons(ETHERTYPE_IPV6)) {
			m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
			    CSUM_PSEUDO_HDR);
			m->m_pkthdr.csum_data = 0xffff;
		}
	}
}
2689
2690 /**
2691 * get_packet - return the next ingress packet buffer from a free list
2692 * @adap: the adapter that received the packet
2693 * @drop_thres: # of remaining buffers before we start dropping packets
2694 * @qs: the qset that the SGE free list holding the packet belongs to
2695 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2696 * @r: response descriptor
2697 *
 *	Get the next packet from a free list and complete setup of the
 *	mbuf chain.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
2701 * positive drop threshold is supplied packets are dropped and their
2702 * buffers recycled if (a) the number of remaining buffers is under the
2703 * threshold and the packet is too big to copy, or (b) the packet should
2704 * be copied but there is no memory for the copy.
2705 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	/* F_RSPD_FLQ selects the jumbo free list. */
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	int mask, cidx = fl->cidx;
	struct rx_sw_desc *sd = &fl->sdesc[cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = M_EXT;
	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
	caddr_t cl;
	struct mbuf *m;
	int ret = 0;	/* 1 once a complete packet (EOP) is assembled */

	/* NOTE(review): drop_thres is currently unused in this function. */
	mask = fl->size - 1;
	prefetch(fl->sdesc[(cidx + 1) & mask].m);
	prefetch(fl->sdesc[(cidx + 2) & mask].m);
	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	/*
	 * Small single-descriptor packets are copied into a fresh mbuf so
	 * the DMA-mapped cluster can be recycled straight back to the list.
	 */
	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m, void *);
		memcpy(cl, sd->rxsd_cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m->m_pkthdr.len = m->m_len = len;
		m->m_flags = 0;
		mh->mh_head = mh->mh_tail = m;
		ret = 1;
		goto done;
	} else {
	skip_recycle:
		/* Hand the cluster itself up; unmap it from DMA first. */
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = sd->m;

		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags |= M_PKTHDR;
		m_init(m, M_NOWAIT, MT_DATA, flags);
		if (fl->zone == zone_pack) {
			/*
			 * restore clobbered data pointer
			 */
			m->m_data = m->m_ext.ext_buf;
		} else {
			m_cljset(m, cl, fl->type);
		}
		m->m_len = len;
	}
	/* Link the buffer into the chain according to its SOP/EOP bits. */
	switch(sopeop) {
	case RSPQ_SOP_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_SOP:
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		break;
	case RSPQ_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_NSOP_NEOP:
		if (mh->mh_tail == NULL) {
			/* Middle fragment with no head: chain was lost. */
			log(LOG_ERR, "discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		break;
	}
	if (cxgb_debug)
		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
2793
2794 /**
2795 * handle_rsp_cntrl_info - handles control information in a response
2796 * @qs: the queue set corresponding to the response
2797 * @flags: the response control flags
2798 *
2799 * Handles the control information of an SGE response, such as GTS
2800 * indications and completion credits for the queue set's Tx queues.
2801 * HW coalesces credits, we don't do any extra SW coalescing.
2802 */
2803 static __inline void
2804 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2805 {
2806 unsigned int credits;
2807
2808 #if USE_GTS
2809 if (flags & F_RSPD_TXQ0_GTS)
2810 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2811 #endif
2812 credits = G_RSPD_TXQ0_CR(flags);
2813 if (credits)
2814 qs->txq[TXQ_ETH].processed += credits;
2815
2816 credits = G_RSPD_TXQ2_CR(flags);
2817 if (credits)
2818 qs->txq[TXQ_CTRL].processed += credits;
2819
2820 # if USE_GTS
2821 if (flags & F_RSPD_TXQ1_GTS)
2822 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2823 # endif
2824 credits = G_RSPD_TXQ1_CR(flags);
2825 if (credits)
2826 qs->txq[TXQ_OFLD].processed += credits;
2827
2828 }
2829
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	/*
	 * Intentionally a no-op in this driver.  NOTE(review): presumably
	 * doorbell kicks are only required in the USE_GTS configuration,
	 * which is not enabled here — confirm before removing the stub.
	 */
	;
}
2836
2837 /**
2838 * process_responses - process responses from an SGE response queue
2839 * @adap: the adapter
2840 * @qs: the queue set to which the response queue belongs
2841 * @budget: how many responses can be processed in this round
2842 *
2843 * Process responses from an SGE response queue up to the supplied budget.
2844 * Responses include received packets as well as credits and other events
2845 * for the queues that belong to the response queue's queue set.
2846 * A negative budget is effectively unlimited.
2847 *
2848 * Additionally choose the interrupt holdoff time for the next interrupt
2849 * on this queue. If the system is under memory shortage use a fairly
2850 * long delay to help recovery.
2851 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
#if defined(INET6) || defined(INET)
	int lro_enabled = qs->lro.enabled;
	int skip_lro;
	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
		uint8_t opcode = r->rss_hdr.opcode;

		eth = (opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			struct mbuf *m;

			if (cxgb_debug)
				printf("async notification\n");

			if (mh->mh_head == NULL) {
				mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
				m = mh->mh_head;
			} else {
				m = m_gethdr(M_NOWAIT, MT_DATA);
			}
			if (m == NULL)
				goto no_mem;

			/* Copy the raw descriptor, tagged for CPL dispatch. */
			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
			*mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
			opcode = CPL_ASYNC_NOTIF;
			eop = 1;
			rspq->async_notif++;
			goto skip;
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			/* Small responses carry the payload inline. */
			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

			if (m == NULL) {
		no_mem:
				/* Out of mbufs: back off and retry later. */
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			if (mh->mh_head == NULL)
				mh->mh_head = m;
			else
				mh->mh_tail->m_next = m;
			mh->mh_tail = m;

			get_imm_packet(adap, r, m);
			mh->mh_head->m_pkthdr.len += m->m_len;
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			/* Payload lives in a free-list buffer. */
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			eop = get_packet(adap, drop_thresh, qs, mh, r);
			if (eop) {
				/* Propagate the RSS hash for flow steering. */
				if (r->rss_hdr.hash_type && !adap->timestamp) {
					M_HASHTYPE_SET(mh->mh_head,
					    M_HASHTYPE_OPAQUE_HASH);
					mh->mh_head->m_pkthdr.flowid = rss_hash;
				}
			}

			ethpad = 2;
		} else {
			/* Pure response: control/credits only, no data. */
			rspq->pure_rsps++;
		}
	skip:
		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		if (!eth && eop) {
			/* Completed non-Ethernet (offload) message. */
			rspq->offload_pkts++;
#ifdef TCP_OFFLOAD
			adap->cpl_handler[opcode](qs, r, mh->mh_head);
#else
			m_freem(mh->mh_head);
#endif
			mh->mh_head = NULL;
		} else if (eth && eop) {
			struct mbuf *m = mh->mh_head;

			t3_rx_eth(adap, m, ethpad);

			/*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending packet up
			 * lro_ctrl->ifp's input.  That is incorrect.
			 *
			 * The mbuf's rcvif was derived from the cpl header and
			 * is accurate.  Skip LRO and just use that.
			 */
#if defined(INET6) || defined(INET)
			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);

			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
			    ) {
				/* successfully queue'd for LRO */
			} else
#endif
			{
				/*
				 * LRO not enabled, packet unsuitable for LRO,
				 * or unable to queue.  Pass it up right now in
				 * either case.
				 */
				struct ifnet *ifp = m->m_pkthdr.rcvif;
				(*ifp->if_input)(ifp, m);
			}
			mh->mh_head = NULL;

		}

		/* Advance to the next descriptor; toggle gen on wrap. */
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		/* Return credits to the response queue in batches of 64. */
		if (++rspq->credits >= 64) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}
		__refill_fl_lt(adap, &qs->fl[0], 32);
		__refill_fl_lt(adap, &qs->fl[1], 32);
		--budget_left;
	}

#if defined(INET6) || defined(INET)
	/* Flush LRO */
	tcp_lro_flush_all(lro_ctrl);
#endif

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	mb();  /* commit Tx queue processed updates */
	/* txq_stopped > 1: some bit other than TXQ_ETH's (bit 0) is set. */
	if (__predict_false(qs->txq_stopped > 1))
		restart_tx(qs);

	/* Top up both free lists before leaving. */
	__refill_fl_lt(adap, &qs->fl[0], 512);
	__refill_fl_lt(adap, &qs->fl[1], 512);
	budget -= budget_left;
	return (budget);
}
3021
3022 /*
3023 * A helper function that processes responses and issues GTS.
3024 */
3025 static __inline int
3026 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3027 {
3028 int work;
3029 static int last_holdoff = 0;
3030
3031 work = process_responses(adap, rspq_to_qset(rq), -1);
3032
3033 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3034 printf("next_holdoff=%d\n", rq->next_holdoff);
3035 last_holdoff = rq->next_holdoff;
3036 }
3037 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3038 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3039
3040 return (work);
3041 }
3042
3043 #ifdef NETDUMP
int
cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs)
{

	/* Polled rx path for netdump: drain this qset's response queue. */
	return (process_responses_gts(adap, &qs->rspq));
}
3050 #endif
3051
3052 /*
3053 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3054 * Handles data events from SGE response queues as well as error and other
3055 * async events as they all use the same interrupt pin. We use one SGE
3056 * response queue per port in this mode and protect all response queues with
3057 * queue 0's lock.
3058 */
void
t3b_intr(void *data)
{
	uint32_t i, map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	/* Clear the interrupt, then read which queues have pending data. */
	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	/* Error condition: mask interrupts and defer to the slow task. */
	if (__predict_false(map & F_ERRINTR)) {
		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
	}

	/* All response queues are protected by queue 0's lock. */
	mtx_lock(&q0->lock);
	for_each_port(adap, i)
	    if (map & (1 << i))
		    process_responses_gts(adap, &adap->sge.qs[i].rspq);
	mtx_unlock(&q0->lock);
}
3084
3085 /*
3086 * The MSI interrupt handler. This needs to handle data events from SGE
3087 * response queues as well as error and other async events as they all use
3088 * the same MSI vector. We use one SGE response queue per port in this mode
3089 * and protect all response queues with queue 0's lock.
3090 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	int i, new_packets = 0;

	/* All response queues are protected by queue 0's lock. */
	mtx_lock(&q0->lock);

	for_each_port(adap, i)
	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
		    new_packets = 1;
	mtx_unlock(&q0->lock);
	if (new_packets == 0) {
		/*
		 * No data events found: mask interrupts and let the slow
		 * interrupt task handle the (error/async) cause.
		 */
		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
	}
}
3110
3111 void
3112 t3_intr_msix(void *data)
3113 {
3114 struct sge_qset *qs = data;
3115 adapter_t *adap = qs->port->adapter;
3116 struct sge_rspq *rspq = &qs->rspq;
3117
3118 if (process_responses_gts(adap, rspq) == 0)
3119 rspq->unhandled_irqs++;
3120 }
3121
3122 #define QDUMP_SBUF_SIZE 32 * 400
3123 static int
3124 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3125 {
3126 struct sge_rspq *rspq;
3127 struct sge_qset *qs;
3128 int i, err, dump_end, idx;
3129 struct sbuf *sb;
3130 struct rsp_desc *rspd;
3131 uint32_t data[4];
3132
3133 rspq = arg1;
3134 qs = rspq_to_qset(rspq);
3135 if (rspq->rspq_dump_count == 0)
3136 return (0);
3137 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3138 log(LOG_WARNING,
3139 "dump count is too large %d\n", rspq->rspq_dump_count);
3140 rspq->rspq_dump_count = 0;
3141 return (EINVAL);
3142 }
3143 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3144 log(LOG_WARNING,
3145 "dump start of %d is greater than queue size\n",
3146 rspq->rspq_dump_start);
3147 rspq->rspq_dump_start = 0;
3148 return (EINVAL);
3149 }
3150 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3151 if (err)
3152 return (err);
3153 err = sysctl_wire_old_buffer(req, 0);
3154 if (err)
3155 return (err);
3156 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3157
3158 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3159 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3160 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3161 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3162 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3163
3164 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3165 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3166
3167 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3168 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3169 idx = i & (RSPQ_Q_SIZE-1);
3170
3171 rspd = &rspq->desc[idx];
3172 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3173 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3174 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3175 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3176 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3177 be32toh(rspd->len_cq), rspd->intr_gen);
3178 }
3179
3180 err = sbuf_finish(sb);
3181 sbuf_delete(sb);
3182 return (err);
3183 }
3184
/*
 * Sysctl handler: dump a window of the Ethernet Tx queue's descriptors,
 * preceded by the egress context state.  The window is controlled by
 * txq_dump_start/txq_dump_count.
 */
static int
t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
{
	struct sge_txq *txq;
	struct sge_qset *qs;
	int i, j, err, dump_end;
	struct sbuf *sb;
	struct tx_desc *txd;
	uint32_t *WR, wr_hi, wr_lo, gen;
	uint32_t data[4];

	txq = arg1;
	qs = txq_to_qset(txq, TXQ_ETH);
	if (txq->txq_dump_count == 0) {
		return (0);
	}
	/* Validate the user-supplied dump window. */
	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
		log(LOG_WARNING,
		    "dump count is too large %d\n", txq->txq_dump_count);
		txq->txq_dump_count = 1;
		return (EINVAL);
	}
	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
		log(LOG_WARNING,
		    "dump start of %d is greater than queue size\n",
		    txq->txq_dump_start);
		txq->txq_dump_start = 0;
		return (EINVAL);
	}
	/*
	 * NOTE(review): this passes the response queue's context id
	 * (qs->rspq.cntxt_id) to t3_sge_read_ecntxt, which reads an egress
	 * context — looks suspicious; confirm the intended id.
	 */
	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
	if (err)
		return (err);
	err = sysctl_wire_old_buffer(req, 0);
	if (err)
		return (err);
	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);

	/* Decode the raw context words.  ("generation%u" is missing an
	 * '=' in the original format string; left as-is here.) */
	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
	    txq->txq_dump_start,
	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));

	/* Walk the requested window, wrapping indices modulo the ring size. */
	dump_end = txq->txq_dump_start + txq->txq_dump_count;
	for (i = txq->txq_dump_start; i < dump_end; i++) {
		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
		WR = (uint32_t *)txd->flit;
		wr_hi = ntohl(WR[0]);
		wr_lo = ntohl(WR[1]);
		gen = G_WR_GEN(wr_lo);

		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
		    wr_hi, wr_lo, gen);
		for (j = 2; j < 30; j += 4)
			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);

	}
	err = sbuf_finish(sb);
	sbuf_delete(sb);
	return (err);
}
3251
3252 static int
3253 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3254 {
3255 struct sge_txq *txq;
3256 struct sge_qset *qs;
3257 int i, j, err, dump_end;
3258 struct sbuf *sb;
3259 struct tx_desc *txd;
3260 uint32_t *WR, wr_hi, wr_lo, gen;
3261
3262 txq = arg1;
3263 qs = txq_to_qset(txq, TXQ_CTRL);
3264 if (txq->txq_dump_count == 0) {
3265 return (0);
3266 }
3267 if (txq->txq_dump_count > 256) {
3268 log(LOG_WARNING,
3269 "dump count is too large %d\n", txq->txq_dump_count);
3270 txq->txq_dump_count = 1;
3271 return (EINVAL);
3272 }
3273 if (txq->txq_dump_start > 255) {
3274 log(LOG_WARNING,
3275 "dump start of %d is greater than queue size\n",
3276 txq->txq_dump_start);
3277 txq->txq_dump_start = 0;
3278 return (EINVAL);
3279 }
3280
3281 err = sysctl_wire_old_buffer(req, 0);
3282 if (err != 0)
3283 return (err);
3284 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3285 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3286 txq->txq_dump_start,
3287 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3288
3289 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3290 for (i = txq->txq_dump_start; i < dump_end; i++) {
3291 txd = &txq->desc[i & (255)];
3292 WR = (uint32_t *)txd->flit;
3293 wr_hi = ntohl(WR[0]);
3294 wr_lo = ntohl(WR[1]);
3295 gen = G_WR_GEN(wr_lo);
3296
3297 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3298 wr_hi, wr_lo, gen);
3299 for (j = 2; j < 30; j += 4)
3300 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3301 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3302
3303 }
3304 err = sbuf_finish(sb);
3305 sbuf_delete(sb);
3306 return (err);
3307 }
3308
3309 static int
3310 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3311 {
3312 adapter_t *sc = arg1;
3313 struct qset_params *qsp = &sc->params.sge.qset[0];
3314 int coalesce_usecs;
3315 struct sge_qset *qs;
3316 int i, j, err, nqsets = 0;
3317 struct mtx *lock;
3318
3319 if ((sc->flags & FULL_INIT_DONE) == 0)
3320 return (ENXIO);
3321
3322 coalesce_usecs = qsp->coalesce_usecs;
3323 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3324
3325 if (err != 0) {
3326 return (err);
3327 }
3328 if (coalesce_usecs == qsp->coalesce_usecs)
3329 return (0);
3330
3331 for (i = 0; i < sc->params.nports; i++)
3332 for (j = 0; j < sc->port[i].nqsets; j++)
3333 nqsets++;
3334
3335 coalesce_usecs = max(1, coalesce_usecs);
3336
3337 for (i = 0; i < nqsets; i++) {
3338 qs = &sc->sge.qs[i];
3339 qsp = &sc->params.sge.qset[i];
3340 qsp->coalesce_usecs = coalesce_usecs;
3341
3342 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3343 &sc->sge.qs[0].rspq.lock;
3344
3345 mtx_lock(lock);
3346 t3_update_qset_coalesce(qs, qsp);
3347 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3348 V_NEWTIMER(qs->rspq.holdoff_tmr));
3349 mtx_unlock(lock);
3350 }
3351
3352 return (0);
3353 }
3354
3355 static int
3356 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3357 {
3358 adapter_t *sc = arg1;
3359 int rc, timestamp;
3360
3361 if ((sc->flags & FULL_INIT_DONE) == 0)
3362 return (ENXIO);
3363
3364 timestamp = sc->timestamp;
3365 rc = sysctl_handle_int(oidp, ×tamp, arg2, req);
3366
3367 if (rc != 0)
3368 return (rc);
3369
3370 if (timestamp != sc->timestamp) {
3371 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3372 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3373 sc->timestamp = timestamp;
3374 }
3375
3376 return (0);
3377 }
3378
3379 void
3380 t3_add_attach_sysctls(adapter_t *sc)
3381 {
3382 struct sysctl_ctx_list *ctx;
3383 struct sysctl_oid_list *children;
3384
3385 ctx = device_get_sysctl_ctx(sc->dev);
3386 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3387
3388 /* random information */
3389 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3390 "firmware_version",
3391 CTLFLAG_RD, sc->fw_version,
3392 0, "firmware version");
3393 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3394 "hw_revision",
3395 CTLFLAG_RD, &sc->params.rev,
3396 0, "chip model");
3397 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3398 "port_types",
3399 CTLFLAG_RD, sc->port_types,
3400 0, "type of ports");
3401 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3402 "enable_debug",
3403 CTLFLAG_RW, &cxgb_debug,
3404 0, "enable verbose debugging output");
3405 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3406 CTLFLAG_RD, &sc->tunq_coalesce,
3407 "#tunneled packets freed");
3408 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3409 "txq_overrun",
3410 CTLFLAG_RD, &txq_fills,
3411 0, "#times txq overrun");
3412 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3413 "core_clock",
3414 CTLFLAG_RD, &sc->params.vpd.cclk,
3415 0, "core clock frequency (in KHz)");
3416 }
3417
3418
/*
 * Sysctl node names for the per-qset response and transmit queues.
 * txq_names[0] is used for the Ethernet tx node and txq_names[2] for
 * the control-queue node below; index 1 presumably corresponds to the
 * offload queue (not registered here).
 */
static const char *rspq_name = "rspq";
static const char *txq_names[] =
{
	"txq_eth",
	"txq_ofld",
	"txq_ctrl"
};
3426
3427 static int
3428 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3429 {
3430 struct port_info *p = arg1;
3431 uint64_t *parg;
3432
3433 if (!p)
3434 return (EINVAL);
3435
3436 cxgb_refresh_stats(p);
3437 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3438
3439 return (sysctl_handle_64(oidp, parg, 0, req));
3440 }
3441
/*
 * Register the sysctl tree that depends on the final queue
 * configuration: adapter-wide tunables, one node per port, one node per
 * queue set (with rspq/txq/ctrlq statistics and descriptor-dump knobs),
 * and the full set of MAC statistics counters.
 */
void
t3_add_configured_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	int i, j;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* Adapter-wide knobs backed by handler functions above. */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_usecs,
	    "I", "interrupt coalescing timer (us)");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "pkt_timestamp",
	    CTLTYPE_INT | CTLFLAG_RW, sc,
	    0, t3_pkt_timestamp,
	    "I", "provide packet timestamp instead of connection hash");

	for (i = 0; i < sc->params.nports; i++) {
		struct port_info *pi = &sc->port[i];
		struct sysctl_oid *poid;
		struct sysctl_oid_list *poidlist;
		struct mac_stats *mstats = &pi->mac.stats;

		/* Per-port node: dev.<drv>.<unit>.portN */
		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
		poidlist = SYSCTL_CHILDREN(poid);
		SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
		    "nqsets", CTLFLAG_RD, &pi->nqsets,
		    0, "#queue sets");

		for (j = 0; j < pi->nqsets; j++) {
			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
			    *ctrlqpoid, *lropoid;
			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
			    *txqpoidlist, *ctrlqpoidlist,
			    *lropoidlist;
			struct sge_txq *txq = &qs->txq[TXQ_ETH];

			/* Per-queue-set node: portN.qsM */
			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);

			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
			qspoidlist = SYSCTL_CHILDREN(qspoid);

			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
			    CTLFLAG_RD, &qs->fl[0].empty, 0,
			    "freelist #0 empty");
			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
			    CTLFLAG_RD, &qs->fl[1].empty, 0,
			    "freelist #1 empty");

			/* Child nodes for the response, eth tx, ctrl tx and
			 * LRO statistics under this queue set. */
			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);

			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
			txqpoidlist = SYSCTL_CHILDREN(txqpoid);

			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);

			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
			lropoidlist = SYSCTL_CHILDREN(lropoid);

			/* Response queue statistics and dump controls. */
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
			    CTLFLAG_RD, &qs->rspq.size,
			    0, "#entries in response queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
			    CTLFLAG_RD, &qs->rspq.cidx,
			    0, "consumer index");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
			    CTLFLAG_RD, &qs->rspq.credits,
			    0, "#credits");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
			    CTLFLAG_RD, &qs->rspq.starved,
			    0, "#times starved");
			SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &qs->rspq.phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
			    0, "start rspq dump entry");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
			    0, "#rspq entries to dump");
			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
			    0, t3_dump_rspq, "A", "dump of the response queue");

			/* Ethernet transmit queue statistics and dump controls. */
			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
			    "#tunneled packets dropped");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
			    0, "#tunneled packets waiting to be sent");
#if 0
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
			    0, "#tunneled packets queue producer index");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
			    0, "#tunneled packets queue consumer index");
#endif
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
			    0, "#tunneled packets processed by the card");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
			    CTLFLAG_RD, &txq->cleaned,
			    0, "#tunneled packets cleaned");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
			    CTLFLAG_RD, &txq->in_use,
			    0, "#tunneled packet slots in use");
			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
			    CTLFLAG_RD, &txq->txq_frees,
			    "#tunneled packets freed");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
			    CTLFLAG_RD, &txq->txq_skipped,
			    0, "#tunneled packet descriptors skipped");
			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
			    CTLFLAG_RD, &txq->txq_coalesced,
			    "#tunneled packets coalesced");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
			    CTLFLAG_RD, &txq->txq_enqueued,
			    0, "#tunneled packets enqueued to hardware");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
			    CTLFLAG_RD, &qs->txq_stopped,
			    0, "tx queues stopped");
			SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &txq->phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
			    0, "txq generation");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
			    CTLFLAG_RD, &txq->cidx,
			    0, "hardware queue cidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
			    CTLFLAG_RD, &txq->pidx,
			    0, "hardware queue pidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
			    0, "txq start idx for dump");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
			    0, "txq #entries to dump");
			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");

			/* Control queue dump controls. */
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
			    0, "ctrlq start idx for dump");
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
			    0, "ctrl #entries to dump");
			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");

			/* LRO counters maintained by tcp_lro(4). */
			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
		}

		/* Now add a node for mac stats. */
		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
		    CTLFLAG_RD, NULL, "MAC statistics");
		poidlist = SYSCTL_CHILDREN(poid);

		/*
		 * We (ab)use the length argument (arg2) to pass on the offset
		 * of the data that we are interested in.  This is only required
		 * for the quad counters that are updated from the hardware (we
		 * make sure that we return the latest value).
		 * sysctl_handle_macstat first updates *all* the counters from
		 * the hardware, and then returns the latest value of the
		 * requested counter.  Best would be to update only the
		 * requested counter from hardware, but t3_mac_update_stats()
		 * hides all the register details and we don't want to dive into
		 * all that here.
		 */
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

		/* Software-maintained counters are plain ulongs read directly. */
#define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}
3704
3705 /**
3706 * t3_get_desc - dump an SGE descriptor for debugging purposes
3707 * @qs: the queue set
3708 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3709 * @idx: the descriptor index in the queue
3710 * @data: where to dump the descriptor contents
3711 *
3712 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3713 * size of the descriptor.
3714 */
3715 int
3716 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3717 unsigned char *data)
3718 {
3719 if (qnum >= 6)
3720 return (EINVAL);
3721
3722 if (qnum < 3) {
3723 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3724 return -EINVAL;
3725 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3726 return sizeof(struct tx_desc);
3727 }
3728
3729 if (qnum == 3) {
3730 if (!qs->rspq.desc || idx >= qs->rspq.size)
3731 return (EINVAL);
3732 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3733 return sizeof(struct rsp_desc);
3734 }
3735
3736 qnum -= 4;
3737 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3738 return (EINVAL);
3739 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3740 return sizeof(struct rx_desc);
3741 }
/* Source-browser extraction residue, preserved as a comment so the file
 * tail is not stray tokens: Cache object: b5ff541e0bcdd8d83a27616611d9b690 */