1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Chelsio Corporation nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28
29 ***************************************************************************/
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <sys/bus.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/rman.h>
46 #include <sys/queue.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
49
50 #include <sys/proc.h>
51 #include <sys/sbuf.h>
52 #include <sys/sched.h>
53 #include <sys/smp.h>
54 #include <sys/systm.h>
55 #include <sys/syslog.h>
56 #include <sys/socket.h>
57 #include <sys/sglist.h>
58
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if_vlan_var.h>
64
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70
71 #include <dev/pci/pcireg.h>
72 #include <dev/pci/pcivar.h>
73
74 #include <vm/vm.h>
75 #include <vm/pmap.h>
76
77 #include <cxgb_include.h>
78 #include <sys/mvec.h>
79
80 int txq_fills = 0;
81 int multiq_tx_enable = 1;
82
83 #ifdef TCP_OFFLOAD
84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
85 #endif
86
87 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
90 "size of per-queue mbuf ring");
91
92 static int cxgb_tx_coalesce_force = 0;
93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
94 &cxgb_tx_coalesce_force, 0,
95 "coalesce small packets into a single work request regardless of ring state");
96
97 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1
98 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
99 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2
100 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5
101 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5
102 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2
103 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6
104
105
106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
108 &cxgb_tx_coalesce_enable_start, 0,
109 "coalesce enable threshold");
110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
112 &cxgb_tx_coalesce_enable_stop, 0,
113 "coalesce disable threshold");
114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
116 &cxgb_tx_reclaim_threshold, 0,
117 "tx cleaning minimum threshold");
118
119 /*
120 * XXX don't re-enable this until TOE stops assuming
121 * we have an m_ext
122 */
123 static int recycle_enable = 0;
124
125 extern int cxgb_use_16k_clusters;
126 extern int nmbjumbop;
127 extern int nmbjumbo9;
128 extern int nmbjumbo16;
129
130 #define USE_GTS 0
131
132 #define SGE_RX_SM_BUF_SIZE 1536
133 #define SGE_RX_DROP_THRES 16
134 #define SGE_RX_COPY_THRES 128
135
136 /*
137 * Period of the Tx buffer reclaim timer. This timer does not need to run
138 * frequently as Tx buffers are usually reclaimed by new Tx packets.
139 */
140 #define TX_RECLAIM_PERIOD (hz >> 1)
141
142 /*
143 * Values for sge_txq.flags
144 */
145 enum {
146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
148 };
149
150 struct tx_desc {
151 uint64_t flit[TX_DESC_FLITS];
152 } __packed;
153
154 struct rx_desc {
155 uint32_t addr_lo;
156 uint32_t len_gen;
157 uint32_t gen2;
158 uint32_t addr_hi;
159 } __packed;
160
161 struct rsp_desc { /* response queue descriptor */
162 struct rss_header rss_hdr;
163 uint32_t flags;
164 uint32_t len_cq;
165 uint8_t imm_data[47];
166 uint8_t intr_gen;
167 } __packed;
168
169 #define RX_SW_DESC_MAP_CREATED (1 << 0)
170 #define TX_SW_DESC_MAP_CREATED (1 << 1)
171 #define RX_SW_DESC_INUSE (1 << 3)
172 #define TX_SW_DESC_MAPPED (1 << 4)
173
174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
178
179 struct tx_sw_desc { /* SW state per Tx descriptor */
180 struct mbuf *m;
181 bus_dmamap_t map;
182 int flags;
183 };
184
185 struct rx_sw_desc { /* SW state per Rx descriptor */
186 caddr_t rxsd_cl;
187 struct mbuf *m;
188 bus_dmamap_t map;
189 int flags;
190 };
191
192 struct txq_state {
193 unsigned int compl;
194 unsigned int gen;
195 unsigned int pidx;
196 };
197
198 struct refill_fl_cb_arg {
199 int error;
200 bus_dma_segment_t seg;
201 int nseg;
202 };
203
204
205 /*
206 * Maps a number of flits to the number of Tx descriptors that can hold them.
207 * The formula is
208 *
209 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
210 *
211 * HW allows up to 4 descriptors to be combined into a WR.
212 */
213 static uint8_t flit_desc_map[] = {
214 0,
215 #if SGE_NUM_GENBITS == 1
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
218 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
219 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
220 #elif SGE_NUM_GENBITS == 2
221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
222 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
223 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
224 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
225 #else
226 # error "SGE_NUM_GENBITS must be 1 or 2"
227 #endif
228 };
229
230 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED)
231 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock)
232 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock)
233 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock)
234 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define TXQ_RING_NEEDS_ENQUEUE(qs) \
236 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
237 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \
239 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
240 #define TXQ_RING_DEQUEUE(qs) \
241 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
242
243 int cxgb_debug = 0;
244
245 static void sge_timer_cb(void *arg);
246 static void sge_timer_reclaim(void *arg, int ncount);
247 static void sge_txq_reclaim_handler(void *arg, int ncount);
248 static void cxgb_start_locked(struct sge_qset *qs);
249
250 /*
251 * XXX need to cope with bursty scheduling by looking at a wider
252 * window than we are now for determining the need for coalescing
253 *
254 */
255 static __inline uint64_t
256 check_pkt_coalesce(struct sge_qset *qs)
257 {
258 struct adapter *sc;
259 struct sge_txq *txq;
260 uint8_t *fill;
261
262 if (__predict_false(cxgb_tx_coalesce_force))
263 return (1);
264 txq = &qs->txq[TXQ_ETH];
265 sc = qs->port->adapter;
266 fill = &sc->tunq_fill[qs->idx];
267
268 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
269 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
270 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
271 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN;
272 /*
273 * if the hardware transmit queue is more than 1/8 full
274 * we mark it as coalescing - we drop back from coalescing
275 * when we go below 1/32 full and there are no packets enqueued,
276 * this provides us with some degree of hysteresis
277 */
278 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
279 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
280 *fill = 0;
281 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
282 *fill = 1;
283
284 return (sc->tunq_coalesce);
285 }
286
287 #ifdef __LP64__
288 static void
289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
290 {
291 uint64_t wr_hilo;
292 #if _BYTE_ORDER == _LITTLE_ENDIAN
293 wr_hilo = wr_hi;
294 wr_hilo |= (((uint64_t)wr_lo)<<32);
295 #else
296 wr_hilo = wr_lo;
297 wr_hilo |= (((uint64_t)wr_hi)<<32);
298 #endif
299 wrp->wrh_hilo = wr_hilo;
300 }
301 #else
302 static void
303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
304 {
305
306 wrp->wrh_hi = wr_hi;
307 wmb();
308 wrp->wrh_lo = wr_lo;
309 }
310 #endif
311
312 struct coalesce_info {
313 int count;
314 int nbytes;
315 int noncoal;
316 };
317
318 static int
319 coalesce_check(struct mbuf *m, void *arg)
320 {
321 struct coalesce_info *ci = arg;
322
323 if ((m->m_next != NULL) ||
324 ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
325 ci->noncoal = 1;
326
327 if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
328 (ci->nbytes + m->m_len <= 10500))) {
329 ci->count++;
330 ci->nbytes += m->m_len;
331 return (1);
332 }
333 return (0);
334 }
335
336 static struct mbuf *
337 cxgb_dequeue(struct sge_qset *qs)
338 {
339 struct mbuf *m, *m_head, *m_tail;
340 struct coalesce_info ci;
341
342
343 if (check_pkt_coalesce(qs) == 0)
344 return TXQ_RING_DEQUEUE(qs);
345
346 m_head = m_tail = NULL;
347 ci.count = ci.nbytes = ci.noncoal = 0;
348 do {
349 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
350 if (m_head == NULL) {
351 m_tail = m_head = m;
352 } else if (m != NULL) {
353 m_tail->m_nextpkt = m;
354 m_tail = m;
355 }
356 } while (m != NULL);
357 if (ci.count > 7)
358 panic("trying to coalesce %d packets in to one WR", ci.count);
359 return (m_head);
360 }
361
362 /**
363 * reclaim_completed_tx - reclaims completed Tx descriptors
364 * @adapter: the adapter
365 * @q: the Tx queue to reclaim completed descriptors from
366 *
367 * Reclaims Tx descriptors that the SGE has indicated it has processed,
368 * and frees the associated buffers if possible. Called with the Tx
369 * queue's lock held.
370 */
371 static __inline int
372 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
373 {
374 struct sge_txq *q = &qs->txq[queue];
375 int reclaim = desc_reclaimable(q);
376
377 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
378 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
379 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
380
381 if (reclaim < reclaim_min)
382 return (0);
383
384 mtx_assert(&qs->lock, MA_OWNED);
385 if (reclaim > 0) {
386 t3_free_tx_desc(qs, reclaim, queue);
387 q->cleaned += reclaim;
388 q->in_use -= reclaim;
389 }
390 if (isset(&qs->txq_stopped, TXQ_ETH))
391 clrbit(&qs->txq_stopped, TXQ_ETH);
392
393 return (reclaim);
394 }
395
396 #ifdef NETDUMP
397 int
398 cxgb_netdump_poll_tx(struct sge_qset *qs)
399 {
400
401 return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
402 }
403 #endif
404
405 /**
406 * should_restart_tx - are there enough resources to restart a Tx queue?
407 * @q: the Tx queue
408 *
409 * Checks if there are enough descriptors to restart a suspended Tx queue.
410 */
411 static __inline int
412 should_restart_tx(const struct sge_txq *q)
413 {
414 unsigned int r = q->processed - q->cleaned;
415
416 return q->in_use - r < (q->size >> 1);
417 }
418
419 /**
420 * t3_sge_init - initialize SGE
421 * @adap: the adapter
422 * @p: the SGE parameters
423 *
424 * Performs SGE initialization needed every time after a chip reset.
425 * We do not initialize any of the queue sets here, instead the driver
426 * top-level must request those individually. We also do not enable DMA
427 * here, that should be done after the queues have been set up.
428 */
429 void
430 t3_sge_init(adapter_t *adap, struct sge_params *p)
431 {
432 u_int ctrl, ups;
433
434 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
435
436 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
437 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
438 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
439 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
440 #if SGE_NUM_GENBITS == 1
441 ctrl |= F_EGRGENCTRL;
442 #endif
443 if (adap->params.rev > 0) {
444 if (!(adap->flags & (USING_MSIX | USING_MSI)))
445 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
446 }
447 t3_write_reg(adap, A_SG_CONTROL, ctrl);
448 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
449 V_LORCQDRBTHRSH(512));
450 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
451 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
452 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
453 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
454 adap->params.rev < T3_REV_C ? 1000 : 500);
455 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
456 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
457 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
458 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
459 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
460 }
461
462
463 /**
464 * sgl_len - calculates the size of an SGL of the given capacity
465 * @n: the number of SGL entries
466 *
467 * Calculates the number of flits needed for a scatter/gather list that
468 * can hold the given number of entries.
469 */
470 static __inline unsigned int
471 sgl_len(unsigned int n)
472 {
473 return ((3 * n) / 2 + (n & 1));
474 }
475
476 /**
477 * get_imm_packet - return the next ingress packet buffer from a response
478 * @resp: the response descriptor containing the packet data
479 *
480 * Return a packet containing the immediate data of the given response.
481 */
482 static int
483 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
484 {
485
486 if (resp->rss_hdr.opcode == CPL_RX_DATA) {
487 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
488 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
489 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
490 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
491 m->m_len = sizeof(*cpl) + ntohs(cpl->len);
492 } else
493 m->m_len = IMMED_PKT_SIZE;
494 m->m_ext.ext_buf = NULL;
495 m->m_ext.ext_type = 0;
496 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
497 return (0);
498 }
499
500 static __inline u_int
501 flits_to_desc(u_int n)
502 {
503 return (flit_desc_map[n]);
504 }
505
506 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
507 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
508 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
509 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
510 F_HIRCQPARITYERROR)
511 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
512 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
513 F_RSPQDISABLED)
514
515 /**
516 * t3_sge_err_intr_handler - SGE async event interrupt handler
517 * @adapter: the adapter
518 *
519 * Interrupt handler for SGE asynchronous (non-data) events.
520 */
521 void
522 t3_sge_err_intr_handler(adapter_t *adapter)
523 {
524 unsigned int v, status;
525
526 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
527 if (status & SGE_PARERR)
528 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
529 status & SGE_PARERR);
530 if (status & SGE_FRAMINGERR)
531 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
532 status & SGE_FRAMINGERR);
533 if (status & F_RSPQCREDITOVERFOW)
534 CH_ALERT(adapter, "SGE response queue credit overflow\n");
535
536 if (status & F_RSPQDISABLED) {
537 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
538
539 CH_ALERT(adapter,
540 "packet delivered to disabled response queue (0x%x)\n",
541 (v >> S_RSPQ0DISABLED) & 0xff);
542 }
543
544 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
545 if (status & SGE_FATALERR)
546 t3_fatal_err(adapter);
547 }
548
549 void
550 t3_sge_prep(adapter_t *adap, struct sge_params *p)
551 {
552 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
553
554 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
555 nqsets *= adap->params.nports;
556
557 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
558
559 while (!powerof2(fl_q_size))
560 fl_q_size--;
561
562 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
563 is_offload(adap);
564
565 #if __FreeBSD_version >= 700111
566 if (use_16k) {
567 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
568 jumbo_buf_size = MJUM16BYTES;
569 } else {
570 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
571 jumbo_buf_size = MJUM9BYTES;
572 }
573 #else
574 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
575 jumbo_buf_size = MJUMPAGESIZE;
576 #endif
577 while (!powerof2(jumbo_q_size))
578 jumbo_q_size--;
579
580 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
581 device_printf(adap->dev,
582 "Insufficient clusters and/or jumbo buffers.\n");
583
584 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
585
586 for (i = 0; i < SGE_QSETS; ++i) {
587 struct qset_params *q = p->qset + i;
588
589 if (adap->params.nports > 2) {
590 q->coalesce_usecs = 50;
591 } else {
592 #ifdef INVARIANTS
593 q->coalesce_usecs = 10;
594 #else
595 q->coalesce_usecs = 5;
596 #endif
597 }
598 q->polling = 0;
599 q->rspq_size = RSPQ_Q_SIZE;
600 q->fl_size = fl_q_size;
601 q->jumbo_size = jumbo_q_size;
602 q->jumbo_buf_size = jumbo_buf_size;
603 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
604 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
605 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
606 q->cong_thres = 0;
607 }
608 }
609
610 int
611 t3_sge_alloc(adapter_t *sc)
612 {
613
614 /* The parent tag. */
615 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
616 1, 0, /* algnmnt, boundary */
617 BUS_SPACE_MAXADDR, /* lowaddr */
618 BUS_SPACE_MAXADDR, /* highaddr */
619 NULL, NULL, /* filter, filterarg */
620 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
621 BUS_SPACE_UNRESTRICTED, /* nsegments */
622 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
623 0, /* flags */
624 NULL, NULL, /* lock, lockarg */
625 &sc->parent_dmat)) {
626 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
627 return (ENOMEM);
628 }
629
630 /*
631 * DMA tag for normal sized RX frames
632 */
633 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
634 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
635 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
636 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
637 return (ENOMEM);
638 }
639
640 /*
641 * DMA tag for jumbo sized RX frames.
642 */
643 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
644 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
645 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
646 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
647 return (ENOMEM);
648 }
649
650 /*
651 * DMA tag for TX frames.
652 */
653 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
654 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
655 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
656 NULL, NULL, &sc->tx_dmat)) {
657 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
658 return (ENOMEM);
659 }
660
661 return (0);
662 }
663
664 int
665 t3_sge_free(struct adapter * sc)
666 {
667
668 if (sc->tx_dmat != NULL)
669 bus_dma_tag_destroy(sc->tx_dmat);
670
671 if (sc->rx_jumbo_dmat != NULL)
672 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
673
674 if (sc->rx_dmat != NULL)
675 bus_dma_tag_destroy(sc->rx_dmat);
676
677 if (sc->parent_dmat != NULL)
678 bus_dma_tag_destroy(sc->parent_dmat);
679
680 return (0);
681 }
682
683 void
684 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
685 {
686
687 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
688 qs->rspq.polling = 0 /* p->polling */;
689 }
690
691 #if !defined(__i386__) && !defined(__amd64__)
692 static void
693 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
694 {
695 struct refill_fl_cb_arg *cb_arg = arg;
696
697 cb_arg->error = error;
698 cb_arg->seg = segs[0];
699 cb_arg->nseg = nseg;
700
701 }
702 #endif
703 /**
704 * refill_fl - refill an SGE free-buffer list
705 * @sc: the controller softc
706 * @q: the free-list to refill
707 * @n: the number of new buffers to allocate
708 *
709 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
710 * The caller must assure that @n does not exceed the queue's capacity.
711 */
712 static void
713 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
714 {
715 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
716 struct rx_desc *d = &q->desc[q->pidx];
717 struct refill_fl_cb_arg cb_arg;
718 struct mbuf *m;
719 caddr_t cl;
720 int err;
721
722 cb_arg.error = 0;
723 while (n--) {
724 /*
725 * We allocate an uninitialized mbuf + cluster, mbuf is
726 * initialized after rx.
727 */
728 if (q->zone == zone_pack) {
729 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
730 break;
731 cl = m->m_ext.ext_buf;
732 } else {
733 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
734 break;
735 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
736 uma_zfree(q->zone, cl);
737 break;
738 }
739 }
740 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
741 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
742 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
743 uma_zfree(q->zone, cl);
744 goto done;
745 }
746 sd->flags |= RX_SW_DESC_MAP_CREATED;
747 }
748 #if !defined(__i386__) && !defined(__amd64__)
749 err = bus_dmamap_load(q->entry_tag, sd->map,
750 cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
751
752 if (err != 0 || cb_arg.error) {
753 if (q->zone != zone_pack)
754 uma_zfree(q->zone, cl);
755 m_free(m);
756 goto done;
757 }
758 #else
759 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
760 #endif
761 sd->flags |= RX_SW_DESC_INUSE;
762 sd->rxsd_cl = cl;
763 sd->m = m;
764 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
765 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
766 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
767 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
768
769 d++;
770 sd++;
771
772 if (++q->pidx == q->size) {
773 q->pidx = 0;
774 q->gen ^= 1;
775 sd = q->sdesc;
776 d = q->desc;
777 }
778 q->credits++;
779 q->db_pending++;
780 }
781
782 done:
783 if (q->db_pending >= 32) {
784 q->db_pending = 0;
785 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
786 }
787 }
788
789
790 /**
791 * free_rx_bufs - free the Rx buffers on an SGE free list
792 * @sc: the controle softc
793 * @q: the SGE free list to clean up
794 *
795 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
796 * this queue should be stopped before calling this function.
797 */
798 static void
799 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
800 {
801 u_int cidx = q->cidx;
802
803 while (q->credits--) {
804 struct rx_sw_desc *d = &q->sdesc[cidx];
805
806 if (d->flags & RX_SW_DESC_INUSE) {
807 bus_dmamap_unload(q->entry_tag, d->map);
808 bus_dmamap_destroy(q->entry_tag, d->map);
809 if (q->zone == zone_pack) {
810 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
811 uma_zfree(zone_pack, d->m);
812 } else {
813 m_init(d->m, M_NOWAIT, MT_DATA, 0);
814 uma_zfree(zone_mbuf, d->m);
815 uma_zfree(q->zone, d->rxsd_cl);
816 }
817 }
818
819 d->rxsd_cl = NULL;
820 d->m = NULL;
821 if (++cidx == q->size)
822 cidx = 0;
823 }
824 }
825
826 static __inline void
827 __refill_fl(adapter_t *adap, struct sge_fl *fl)
828 {
829 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
830 }
831
832 static __inline void
833 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
834 {
835 uint32_t reclaimable = fl->size - fl->credits;
836
837 if (reclaimable > 0)
838 refill_fl(adap, fl, min(max, reclaimable));
839 }
840
841 /**
842 * recycle_rx_buf - recycle a receive buffer
843 * @adapter: the adapter
844 * @q: the SGE free list
845 * @idx: index of buffer to recycle
846 *
847 * Recycles the specified buffer on the given free list by adding it at
848 * the next available slot on the list.
849 */
850 static void
851 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
852 {
853 struct rx_desc *from = &q->desc[idx];
854 struct rx_desc *to = &q->desc[q->pidx];
855
856 q->sdesc[q->pidx] = q->sdesc[idx];
857 to->addr_lo = from->addr_lo; // already big endian
858 to->addr_hi = from->addr_hi; // likewise
859 wmb(); /* necessary ? */
860 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
861 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
862 q->credits++;
863
864 if (++q->pidx == q->size) {
865 q->pidx = 0;
866 q->gen ^= 1;
867 }
868 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
869 }
870
871 static void
872 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
873 {
874 uint32_t *addr;
875
876 addr = arg;
877 *addr = segs[0].ds_addr;
878 }
879
880 static int
881 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
882 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
883 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
884 {
885 size_t len = nelem * elem_size;
886 void *s = NULL;
887 void *p = NULL;
888 int err;
889
890 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
891 BUS_SPACE_MAXADDR_32BIT,
892 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
893 len, 0, NULL, NULL, tag)) != 0) {
894 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
895 return (ENOMEM);
896 }
897
898 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
899 map)) != 0) {
900 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
901 return (ENOMEM);
902 }
903
904 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
905 bzero(p, len);
906 *(void **)desc = p;
907
908 if (sw_size) {
909 len = nelem * sw_size;
910 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
911 *(void **)sdesc = s;
912 }
913 if (parent_entry_tag == NULL)
914 return (0);
915
916 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
917 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
918 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
919 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
920 NULL, NULL, entry_tag)) != 0) {
921 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
922 return (ENOMEM);
923 }
924 return (0);
925 }
926
927 static void
928 sge_slow_intr_handler(void *arg, int ncount)
929 {
930 adapter_t *sc = arg;
931
932 t3_slow_intr_handler(sc);
933 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
934 (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
935 }
936
937 /**
938 * sge_timer_cb - perform periodic maintenance of an SGE qset
939 * @data: the SGE queue set to maintain
940 *
941 * Runs periodically from a timer to perform maintenance of an SGE queue
942 * set. It performs two tasks:
943 *
944 * a) Cleans up any completed Tx descriptors that may still be pending.
945 * Normal descriptor cleanup happens when new packets are added to a Tx
946 * queue so this timer is relatively infrequent and does any cleanup only
947 * if the Tx queue has not seen any new packets in a while. We make a
948 * best effort attempt to reclaim descriptors, in that we don't wait
949 * around if we cannot get a queue's lock (which most likely is because
950 * someone else is queueing new packets and so will also handle the clean
951 * up). Since control queues use immediate data exclusively we don't
952 * bother cleaning them up here.
953 *
954 * b) Replenishes Rx queues that have run out due to memory shortage.
955 * Normally new Rx buffers are added when existing ones are consumed but
956 * when out of memory a queue can become empty. We try to add only a few
957 * buffers here, the queue will be replenished fully as these new buffers
958 * are used up if memory shortage has subsided.
959 *
960 * c) Return coalesced response queue credits in case a response queue is
961 * starved.
962 *
963 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
964 * fifo overflows and the FW doesn't implement any recovery scheme yet.
965 */
966 static void
967 sge_timer_cb(void *arg)
968 {
969 adapter_t *sc = arg;
970 if ((sc->flags & USING_MSIX) == 0) {
971
972 struct port_info *pi;
973 struct sge_qset *qs;
974 struct sge_txq *txq;
975 int i, j;
976 int reclaim_ofl, refill_rx;
977
978 if (sc->open_device_map == 0)
979 return;
980
981 for (i = 0; i < sc->params.nports; i++) {
982 pi = &sc->port[i];
983 for (j = 0; j < pi->nqsets; j++) {
984 qs = &sc->sge.qs[pi->first_qset + j];
985 txq = &qs->txq[0];
986 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
987 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
988 (qs->fl[1].credits < qs->fl[1].size));
989 if (reclaim_ofl || refill_rx) {
990 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
991 break;
992 }
993 }
994 }
995 }
996
997 if (sc->params.nports > 2) {
998 int i;
999
1000 for_each_port(sc, i) {
1001 struct port_info *pi = &sc->port[i];
1002
1003 t3_write_reg(sc, A_SG_KDOORBELL,
1004 F_SELEGRCNTX |
1005 (FW_TUNNEL_SGEEC_START + pi->first_qset));
1006 }
1007 }
1008 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
1009 sc->open_device_map != 0)
1010 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1011 }
1012
1013 /*
1014 * This is meant to be a catch-all function to keep sge state private
1015 * to sge.c
1016 *
1017 */
1018 int
1019 t3_sge_init_adapter(adapter_t *sc)
1020 {
1021 callout_init(&sc->sge_timer_ch, 1);
1022 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1023 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1024 return (0);
1025 }
1026
1027 int
1028 t3_sge_reset_adapter(adapter_t *sc)
1029 {
1030 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1031 return (0);
1032 }
1033
1034 int
1035 t3_sge_init_port(struct port_info *pi)
1036 {
1037 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1038 return (0);
1039 }
1040
1041 /**
1042 * refill_rspq - replenish an SGE response queue
1043 * @adapter: the adapter
1044 * @q: the response queue to replenish
1045 * @credits: how many new responses to make available
1046 *
1047 * Replenishes a response queue by making the supplied number of responses
1048 * available to HW.
1049 */
1050 static __inline void
1051 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1052 {
1053
1054 /* mbufs are allocated on demand when a rspq entry is processed. */
1055 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1056 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1057 }
1058
1059 static void
1060 sge_txq_reclaim_handler(void *arg, int ncount)
1061 {
1062 struct sge_qset *qs = arg;
1063 int i;
1064
1065 for (i = 0; i < 3; i++)
1066 reclaim_completed_tx(qs, 16, i);
1067 }
1068
1069 static void
1070 sge_timer_reclaim(void *arg, int ncount)
1071 {
1072 struct port_info *pi = arg;
1073 int i, nqsets = pi->nqsets;
1074 adapter_t *sc = pi->adapter;
1075 struct sge_qset *qs;
1076 struct mtx *lock;
1077
1078 KASSERT((sc->flags & USING_MSIX) == 0,
1079 ("can't call timer reclaim for msi-x"));
1080
1081 for (i = 0; i < nqsets; i++) {
1082 qs = &sc->sge.qs[pi->first_qset + i];
1083
1084 reclaim_completed_tx(qs, 16, TXQ_OFLD);
1085 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1086 &sc->sge.qs[0].rspq.lock;
1087
1088 if (mtx_trylock(lock)) {
1089 /* XXX currently assume that we are *NOT* polling */
1090 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1091
1092 if (qs->fl[0].credits < qs->fl[0].size - 16)
1093 __refill_fl(sc, &qs->fl[0]);
1094 if (qs->fl[1].credits < qs->fl[1].size - 16)
1095 __refill_fl(sc, &qs->fl[1]);
1096
1097 if (status & (1 << qs->rspq.cntxt_id)) {
1098 if (qs->rspq.credits) {
1099 refill_rspq(sc, &qs->rspq, 1);
1100 qs->rspq.credits--;
1101 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1102 1 << qs->rspq.cntxt_id);
1103 }
1104 }
1105 mtx_unlock(lock);
1106 }
1107 }
1108 }
1109
1110 /**
1111 * init_qset_cntxt - initialize an SGE queue set context info
1112 * @qs: the queue set
1113 * @id: the queue set id
1114 *
1115 * Initializes the TIDs and context ids for the queues of a queue set.
1116 */
1117 static void
1118 init_qset_cntxt(struct sge_qset *qs, u_int id)
1119 {
1120
1121 qs->rspq.cntxt_id = id;
1122 qs->fl[0].cntxt_id = 2 * id;
1123 qs->fl[1].cntxt_id = 2 * id + 1;
1124 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1125 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1126 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1127 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1128 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1129
1130 /* XXX: a sane limit is needed instead of INT_MAX */
1131 mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
1132 mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
1133 mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
1134 }
1135
1136
1137 static void
1138 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1139 {
1140 txq->in_use += ndesc;
1141 /*
1142 * XXX we don't handle stopping of queue
1143 * presumably start handles this when we bump against the end
1144 */
1145 txqs->gen = txq->gen;
1146 txq->unacked += ndesc;
1147 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1148 txq->unacked &= 31;
1149 txqs->pidx = txq->pidx;
1150 txq->pidx += ndesc;
1151 #ifdef INVARIANTS
1152 if (((txqs->pidx > txq->cidx) &&
1153 (txq->pidx < txqs->pidx) &&
1154 (txq->pidx >= txq->cidx)) ||
1155 ((txqs->pidx < txq->cidx) &&
1156 (txq->pidx >= txq-> cidx)) ||
1157 ((txqs->pidx < txq->cidx) &&
1158 (txq->cidx < txqs->pidx)))
1159 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1160 txqs->pidx, txq->pidx, txq->cidx);
1161 #endif
1162 if (txq->pidx >= txq->size) {
1163 txq->pidx -= txq->size;
1164 txq->gen ^= 1;
1165 }
1166
1167 }
1168
1169 /**
1170 * calc_tx_descs - calculate the number of Tx descriptors for a packet
1171 * @m: the packet mbufs
1172 * @nsegs: the number of segments
1173 *
1174 * Returns the number of Tx descriptors needed for the given Ethernet
1175 * packet. Ethernet packets require addition of WR and CPL headers.
1176 */
1177 static __inline unsigned int
1178 calc_tx_descs(const struct mbuf *m, int nsegs)
1179 {
1180 unsigned int flits;
1181
1182 if (m->m_pkthdr.len <= PIO_LEN)
1183 return 1;
1184
1185 flits = sgl_len(nsegs) + 2;
1186 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1187 flits++;
1188
1189 return flits_to_desc(flits);
1190 }
1191
1192 /**
1193 * make_sgl - populate a scatter/gather list for a packet
1194 * @sgp: the SGL to populate
1195 * @segs: the packet dma segments
1196 * @nsegs: the number of segments
1197 *
1198 * Generates a scatter/gather list for the buffers that make up a packet
1199 * and returns the SGL size in 8-byte words. The caller must size the SGL
1200 * appropriately.
1201 */
1202 static __inline void
1203 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1204 {
1205 int i, idx;
1206
1207 for (idx = 0, i = 0; i < nsegs; i++) {
1208 /*
1209 * firmware doesn't like empty segments
1210 */
1211 if (segs[i].ds_len == 0)
1212 continue;
1213 if (i && idx == 0)
1214 ++sgp;
1215
1216 sgp->len[idx] = htobe32(segs[i].ds_len);
1217 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1218 idx ^= 1;
1219 }
1220
1221 if (idx) {
1222 sgp->len[idx] = 0;
1223 sgp->addr[idx] = 0;
1224 }
1225 }
1226
1227 /**
1228 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1229 * @adap: the adapter
1230 * @q: the Tx queue
1231 *
1232 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1233 * where the HW is going to sleep just after we checked, however,
1234 * then the interrupt handler will detect the outstanding TX packet
1235 * and ring the doorbell for us.
1236 *
1237 * When GTS is disabled we unconditionally ring the doorbell.
1238 */
1239 static __inline void
1240 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1241 {
1242 #if USE_GTS
1243 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1244 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1245 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1246 #ifdef T3_TRACE
1247 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1248 q->cntxt_id);
1249 #endif
1250 t3_write_reg(adap, A_SG_KDOORBELL,
1251 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1252 }
1253 #else
1254 if (mustring || ++q->db_pending >= 32) {
1255 wmb(); /* write descriptors before telling HW */
1256 t3_write_reg(adap, A_SG_KDOORBELL,
1257 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1258 q->db_pending = 0;
1259 }
1260 #endif
1261 }
1262
1263 static __inline void
1264 wr_gen2(struct tx_desc *d, unsigned int gen)
1265 {
1266 #if SGE_NUM_GENBITS == 2
1267 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1268 #endif
1269 }
1270
1271 /**
1272 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1273 * @ndesc: number of Tx descriptors spanned by the SGL
1274 * @txd: first Tx descriptor to be written
1275 * @txqs: txq state (generation and producer index)
1276 * @txq: the SGE Tx queue
1277 * @sgl: the SGL
1278 * @flits: number of flits to the start of the SGL in the first descriptor
1279 * @sgl_flits: the SGL size in flits
1280 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1281 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1282 *
1283 * Write a work request header and an associated SGL. If the SGL is
1284 * small enough to fit into one Tx descriptor it has already been written
1285 * and we just need to write the WR header. Otherwise we distribute the
1286 * SGL across the number of descriptors it spans.
1287 */
1288 static void
1289 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1290 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1291 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1292 {
1293
1294 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1295
1296 if (__predict_true(ndesc == 1)) {
1297 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1298 V_WR_SGLSFLT(flits)) | wr_hi,
1299 htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
1300 wr_lo);
1301
1302 wr_gen2(txd, txqs->gen);
1303
1304 } else {
1305 unsigned int ogen = txqs->gen;
1306 const uint64_t *fp = (const uint64_t *)sgl;
1307 struct work_request_hdr *wp = wrp;
1308
1309 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1310 V_WR_SGLSFLT(flits)) | wr_hi;
1311
1312 while (sgl_flits) {
1313 unsigned int avail = WR_FLITS - flits;
1314
1315 if (avail > sgl_flits)
1316 avail = sgl_flits;
1317 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1318 sgl_flits -= avail;
1319 ndesc--;
1320 if (!sgl_flits)
1321 break;
1322
1323 fp += avail;
1324 txd++;
1325 if (++txqs->pidx == txq->size) {
1326 txqs->pidx = 0;
1327 txqs->gen ^= 1;
1328 txd = txq->desc;
1329 }
1330
1331 /*
1332 * when the head of the mbuf chain
1333 * is freed all clusters will be freed
1334 * with it
1335 */
1336 wrp = (struct work_request_hdr *)txd;
1337 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1338 V_WR_SGLSFLT(1)) | wr_hi;
1339 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1340 sgl_flits + 1)) |
1341 V_WR_GEN(txqs->gen)) | wr_lo;
1342 wr_gen2(txd, txqs->gen);
1343 flits = 1;
1344 }
1345 wrp->wrh_hi |= htonl(F_WR_EOP);
1346 wmb();
1347 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1348 wr_gen2((struct tx_desc *)wp, ogen);
1349 }
1350 }
1351
1352 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1353 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1354
1355 #define GET_VTAG(cntrl, m) \
1356 do { \
1357 if ((m)->m_flags & M_VLANTAG) \
1358 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1359 } while (0)
1360
1361 static int
1362 t3_encap(struct sge_qset *qs, struct mbuf **m)
1363 {
1364 adapter_t *sc;
1365 struct mbuf *m0;
1366 struct sge_txq *txq;
1367 struct txq_state txqs;
1368 struct port_info *pi;
1369 unsigned int ndesc, flits, cntrl, mlen;
1370 int err, nsegs, tso_info = 0;
1371
1372 struct work_request_hdr *wrp;
1373 struct tx_sw_desc *txsd;
1374 struct sg_ent *sgp, *sgl;
1375 uint32_t wr_hi, wr_lo, sgl_flits;
1376 bus_dma_segment_t segs[TX_MAX_SEGS];
1377
1378 struct tx_desc *txd;
1379
1380 pi = qs->port;
1381 sc = pi->adapter;
1382 txq = &qs->txq[TXQ_ETH];
1383 txd = &txq->desc[txq->pidx];
1384 txsd = &txq->sdesc[txq->pidx];
1385 sgl = txq->txq_sgl;
1386
1387 prefetch(txd);
1388 m0 = *m;
1389
1390 mtx_assert(&qs->lock, MA_OWNED);
1391 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1392 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1393
1394 if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1395 m0->m_pkthdr.csum_flags & (CSUM_TSO))
1396 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1397
1398 if (m0->m_nextpkt != NULL) {
1399 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1400 ndesc = 1;
1401 mlen = 0;
1402 } else {
1403 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1404 &m0, segs, &nsegs))) {
1405 if (cxgb_debug)
1406 printf("failed ... err=%d\n", err);
1407 return (err);
1408 }
1409 mlen = m0->m_pkthdr.len;
1410 ndesc = calc_tx_descs(m0, nsegs);
1411 }
1412 txq_prod(txq, ndesc, &txqs);
1413
1414 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1415 txsd->m = m0;
1416
1417 if (m0->m_nextpkt != NULL) {
1418 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1419 int i, fidx;
1420
1421 if (nsegs > 7)
1422 panic("trying to coalesce %d packets in to one WR", nsegs);
1423 txq->txq_coalesced += nsegs;
1424 wrp = (struct work_request_hdr *)txd;
1425 flits = nsegs*2 + 1;
1426
1427 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1428 struct cpl_tx_pkt_batch_entry *cbe;
1429 uint64_t flit;
1430 uint32_t *hflit = (uint32_t *)&flit;
1431 int cflags = m0->m_pkthdr.csum_flags;
1432
1433 cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1434 GET_VTAG(cntrl, m0);
1435 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1436 if (__predict_false(!(cflags & CSUM_IP)))
1437 cntrl |= F_TXPKT_IPCSUM_DIS;
1438 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
1439 CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1440 cntrl |= F_TXPKT_L4CSUM_DIS;
1441
1442 hflit[0] = htonl(cntrl);
1443 hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1444 flit |= htobe64(1 << 24);
1445 cbe = &cpl_batch->pkt_entry[i];
1446 cbe->cntrl = hflit[0];
1447 cbe->len = hflit[1];
1448 cbe->addr = htobe64(segs[i].ds_addr);
1449 }
1450
1451 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1452 V_WR_SGLSFLT(flits)) |
1453 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1454 wr_lo = htonl(V_WR_LEN(flits) |
1455 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1456 set_wr_hdr(wrp, wr_hi, wr_lo);
1457 wmb();
1458 ETHER_BPF_MTAP(pi->ifp, m0);
1459 wr_gen2(txd, txqs.gen);
1460 check_ring_tx_db(sc, txq, 0);
1461 return (0);
1462 } else if (tso_info) {
1463 uint16_t eth_type;
1464 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1465 struct ether_header *eh;
1466 void *l3hdr;
1467 struct tcphdr *tcp;
1468
1469 txd->flit[2] = 0;
1470 GET_VTAG(cntrl, m0);
1471 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1472 hdr->cntrl = htonl(cntrl);
1473 hdr->len = htonl(mlen | 0x80000000);
1474
1475 if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1476 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
1477 m0, mlen, m0->m_pkthdr.tso_segsz,
1478 (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
1479 panic("tx tso packet too small");
1480 }
1481
1482 /* Make sure that ether, ip, tcp headers are all in m0 */
1483 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1484 m0 = m_pullup(m0, TCPPKTHDRSIZE);
1485 if (__predict_false(m0 == NULL)) {
1486 /* XXX panic probably an overreaction */
1487 panic("couldn't fit header into mbuf");
1488 }
1489 }
1490
1491 eh = mtod(m0, struct ether_header *);
1492 eth_type = eh->ether_type;
1493 if (eth_type == htons(ETHERTYPE_VLAN)) {
1494 struct ether_vlan_header *evh = (void *)eh;
1495
1496 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
1497 l3hdr = evh + 1;
1498 eth_type = evh->evl_proto;
1499 } else {
1500 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
1501 l3hdr = eh + 1;
1502 }
1503
1504 if (eth_type == htons(ETHERTYPE_IP)) {
1505 struct ip *ip = l3hdr;
1506
1507 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
1508 tcp = (struct tcphdr *)(ip + 1);
1509 } else if (eth_type == htons(ETHERTYPE_IPV6)) {
1510 struct ip6_hdr *ip6 = l3hdr;
1511
1512 KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
1513 ("%s: CSUM_TSO with ip6_nxt %d",
1514 __func__, ip6->ip6_nxt));
1515
1516 tso_info |= F_LSO_IPV6;
1517 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
1518 tcp = (struct tcphdr *)(ip6 + 1);
1519 } else
1520 panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
1521
1522 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
1523 hdr->lso_info = htonl(tso_info);
1524
1525 if (__predict_false(mlen <= PIO_LEN)) {
1526 /*
1527 * pkt not undersized but fits in PIO_LEN
1528 * Indicates a TSO bug at the higher levels.
1529 */
1530 txsd->m = NULL;
1531 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1532 flits = (mlen + 7) / 8 + 3;
1533 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1534 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1535 F_WR_SOP | F_WR_EOP | txqs.compl);
1536 wr_lo = htonl(V_WR_LEN(flits) |
1537 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1538 set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1539 wmb();
1540 ETHER_BPF_MTAP(pi->ifp, m0);
1541 wr_gen2(txd, txqs.gen);
1542 check_ring_tx_db(sc, txq, 0);
1543 m_freem(m0);
1544 return (0);
1545 }
1546 flits = 3;
1547 } else {
1548 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1549
1550 GET_VTAG(cntrl, m0);
1551 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1552 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1553 cntrl |= F_TXPKT_IPCSUM_DIS;
1554 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
1555 CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1556 cntrl |= F_TXPKT_L4CSUM_DIS;
1557 cpl->cntrl = htonl(cntrl);
1558 cpl->len = htonl(mlen | 0x80000000);
1559
1560 if (mlen <= PIO_LEN) {
1561 txsd->m = NULL;
1562 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1563 flits = (mlen + 7) / 8 + 2;
1564
1565 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1566 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1567 F_WR_SOP | F_WR_EOP | txqs.compl);
1568 wr_lo = htonl(V_WR_LEN(flits) |
1569 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1570 set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1571 wmb();
1572 ETHER_BPF_MTAP(pi->ifp, m0);
1573 wr_gen2(txd, txqs.gen);
1574 check_ring_tx_db(sc, txq, 0);
1575 m_freem(m0);
1576 return (0);
1577 }
1578 flits = 2;
1579 }
1580 wrp = (struct work_request_hdr *)txd;
1581 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1582 make_sgl(sgp, segs, nsegs);
1583
1584 sgl_flits = sgl_len(nsegs);
1585
1586 ETHER_BPF_MTAP(pi->ifp, m0);
1587
1588 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1589 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1590 wr_lo = htonl(V_WR_TID(txq->token));
1591 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1592 sgl_flits, wr_hi, wr_lo);
1593 check_ring_tx_db(sc, txq, 0);
1594
1595 return (0);
1596 }
1597
1598 #ifdef NETDUMP
1599 int
1600 cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m)
1601 {
1602 int error;
1603
1604 error = t3_encap(qs, m);
1605 if (error == 0)
1606 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
1607 else if (*m != NULL) {
1608 m_freem(*m);
1609 *m = NULL;
1610 }
1611 return (error);
1612 }
1613 #endif
1614
1615 void
1616 cxgb_tx_watchdog(void *arg)
1617 {
1618 struct sge_qset *qs = arg;
1619 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1620
1621 if (qs->coalescing != 0 &&
1622 (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1623 TXQ_RING_EMPTY(qs))
1624 qs->coalescing = 0;
1625 else if (qs->coalescing == 0 &&
1626 (txq->in_use >= cxgb_tx_coalesce_enable_start))
1627 qs->coalescing = 1;
1628 if (TXQ_TRYLOCK(qs)) {
1629 qs->qs_flags |= QS_FLUSHING;
1630 cxgb_start_locked(qs);
1631 qs->qs_flags &= ~QS_FLUSHING;
1632 TXQ_UNLOCK(qs);
1633 }
1634 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1635 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1636 qs, txq->txq_watchdog.c_cpu);
1637 }
1638
1639 static void
1640 cxgb_tx_timeout(void *arg)
1641 {
1642 struct sge_qset *qs = arg;
1643 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1644
1645 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1646 qs->coalescing = 1;
1647 if (TXQ_TRYLOCK(qs)) {
1648 qs->qs_flags |= QS_TIMEOUT;
1649 cxgb_start_locked(qs);
1650 qs->qs_flags &= ~QS_TIMEOUT;
1651 TXQ_UNLOCK(qs);
1652 }
1653 }
1654
1655 static void
1656 cxgb_start_locked(struct sge_qset *qs)
1657 {
1658 struct mbuf *m_head = NULL;
1659 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1660 struct port_info *pi = qs->port;
1661 struct ifnet *ifp = pi->ifp;
1662
1663 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1664 reclaim_completed_tx(qs, 0, TXQ_ETH);
1665
1666 if (!pi->link_config.link_ok) {
1667 TXQ_RING_FLUSH(qs);
1668 return;
1669 }
1670 TXQ_LOCK_ASSERT(qs);
1671 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1672 pi->link_config.link_ok) {
1673 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1674
1675 if (txq->size - txq->in_use <= TX_MAX_DESC)
1676 break;
1677
1678 if ((m_head = cxgb_dequeue(qs)) == NULL)
1679 break;
1680 /*
1681 * Encapsulation can modify our pointer, and or make it
1682 * NULL on failure. In that event, we can't requeue.
1683 */
1684 if (t3_encap(qs, &m_head) || m_head == NULL)
1685 break;
1686
1687 m_head = NULL;
1688 }
1689
1690 if (txq->db_pending)
1691 check_ring_tx_db(pi->adapter, txq, 1);
1692
1693 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1694 pi->link_config.link_ok)
1695 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1696 qs, txq->txq_timer.c_cpu);
1697 if (m_head != NULL)
1698 m_freem(m_head);
1699 }
1700
1701 static int
1702 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1703 {
1704 struct port_info *pi = qs->port;
1705 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1706 struct buf_ring *br = txq->txq_mr;
1707 int error, avail;
1708
1709 avail = txq->size - txq->in_use;
1710 TXQ_LOCK_ASSERT(qs);
1711
1712 /*
1713 * We can only do a direct transmit if the following are true:
1714 * - we aren't coalescing (ring < 3/4 full)
1715 * - the link is up -- checked in caller
1716 * - there are no packets enqueued already
1717 * - there is space in hardware transmit queue
1718 */
1719 if (check_pkt_coalesce(qs) == 0 &&
1720 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1721 if (t3_encap(qs, &m)) {
1722 if (m != NULL &&
1723 (error = drbr_enqueue(ifp, br, m)) != 0)
1724 return (error);
1725 } else {
1726 if (txq->db_pending)
1727 check_ring_tx_db(pi->adapter, txq, 1);
1728
1729 /*
1730 * We've bypassed the buf ring so we need to update
1731 * the stats directly
1732 */
1733 txq->txq_direct_packets++;
1734 txq->txq_direct_bytes += m->m_pkthdr.len;
1735 }
1736 } else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1737 return (error);
1738
1739 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1740 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1741 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1742 cxgb_start_locked(qs);
1743 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1744 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1745 qs, txq->txq_timer.c_cpu);
1746 return (0);
1747 }
1748
1749 int
1750 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1751 {
1752 struct sge_qset *qs;
1753 struct port_info *pi = ifp->if_softc;
1754 int error, qidx = pi->first_qset;
1755
1756 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
1757 ||(!pi->link_config.link_ok)) {
1758 m_freem(m);
1759 return (0);
1760 }
1761
1762 /* check if flowid is set */
1763 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1764 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1765
1766 qs = &pi->adapter->sge.qs[qidx];
1767
1768 if (TXQ_TRYLOCK(qs)) {
1769 /* XXX running */
1770 error = cxgb_transmit_locked(ifp, qs, m);
1771 TXQ_UNLOCK(qs);
1772 } else
1773 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1774 return (error);
1775 }
1776
1777 void
1778 cxgb_qflush(struct ifnet *ifp)
1779 {
1780 /*
1781 * flush any enqueued mbufs in the buf_rings
1782 * and in the transmit queues
1783 * no-op for now
1784 */
1785 return;
1786 }
1787
1788 /**
1789 * write_imm - write a packet into a Tx descriptor as immediate data
1790 * @d: the Tx descriptor to write
1791 * @m: the packet
1792 * @len: the length of packet data to write as immediate data
1793 * @gen: the generation bit value to write
1794 *
1795 * Writes a packet as immediate data into a Tx descriptor. The packet
1796 * contains a work request at its beginning. We must write the packet
1797 * carefully so the SGE doesn't read accidentally before it's written in
1798 * its entirety.
1799 */
1800 static __inline void
1801 write_imm(struct tx_desc *d, caddr_t src,
1802 unsigned int len, unsigned int gen)
1803 {
1804 struct work_request_hdr *from = (struct work_request_hdr *)src;
1805 struct work_request_hdr *to = (struct work_request_hdr *)d;
1806 uint32_t wr_hi, wr_lo;
1807
1808 KASSERT(len <= WR_LEN && len >= sizeof(*from),
1809 ("%s: invalid len %d", __func__, len));
1810
1811 memcpy(&to[1], &from[1], len - sizeof(*from));
1812 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1813 V_WR_BCNTLFLT(len & 7));
1814 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
1815 set_wr_hdr(to, wr_hi, wr_lo);
1816 wmb();
1817 wr_gen2(d, gen);
1818 }
1819
1820 /**
1821 * check_desc_avail - check descriptor availability on a send queue
1822 * @adap: the adapter
1823 * @q: the TX queue
1824 * @m: the packet needing the descriptors
1825 * @ndesc: the number of Tx descriptors needed
1826 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1827 *
1828 * Checks if the requested number of Tx descriptors is available on an
1829 * SGE send queue. If the queue is already suspended or not enough
1830 * descriptors are available the packet is queued for later transmission.
1831 * Must be called with the Tx queue locked.
1832 *
1833 * Returns 0 if enough descriptors are available, 1 if there aren't
1834 * enough descriptors and the packet has been queued, and 2 if the caller
1835 * needs to retry because there weren't enough descriptors at the
1836 * beginning of the call but some freed up in the mean time.
1837 */
1838 static __inline int
1839 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1840 struct mbuf *m, unsigned int ndesc,
1841 unsigned int qid)
1842 {
1843 /*
1844 * XXX We currently only use this for checking the control queue
1845 * the control queue is only used for binding qsets which happens
1846 * at init time so we are guaranteed enough descriptors
1847 */
1848 if (__predict_false(mbufq_len(&q->sendq))) {
1849 addq_exit: (void )mbufq_enqueue(&q->sendq, m);
1850 return 1;
1851 }
1852 if (__predict_false(q->size - q->in_use < ndesc)) {
1853
1854 struct sge_qset *qs = txq_to_qset(q, qid);
1855
1856 setbit(&qs->txq_stopped, qid);
1857 if (should_restart_tx(q) &&
1858 test_and_clear_bit(qid, &qs->txq_stopped))
1859 return 2;
1860
1861 q->stops++;
1862 goto addq_exit;
1863 }
1864 return 0;
1865 }
1866
1867
1868 /**
1869 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1870 * @q: the SGE control Tx queue
1871 *
1872 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1873 * that send only immediate data (presently just the control queues) and
1874 * thus do not have any mbufs
1875 */
1876 static __inline void
1877 reclaim_completed_tx_imm(struct sge_txq *q)
1878 {
1879 unsigned int reclaim = q->processed - q->cleaned;
1880
1881 q->in_use -= reclaim;
1882 q->cleaned += reclaim;
1883 }
1884
1885 /**
1886 * ctrl_xmit - send a packet through an SGE control Tx queue
1887 * @adap: the adapter
1888 * @q: the control queue
1889 * @m: the packet
1890 *
1891 * Send a packet through an SGE control Tx queue. Packets sent through
1892 * a control queue must fit entirely as immediate data in a single Tx
1893 * descriptor and have no page fragments.
1894 */
1895 static int
1896 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1897 {
1898 int ret;
1899 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1900 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1901
1902 KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1903
1904 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1905 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1906
1907 TXQ_LOCK(qs);
1908 again: reclaim_completed_tx_imm(q);
1909
1910 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1911 if (__predict_false(ret)) {
1912 if (ret == 1) {
1913 TXQ_UNLOCK(qs);
1914 return (ENOSPC);
1915 }
1916 goto again;
1917 }
1918 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1919
1920 q->in_use++;
1921 if (++q->pidx >= q->size) {
1922 q->pidx = 0;
1923 q->gen ^= 1;
1924 }
1925 TXQ_UNLOCK(qs);
1926 wmb();
1927 t3_write_reg(adap, A_SG_KDOORBELL,
1928 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1929
1930 m_free(m);
1931 return (0);
1932 }
1933
1934
1935 /**
1936 * restart_ctrlq - restart a suspended control queue
1937 * @qs: the queue set cotaining the control queue
1938 *
1939 * Resumes transmission on a suspended Tx control queue.
1940 */
1941 static void
1942 restart_ctrlq(void *data, int npending)
1943 {
1944 struct mbuf *m;
1945 struct sge_qset *qs = (struct sge_qset *)data;
1946 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1947 adapter_t *adap = qs->port->adapter;
1948
1949 TXQ_LOCK(qs);
1950 again: reclaim_completed_tx_imm(q);
1951
1952 while (q->in_use < q->size &&
1953 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1954
1955 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1956 m_free(m);
1957
1958 if (++q->pidx >= q->size) {
1959 q->pidx = 0;
1960 q->gen ^= 1;
1961 }
1962 q->in_use++;
1963 }
1964 if (mbufq_len(&q->sendq)) {
1965 setbit(&qs->txq_stopped, TXQ_CTRL);
1966
1967 if (should_restart_tx(q) &&
1968 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1969 goto again;
1970 q->stops++;
1971 }
1972 TXQ_UNLOCK(qs);
1973 t3_write_reg(adap, A_SG_KDOORBELL,
1974 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1975 }
1976
1977
1978 /*
1979 * Send a management message through control queue 0
1980 */
1981 int
1982 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1983 {
1984 return ctrl_xmit(adap, &adap->sge.qs[0], m);
1985 }
1986
1987 /**
1988 * free_qset - free the resources of an SGE queue set
1989 * @sc: the controller owning the queue set
1990 * @q: the queue set
1991 *
1992 * Release the HW and SW resources associated with an SGE queue set, such
1993 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1994 * queue set must be quiesced prior to calling this.
1995 */
1996 static void
1997 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1998 {
1999 int i;
2000
2001 reclaim_completed_tx(q, 0, TXQ_ETH);
2002 if (q->txq[TXQ_ETH].txq_mr != NULL)
2003 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2004 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2005 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2006 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2007 }
2008
2009 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2010 if (q->fl[i].desc) {
2011 mtx_lock_spin(&sc->sge.reg_lock);
2012 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2013 mtx_unlock_spin(&sc->sge.reg_lock);
2014 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2015 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2016 q->fl[i].desc_map);
2017 bus_dma_tag_destroy(q->fl[i].desc_tag);
2018 bus_dma_tag_destroy(q->fl[i].entry_tag);
2019 }
2020 if (q->fl[i].sdesc) {
2021 free_rx_bufs(sc, &q->fl[i]);
2022 free(q->fl[i].sdesc, M_DEVBUF);
2023 }
2024 }
2025
2026 mtx_unlock(&q->lock);
2027 MTX_DESTROY(&q->lock);
2028 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2029 if (q->txq[i].desc) {
2030 mtx_lock_spin(&sc->sge.reg_lock);
2031 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2032 mtx_unlock_spin(&sc->sge.reg_lock);
2033 bus_dmamap_unload(q->txq[i].desc_tag,
2034 q->txq[i].desc_map);
2035 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2036 q->txq[i].desc_map);
2037 bus_dma_tag_destroy(q->txq[i].desc_tag);
2038 bus_dma_tag_destroy(q->txq[i].entry_tag);
2039 }
2040 if (q->txq[i].sdesc) {
2041 free(q->txq[i].sdesc, M_DEVBUF);
2042 }
2043 }
2044
2045 if (q->rspq.desc) {
2046 mtx_lock_spin(&sc->sge.reg_lock);
2047 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2048 mtx_unlock_spin(&sc->sge.reg_lock);
2049
2050 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2051 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2052 q->rspq.desc_map);
2053 bus_dma_tag_destroy(q->rspq.desc_tag);
2054 MTX_DESTROY(&q->rspq.lock);
2055 }
2056
2057 #if defined(INET6) || defined(INET)
2058 tcp_lro_free(&q->lro.ctrl);
2059 #endif
2060
2061 bzero(q, sizeof(*q));
2062 }
2063
2064 /**
2065 * t3_free_sge_resources - free SGE resources
2066 * @sc: the adapter softc
2067 *
2068 * Frees resources used by the SGE queue sets.
2069 */
2070 void
2071 t3_free_sge_resources(adapter_t *sc, int nqsets)
2072 {
2073 int i;
2074
2075 for (i = 0; i < nqsets; ++i) {
2076 TXQ_LOCK(&sc->sge.qs[i]);
2077 t3_free_qset(sc, &sc->sge.qs[i]);
2078 }
2079 }
2080
2081 /**
2082 * t3_sge_start - enable SGE
2083 * @sc: the controller softc
2084 *
2085 * Enables the SGE for DMAs. This is the last step in starting packet
2086 * transfers.
2087 */
2088 void
2089 t3_sge_start(adapter_t *sc)
2090 {
2091 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2092 }
2093
2094 /**
2095 * t3_sge_stop - disable SGE operation
2096 * @sc: the adapter
2097 *
2098 * Disables the DMA engine. This can be called in emeregencies (e.g.,
2099 * from error interrupts) or from normal process context. In the latter
2100 * case it also disables any pending queue restart tasklets. Note that
2101 * if it is called in interrupt context it cannot disable the restart
2102 * tasklets as it cannot wait, however the tasklets will have no effect
2103 * since the doorbells are disabled and the driver will call this again
2104 * later from process context, at which time the tasklets will be stopped
2105 * if they are still running.
2106 */
2107 void
2108 t3_sge_stop(adapter_t *sc)
2109 {
2110 int i, nqsets;
2111
2112 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2113
2114 if (sc->tq == NULL)
2115 return;
2116
2117 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2118 nqsets += sc->port[i].nqsets;
2119 #ifdef notyet
2120 /*
2121 *
2122 * XXX
2123 */
2124 for (i = 0; i < nqsets; ++i) {
2125 struct sge_qset *qs = &sc->sge.qs[i];
2126
2127 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2128 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2129 }
2130 #endif
2131 }
2132
2133 /**
2134 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2135 * @adapter: the adapter
2136 * @q: the Tx queue to reclaim descriptors from
2137 * @reclaimable: the number of descriptors to reclaim
2138 * @m_vec_size: maximum number of buffers to reclaim
2139 * @desc_reclaimed: returns the number of descriptors reclaimed
2140 *
2141 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2142 * Tx buffers. Called with the Tx queue lock held.
2143 *
2144 * Returns number of buffers of reclaimed
2145 */
2146 void
2147 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2148 {
2149 struct tx_sw_desc *txsd;
2150 unsigned int cidx, mask;
2151 struct sge_txq *q = &qs->txq[queue];
2152
2153 #ifdef T3_TRACE
2154 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2155 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2156 #endif
2157 cidx = q->cidx;
2158 mask = q->size - 1;
2159 txsd = &q->sdesc[cidx];
2160
2161 mtx_assert(&qs->lock, MA_OWNED);
2162 while (reclaimable--) {
2163 prefetch(q->sdesc[(cidx + 1) & mask].m);
2164 prefetch(q->sdesc[(cidx + 2) & mask].m);
2165
2166 if (txsd->m != NULL) {
2167 if (txsd->flags & TX_SW_DESC_MAPPED) {
2168 bus_dmamap_unload(q->entry_tag, txsd->map);
2169 txsd->flags &= ~TX_SW_DESC_MAPPED;
2170 }
2171 m_freem_list(txsd->m);
2172 txsd->m = NULL;
2173 } else
2174 q->txq_skipped++;
2175
2176 ++txsd;
2177 if (++cidx == q->size) {
2178 cidx = 0;
2179 txsd = q->sdesc;
2180 }
2181 }
2182 q->cidx = cidx;
2183
2184 }
2185
2186 /**
2187 * is_new_response - check if a response is newly written
2188 * @r: the response descriptor
2189 * @q: the response queue
2190 *
2191 * Returns true if a response descriptor contains a yet unprocessed
2192 * response.
2193 */
2194 static __inline int
2195 is_new_response(const struct rsp_desc *r,
2196 const struct sge_rspq *q)
2197 {
2198 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2199 }
2200
2201 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2202 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2203 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2204 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2205 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2206
2207 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2208 #define NOMEM_INTR_DELAY 2500
2209
2210 #ifdef TCP_OFFLOAD
2211 /**
2212 * write_ofld_wr - write an offload work request
2213 * @adap: the adapter
2214 * @m: the packet to send
2215 * @q: the Tx queue
2216 * @pidx: index of the first Tx descriptor to write
2217 * @gen: the generation value to use
2218 * @ndesc: number of descriptors the packet will occupy
2219 *
2220 * Write an offload work request to send the supplied packet. The packet
2221 * data already carry the work request with most fields populated.
2222 */
2223 static void
2224 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
2225 unsigned int pidx, unsigned int gen, unsigned int ndesc)
2226 {
2227 unsigned int sgl_flits, flits;
2228 int i, idx, nsegs, wrlen;
2229 struct work_request_hdr *from;
2230 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
2231 struct tx_desc *d = &q->desc[pidx];
2232 struct txq_state txqs;
2233 struct sglist_seg *segs;
2234 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2235 struct sglist *sgl;
2236
2237 from = (void *)(oh + 1); /* Start of WR within mbuf */
2238 wrlen = m->m_len - sizeof(*oh);
2239
2240 if (!(oh->flags & F_HDR_SGL)) {
2241 write_imm(d, (caddr_t)from, wrlen, gen);
2242
2243 /*
2244 * mbuf with "real" immediate tx data will be enqueue_wr'd by
2245 * t3_push_frames and freed in wr_ack. Others, like those sent
2246 * down by close_conn, t3_send_reset, etc. should be freed here.
2247 */
2248 if (!(oh->flags & F_HDR_DF))
2249 m_free(m);
2250 return;
2251 }
2252
2253 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
2254
2255 sgl = oh->sgl;
2256 flits = wrlen / 8;
2257 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
2258
2259 nsegs = sgl->sg_nseg;
2260 segs = sgl->sg_segs;
2261 for (idx = 0, i = 0; i < nsegs; i++) {
2262 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
2263 if (i && idx == 0)
2264 ++sgp;
2265 sgp->len[idx] = htobe32(segs[i].ss_len);
2266 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
2267 idx ^= 1;
2268 }
2269 if (idx) {
2270 sgp->len[idx] = 0;
2271 sgp->addr[idx] = 0;
2272 }
2273
2274 sgl_flits = sgl_len(nsegs);
2275 txqs.gen = gen;
2276 txqs.pidx = pidx;
2277 txqs.compl = 0;
2278
2279 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
2280 from->wrh_hi, from->wrh_lo);
2281 }
2282
2283 /**
2284 * ofld_xmit - send a packet through an offload queue
2285 * @adap: the adapter
2286 * @q: the Tx offload queue
2287 * @m: the packet
2288 *
2289 * Send an offload packet through an SGE offload queue.
2290 */
2291 static int
2292 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2293 {
2294 int ret;
2295 unsigned int ndesc;
2296 unsigned int pidx, gen;
2297 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2298 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2299
2300 ndesc = G_HDR_NDESC(oh->flags);
2301
2302 TXQ_LOCK(qs);
2303 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2304 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2305 if (__predict_false(ret)) {
2306 if (ret == 1) {
2307 TXQ_UNLOCK(qs);
2308 return (EINTR);
2309 }
2310 goto again;
2311 }
2312
2313 gen = q->gen;
2314 q->in_use += ndesc;
2315 pidx = q->pidx;
2316 q->pidx += ndesc;
2317 if (q->pidx >= q->size) {
2318 q->pidx -= q->size;
2319 q->gen ^= 1;
2320 }
2321
2322 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2323 check_ring_tx_db(adap, q, 1);
2324 TXQ_UNLOCK(qs);
2325
2326 return (0);
2327 }
2328
2329 /**
2330 * restart_offloadq - restart a suspended offload queue
2331 * @qs: the queue set cotaining the offload queue
2332 *
2333 * Resumes transmission on a suspended Tx offload queue.
2334 */
2335 static void
2336 restart_offloadq(void *data, int npending)
2337 {
2338 struct mbuf *m;
2339 struct sge_qset *qs = data;
2340 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2341 adapter_t *adap = qs->port->adapter;
2342 int cleaned;
2343
2344 TXQ_LOCK(qs);
2345 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2346
2347 while ((m = mbufq_first(&q->sendq)) != NULL) {
2348 unsigned int gen, pidx;
2349 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2350 unsigned int ndesc = G_HDR_NDESC(oh->flags);
2351
2352 if (__predict_false(q->size - q->in_use < ndesc)) {
2353 setbit(&qs->txq_stopped, TXQ_OFLD);
2354 if (should_restart_tx(q) &&
2355 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2356 goto again;
2357 q->stops++;
2358 break;
2359 }
2360
2361 gen = q->gen;
2362 q->in_use += ndesc;
2363 pidx = q->pidx;
2364 q->pidx += ndesc;
2365 if (q->pidx >= q->size) {
2366 q->pidx -= q->size;
2367 q->gen ^= 1;
2368 }
2369
2370 (void)mbufq_dequeue(&q->sendq);
2371 TXQ_UNLOCK(qs);
2372 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2373 TXQ_LOCK(qs);
2374 }
2375 #if USE_GTS
2376 set_bit(TXQ_RUNNING, &q->flags);
2377 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2378 #endif
2379 TXQ_UNLOCK(qs);
2380 wmb();
2381 t3_write_reg(adap, A_SG_KDOORBELL,
2382 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2383 }
2384
2385 /**
2386 * t3_offload_tx - send an offload packet
2387 * @m: the packet
2388 *
2389 * Sends an offload packet. We use the packet priority to select the
2390 * appropriate Tx queue as follows: bit 0 indicates whether the packet
2391 * should be sent as regular or control, bits 1-3 select the queue set.
2392 */
2393 int
2394 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2395 {
2396 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2397 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2398
2399 if (oh->flags & F_HDR_CTRL) {
2400 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
2401 return (ctrl_xmit(sc, qs, m));
2402 } else
2403 return (ofld_xmit(sc, qs, m));
2404 }
2405 #endif
2406
2407 static void
2408 restart_tx(struct sge_qset *qs)
2409 {
2410 struct adapter *sc = qs->port->adapter;
2411
2412 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2413 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2414 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2415 qs->txq[TXQ_OFLD].restarts++;
2416 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2417 }
2418
2419 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2420 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2422 qs->txq[TXQ_CTRL].restarts++;
2423 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2424 }
2425 }
2426
2427 /**
2428 * t3_sge_alloc_qset - initialize an SGE queue set
2429 * @sc: the controller softc
2430 * @id: the queue set id
2431 * @nports: how many Ethernet ports will be using this queue set
2432 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2433 * @p: configuration parameters for this queue set
2434 * @ntxq: number of Tx queues for the queue set
2435 * @pi: port info for queue set
2436 *
2437 * Allocate resources and initialize an SGE queue set. A queue set
2438 * comprises a response queue, two Rx free-buffer queues, and up to 3
2439 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2440 * queue, offload queue, and control queue.
2441 */
2442 int
2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2444 const struct qset_params *p, int ntxq, struct port_info *pi)
2445 {
2446 struct sge_qset *q = &sc->sge.qs[id];
2447 int i, ret = 0;
2448
2449 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2450 q->port = pi;
2451 q->adap = sc;
2452
2453 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2454 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2455 device_printf(sc->dev, "failed to allocate mbuf ring\n");
2456 goto err;
2457 }
2458 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2459 M_NOWAIT | M_ZERO)) == NULL) {
2460 device_printf(sc->dev, "failed to allocate ifq\n");
2461 goto err;
2462 }
2463 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2464 callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2465 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2466 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2467 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2468
2469 init_qset_cntxt(q, id);
2470 q->idx = id;
2471 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2472 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2473 &q->fl[0].desc, &q->fl[0].sdesc,
2474 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2475 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2476 printf("error %d from alloc ring fl0\n", ret);
2477 goto err;
2478 }
2479
2480 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2481 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2482 &q->fl[1].desc, &q->fl[1].sdesc,
2483 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2484 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2485 printf("error %d from alloc ring fl1\n", ret);
2486 goto err;
2487 }
2488
2489 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2490 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2491 &q->rspq.desc_tag, &q->rspq.desc_map,
2492 NULL, NULL)) != 0) {
2493 printf("error %d from alloc ring rspq\n", ret);
2494 goto err;
2495 }
2496
2497 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2498 device_get_unit(sc->dev), irq_vec_idx);
2499 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2500
2501 for (i = 0; i < ntxq; ++i) {
2502 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2503
2504 if ((ret = alloc_ring(sc, p->txq_size[i],
2505 sizeof(struct tx_desc), sz,
2506 &q->txq[i].phys_addr, &q->txq[i].desc,
2507 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2508 &q->txq[i].desc_map,
2509 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2510 printf("error %d from alloc ring tx %i\n", ret, i);
2511 goto err;
2512 }
2513 mbufq_init(&q->txq[i].sendq, INT_MAX);
2514 q->txq[i].gen = 1;
2515 q->txq[i].size = p->txq_size[i];
2516 }
2517
2518 #ifdef TCP_OFFLOAD
2519 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2520 #endif
2521 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2522 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2523 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2524
2525 q->fl[0].gen = q->fl[1].gen = 1;
2526 q->fl[0].size = p->fl_size;
2527 q->fl[1].size = p->jumbo_size;
2528
2529 q->rspq.gen = 1;
2530 q->rspq.cidx = 0;
2531 q->rspq.size = p->rspq_size;
2532
2533 q->txq[TXQ_ETH].stop_thres = nports *
2534 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2535
2536 q->fl[0].buf_size = MCLBYTES;
2537 q->fl[0].zone = zone_pack;
2538 q->fl[0].type = EXT_PACKET;
2539
2540 if (p->jumbo_buf_size == MJUM16BYTES) {
2541 q->fl[1].zone = zone_jumbo16;
2542 q->fl[1].type = EXT_JUMBO16;
2543 } else if (p->jumbo_buf_size == MJUM9BYTES) {
2544 q->fl[1].zone = zone_jumbo9;
2545 q->fl[1].type = EXT_JUMBO9;
2546 } else if (p->jumbo_buf_size == MJUMPAGESIZE) {
2547 q->fl[1].zone = zone_jumbop;
2548 q->fl[1].type = EXT_JUMBOP;
2549 } else {
2550 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2551 ret = EDOOFUS;
2552 goto err;
2553 }
2554 q->fl[1].buf_size = p->jumbo_buf_size;
2555
2556 /* Allocate and setup the lro_ctrl structure */
2557 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2558 #if defined(INET6) || defined(INET)
2559 ret = tcp_lro_init(&q->lro.ctrl);
2560 if (ret) {
2561 printf("error %d from tcp_lro_init\n", ret);
2562 goto err;
2563 }
2564 #endif
2565 q->lro.ctrl.ifp = pi->ifp;
2566
2567 mtx_lock_spin(&sc->sge.reg_lock);
2568 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2569 q->rspq.phys_addr, q->rspq.size,
2570 q->fl[0].buf_size, 1, 0);
2571 if (ret) {
2572 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2573 goto err_unlock;
2574 }
2575
2576 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2577 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2578 q->fl[i].phys_addr, q->fl[i].size,
2579 q->fl[i].buf_size, p->cong_thres, 1,
2580 0);
2581 if (ret) {
2582 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2583 goto err_unlock;
2584 }
2585 }
2586
2587 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2588 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2589 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2590 1, 0);
2591 if (ret) {
2592 printf("error %d from t3_sge_init_ecntxt\n", ret);
2593 goto err_unlock;
2594 }
2595
2596 if (ntxq > 1) {
2597 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2598 USE_GTS, SGE_CNTXT_OFLD, id,
2599 q->txq[TXQ_OFLD].phys_addr,
2600 q->txq[TXQ_OFLD].size, 0, 1, 0);
2601 if (ret) {
2602 printf("error %d from t3_sge_init_ecntxt\n", ret);
2603 goto err_unlock;
2604 }
2605 }
2606
2607 if (ntxq > 2) {
2608 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2609 SGE_CNTXT_CTRL, id,
2610 q->txq[TXQ_CTRL].phys_addr,
2611 q->txq[TXQ_CTRL].size,
2612 q->txq[TXQ_CTRL].token, 1, 0);
2613 if (ret) {
2614 printf("error %d from t3_sge_init_ecntxt\n", ret);
2615 goto err_unlock;
2616 }
2617 }
2618
2619 mtx_unlock_spin(&sc->sge.reg_lock);
2620 t3_update_qset_coalesce(q, p);
2621
2622 refill_fl(sc, &q->fl[0], q->fl[0].size);
2623 refill_fl(sc, &q->fl[1], q->fl[1].size);
2624 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2625
2626 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2627 V_NEWTIMER(q->rspq.holdoff_tmr));
2628
2629 return (0);
2630
2631 err_unlock:
2632 mtx_unlock_spin(&sc->sge.reg_lock);
2633 err:
2634 TXQ_LOCK(q);
2635 t3_free_qset(sc, q);
2636
2637 return (ret);
2638 }
2639
2640 /*
2641 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2642 * ethernet data. Hardware assistance with various checksums and any vlan tag
2643 * will also be taken into account here.
2644 */
2645 void
2646 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2647 {
2648 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2649 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2650 struct ifnet *ifp = pi->ifp;
2651
2652 if (cpl->vlan_valid) {
2653 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2654 m->m_flags |= M_VLANTAG;
2655 }
2656
2657 m->m_pkthdr.rcvif = ifp;
2658 /*
2659 * adjust after conversion to mbuf chain
2660 */
2661 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2662 m->m_len -= (sizeof(*cpl) + ethpad);
2663 m->m_data += (sizeof(*cpl) + ethpad);
2664
2665 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2666 struct ether_header *eh = mtod(m, void *);
2667 uint16_t eh_type;
2668
2669 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2670 struct ether_vlan_header *evh = mtod(m, void *);
2671
2672 eh_type = evh->evl_proto;
2673 } else
2674 eh_type = eh->ether_type;
2675
2676 if (ifp->if_capenable & IFCAP_RXCSUM &&
2677 eh_type == htons(ETHERTYPE_IP)) {
2678 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2679 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2680 m->m_pkthdr.csum_data = 0xffff;
2681 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
2682 eh_type == htons(ETHERTYPE_IPV6)) {
2683 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2684 CSUM_PSEUDO_HDR);
2685 m->m_pkthdr.csum_data = 0xffff;
2686 }
2687 }
2688 }
2689
2690 /**
2691 * get_packet - return the next ingress packet buffer from a free list
2692 * @adap: the adapter that received the packet
2693 * @drop_thres: # of remaining buffers before we start dropping packets
2694 * @qs: the qset that the SGE free list holding the packet belongs to
2695 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2696 * @r: response descriptor
2697 *
2698 * Get the next packet from a free list and complete setup of the
2699 * sk_buff. If the packet is small we make a copy and recycle the
2700 * original buffer, otherwise we use the original buffer itself. If a
2701 * positive drop threshold is supplied packets are dropped and their
2702 * buffers recycled if (a) the number of remaining buffers is under the
2703 * threshold and the packet is too big to copy, or (b) the packet should
2704 * be copied but there is no memory for the copy.
2705 */
2706 static int
2707 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2708 struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2709 {
2710
2711 unsigned int len_cq = ntohl(r->len_cq);
2712 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2713 int mask, cidx = fl->cidx;
2714 struct rx_sw_desc *sd = &fl->sdesc[cidx];
2715 uint32_t len = G_RSPD_LEN(len_cq);
2716 uint32_t flags = M_EXT;
2717 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2718 caddr_t cl;
2719 struct mbuf *m;
2720 int ret = 0;
2721
2722 mask = fl->size - 1;
2723 prefetch(fl->sdesc[(cidx + 1) & mask].m);
2724 prefetch(fl->sdesc[(cidx + 2) & mask].m);
2725 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2726 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2727
2728 fl->credits--;
2729 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2730
2731 if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2732 sopeop == RSPQ_SOP_EOP) {
2733 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
2734 goto skip_recycle;
2735 cl = mtod(m, void *);
2736 memcpy(cl, sd->rxsd_cl, len);
2737 recycle_rx_buf(adap, fl, fl->cidx);
2738 m->m_pkthdr.len = m->m_len = len;
2739 m->m_flags = 0;
2740 mh->mh_head = mh->mh_tail = m;
2741 ret = 1;
2742 goto done;
2743 } else {
2744 skip_recycle:
2745 bus_dmamap_unload(fl->entry_tag, sd->map);
2746 cl = sd->rxsd_cl;
2747 m = sd->m;
2748
2749 if ((sopeop == RSPQ_SOP_EOP) ||
2750 (sopeop == RSPQ_SOP))
2751 flags |= M_PKTHDR;
2752 m_init(m, M_NOWAIT, MT_DATA, flags);
2753 if (fl->zone == zone_pack) {
2754 /*
2755 * restore clobbered data pointer
2756 */
2757 m->m_data = m->m_ext.ext_buf;
2758 } else {
2759 m_cljset(m, cl, fl->type);
2760 }
2761 m->m_len = len;
2762 }
2763 switch(sopeop) {
2764 case RSPQ_SOP_EOP:
2765 ret = 1;
2766 /* FALLTHROUGH */
2767 case RSPQ_SOP:
2768 mh->mh_head = mh->mh_tail = m;
2769 m->m_pkthdr.len = len;
2770 break;
2771 case RSPQ_EOP:
2772 ret = 1;
2773 /* FALLTHROUGH */
2774 case RSPQ_NSOP_NEOP:
2775 if (mh->mh_tail == NULL) {
2776 log(LOG_ERR, "discarding intermediate descriptor entry\n");
2777 m_freem(m);
2778 m = NULL;
2779 break;
2780 }
2781 mh->mh_tail->m_next = m;
2782 mh->mh_tail = m;
2783 mh->mh_head->m_pkthdr.len += len;
2784 break;
2785 }
2786 if (cxgb_debug && m != NULL)
2787 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2788 done:
2789 if (++fl->cidx == fl->size)
2790 fl->cidx = 0;
2791
2792 return (ret);
2793 }
2794
2795 /**
2796 * handle_rsp_cntrl_info - handles control information in a response
2797 * @qs: the queue set corresponding to the response
2798 * @flags: the response control flags
2799 *
2800 * Handles the control information of an SGE response, such as GTS
2801 * indications and completion credits for the queue set's Tx queues.
2802 * HW coalesces credits, we don't do any extra SW coalescing.
2803 */
2804 static __inline void
2805 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2806 {
2807 unsigned int credits;
2808
2809 #if USE_GTS
2810 if (flags & F_RSPD_TXQ0_GTS)
2811 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2812 #endif
2813 credits = G_RSPD_TXQ0_CR(flags);
2814 if (credits)
2815 qs->txq[TXQ_ETH].processed += credits;
2816
2817 credits = G_RSPD_TXQ2_CR(flags);
2818 if (credits)
2819 qs->txq[TXQ_CTRL].processed += credits;
2820
2821 # if USE_GTS
2822 if (flags & F_RSPD_TXQ1_GTS)
2823 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2824 # endif
2825 credits = G_RSPD_TXQ1_CR(flags);
2826 if (credits)
2827 qs->txq[TXQ_OFLD].processed += credits;
2828
2829 }
2830
2831 static void
2832 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2833 unsigned int sleeping)
2834 {
2835 ;
2836 }
2837
2838 /**
2839 * process_responses - process responses from an SGE response queue
2840 * @adap: the adapter
2841 * @qs: the queue set to which the response queue belongs
2842 * @budget: how many responses can be processed in this round
2843 *
2844 * Process responses from an SGE response queue up to the supplied budget.
2845 * Responses include received packets as well as credits and other events
2846 * for the queues that belong to the response queue's queue set.
2847 * A negative budget is effectively unlimited.
2848 *
2849 * Additionally choose the interrupt holdoff time for the next interrupt
2850 * on this queue. If the system is under memory shortage use a fairly
2851 * long delay to help recovery.
2852 */
2853 static int
2854 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2855 {
2856 struct sge_rspq *rspq = &qs->rspq;
2857 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2858 int budget_left = budget;
2859 unsigned int sleeping = 0;
2860 #if defined(INET6) || defined(INET)
2861 int lro_enabled = qs->lro.enabled;
2862 int skip_lro;
2863 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2864 #endif
2865 struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
2866 #ifdef DEBUG
2867 static int last_holdoff = 0;
2868 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2869 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2870 last_holdoff = rspq->holdoff_tmr;
2871 }
2872 #endif
2873 rspq->next_holdoff = rspq->holdoff_tmr;
2874
2875 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2876 int eth, eop = 0, ethpad = 0;
2877 uint32_t flags = ntohl(r->flags);
2878 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2879 uint8_t opcode = r->rss_hdr.opcode;
2880
2881 eth = (opcode == CPL_RX_PKT);
2882
2883 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2884 struct mbuf *m;
2885
2886 if (cxgb_debug)
2887 printf("async notification\n");
2888
2889 if (mh->mh_head == NULL) {
2890 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
2891 m = mh->mh_head;
2892 } else {
2893 m = m_gethdr(M_NOWAIT, MT_DATA);
2894 }
2895 if (m == NULL)
2896 goto no_mem;
2897
2898 memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2899 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2900 *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
2901 opcode = CPL_ASYNC_NOTIF;
2902 eop = 1;
2903 rspq->async_notif++;
2904 goto skip;
2905 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2906 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
2907
2908 if (m == NULL) {
2909 no_mem:
2910 rspq->next_holdoff = NOMEM_INTR_DELAY;
2911 budget_left--;
2912 break;
2913 }
2914 if (mh->mh_head == NULL)
2915 mh->mh_head = m;
2916 else
2917 mh->mh_tail->m_next = m;
2918 mh->mh_tail = m;
2919
2920 get_imm_packet(adap, r, m);
2921 mh->mh_head->m_pkthdr.len += m->m_len;
2922 eop = 1;
2923 rspq->imm_data++;
2924 } else if (r->len_cq) {
2925 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2926
2927 eop = get_packet(adap, drop_thresh, qs, mh, r);
2928 if (eop) {
2929 if (r->rss_hdr.hash_type && !adap->timestamp) {
2930 M_HASHTYPE_SET(mh->mh_head,
2931 M_HASHTYPE_OPAQUE_HASH);
2932 mh->mh_head->m_pkthdr.flowid = rss_hash;
2933 }
2934 }
2935
2936 ethpad = 2;
2937 } else {
2938 rspq->pure_rsps++;
2939 }
2940 skip:
2941 if (flags & RSPD_CTRL_MASK) {
2942 sleeping |= flags & RSPD_GTS_MASK;
2943 handle_rsp_cntrl_info(qs, flags);
2944 }
2945
2946 if (!eth && eop) {
2947 rspq->offload_pkts++;
2948 #ifdef TCP_OFFLOAD
2949 adap->cpl_handler[opcode](qs, r, mh->mh_head);
2950 #else
2951 m_freem(mh->mh_head);
2952 #endif
2953 mh->mh_head = NULL;
2954 } else if (eth && eop) {
2955 struct mbuf *m = mh->mh_head;
2956
2957 t3_rx_eth(adap, m, ethpad);
2958
2959 /*
2960 * The T304 sends incoming packets on any qset. If LRO
2961 * is also enabled, we could end up sending packet up
2962 * lro_ctrl->ifp's input. That is incorrect.
2963 *
2964 * The mbuf's rcvif was derived from the cpl header and
2965 * is accurate. Skip LRO and just use that.
2966 */
2967 #if defined(INET6) || defined(INET)
2968 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2969
2970 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
2971 && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
2972 ) {
2973 /* successfully queue'd for LRO */
2974 } else
2975 #endif
2976 {
2977 /*
2978 * LRO not enabled, packet unsuitable for LRO,
2979 * or unable to queue. Pass it up right now in
2980 * either case.
2981 */
2982 struct ifnet *ifp = m->m_pkthdr.rcvif;
2983 (*ifp->if_input)(ifp, m);
2984 }
2985 mh->mh_head = NULL;
2986
2987 }
2988
2989 r++;
2990 if (__predict_false(++rspq->cidx == rspq->size)) {
2991 rspq->cidx = 0;
2992 rspq->gen ^= 1;
2993 r = rspq->desc;
2994 }
2995
2996 if (++rspq->credits >= 64) {
2997 refill_rspq(adap, rspq, rspq->credits);
2998 rspq->credits = 0;
2999 }
3000 __refill_fl_lt(adap, &qs->fl[0], 32);
3001 __refill_fl_lt(adap, &qs->fl[1], 32);
3002 --budget_left;
3003 }
3004
3005 #if defined(INET6) || defined(INET)
3006 /* Flush LRO */
3007 tcp_lro_flush_all(lro_ctrl);
3008 #endif
3009
3010 if (sleeping)
3011 check_ring_db(adap, qs, sleeping);
3012
3013 mb(); /* commit Tx queue processed updates */
3014 if (__predict_false(qs->txq_stopped > 1))
3015 restart_tx(qs);
3016
3017 __refill_fl_lt(adap, &qs->fl[0], 512);
3018 __refill_fl_lt(adap, &qs->fl[1], 512);
3019 budget -= budget_left;
3020 return (budget);
3021 }
3022
3023 /*
3024 * A helper function that processes responses and issues GTS.
3025 */
3026 static __inline int
3027 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3028 {
3029 int work;
3030 static int last_holdoff = 0;
3031
3032 work = process_responses(adap, rspq_to_qset(rq), -1);
3033
3034 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3035 printf("next_holdoff=%d\n", rq->next_holdoff);
3036 last_holdoff = rq->next_holdoff;
3037 }
3038 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3039 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3040
3041 return (work);
3042 }
3043
3044 #ifdef NETDUMP
3045 int
3046 cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs)
3047 {
3048
3049 return (process_responses_gts(adap, &qs->rspq));
3050 }
3051 #endif
3052
3053 /*
3054 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3055 * Handles data events from SGE response queues as well as error and other
3056 * async events as they all use the same interrupt pin. We use one SGE
3057 * response queue per port in this mode and protect all response queues with
3058 * queue 0's lock.
3059 */
3060 void
3061 t3b_intr(void *data)
3062 {
3063 uint32_t i, map;
3064 adapter_t *adap = data;
3065 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3066
3067 t3_write_reg(adap, A_PL_CLI, 0);
3068 map = t3_read_reg(adap, A_SG_DATA_INTR);
3069
3070 if (!map)
3071 return;
3072
3073 if (__predict_false(map & F_ERRINTR)) {
3074 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3075 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3076 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3077 }
3078
3079 mtx_lock(&q0->lock);
3080 for_each_port(adap, i)
3081 if (map & (1 << i))
3082 process_responses_gts(adap, &adap->sge.qs[i].rspq);
3083 mtx_unlock(&q0->lock);
3084 }
3085
3086 /*
3087 * The MSI interrupt handler. This needs to handle data events from SGE
3088 * response queues as well as error and other async events as they all use
3089 * the same MSI vector. We use one SGE response queue per port in this mode
3090 * and protect all response queues with queue 0's lock.
3091 */
3092 void
3093 t3_intr_msi(void *data)
3094 {
3095 adapter_t *adap = data;
3096 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3097 int i, new_packets = 0;
3098
3099 mtx_lock(&q0->lock);
3100
3101 for_each_port(adap, i)
3102 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3103 new_packets = 1;
3104 mtx_unlock(&q0->lock);
3105 if (new_packets == 0) {
3106 t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3107 (void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3108 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3109 }
3110 }
3111
3112 void
3113 t3_intr_msix(void *data)
3114 {
3115 struct sge_qset *qs = data;
3116 adapter_t *adap = qs->port->adapter;
3117 struct sge_rspq *rspq = &qs->rspq;
3118
3119 if (process_responses_gts(adap, rspq) == 0)
3120 rspq->unhandled_irqs++;
3121 }
3122
3123 #define QDUMP_SBUF_SIZE 32 * 400
3124 static int
3125 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3126 {
3127 struct sge_rspq *rspq;
3128 struct sge_qset *qs;
3129 int i, err, dump_end, idx;
3130 struct sbuf *sb;
3131 struct rsp_desc *rspd;
3132 uint32_t data[4];
3133
3134 rspq = arg1;
3135 qs = rspq_to_qset(rspq);
3136 if (rspq->rspq_dump_count == 0)
3137 return (0);
3138 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3139 log(LOG_WARNING,
3140 "dump count is too large %d\n", rspq->rspq_dump_count);
3141 rspq->rspq_dump_count = 0;
3142 return (EINVAL);
3143 }
3144 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3145 log(LOG_WARNING,
3146 "dump start of %d is greater than queue size\n",
3147 rspq->rspq_dump_start);
3148 rspq->rspq_dump_start = 0;
3149 return (EINVAL);
3150 }
3151 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3152 if (err)
3153 return (err);
3154 err = sysctl_wire_old_buffer(req, 0);
3155 if (err)
3156 return (err);
3157 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3158
3159 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3160 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3161 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3162 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3163 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3164
3165 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3166 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3167
3168 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3169 for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3170 idx = i & (RSPQ_Q_SIZE-1);
3171
3172 rspd = &rspq->desc[idx];
3173 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3174 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3175 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3176 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3177 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3178 be32toh(rspd->len_cq), rspd->intr_gen);
3179 }
3180
3181 err = sbuf_finish(sb);
3182 sbuf_delete(sb);
3183 return (err);
3184 }
3185
3186 static int
3187 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3188 {
3189 struct sge_txq *txq;
3190 struct sge_qset *qs;
3191 int i, j, err, dump_end;
3192 struct sbuf *sb;
3193 struct tx_desc *txd;
3194 uint32_t *WR, wr_hi, wr_lo, gen;
3195 uint32_t data[4];
3196
3197 txq = arg1;
3198 qs = txq_to_qset(txq, TXQ_ETH);
3199 if (txq->txq_dump_count == 0) {
3200 return (0);
3201 }
3202 if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3203 log(LOG_WARNING,
3204 "dump count is too large %d\n", txq->txq_dump_count);
3205 txq->txq_dump_count = 1;
3206 return (EINVAL);
3207 }
3208 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3209 log(LOG_WARNING,
3210 "dump start of %d is greater than queue size\n",
3211 txq->txq_dump_start);
3212 txq->txq_dump_start = 0;
3213 return (EINVAL);
3214 }
3215 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3216 if (err)
3217 return (err);
3218 err = sysctl_wire_old_buffer(req, 0);
3219 if (err)
3220 return (err);
3221 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3222
3223 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3224 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3225 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3226 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3227 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3228 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3229 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3230 txq->txq_dump_start,
3231 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3232
3233 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3234 for (i = txq->txq_dump_start; i < dump_end; i++) {
3235 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3236 WR = (uint32_t *)txd->flit;
3237 wr_hi = ntohl(WR[0]);
3238 wr_lo = ntohl(WR[1]);
3239 gen = G_WR_GEN(wr_lo);
3240
3241 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3242 wr_hi, wr_lo, gen);
3243 for (j = 2; j < 30; j += 4)
3244 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3245 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3246
3247 }
3248 err = sbuf_finish(sb);
3249 sbuf_delete(sb);
3250 return (err);
3251 }
3252
3253 static int
3254 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3255 {
3256 struct sge_txq *txq;
3257 struct sge_qset *qs;
3258 int i, j, err, dump_end;
3259 struct sbuf *sb;
3260 struct tx_desc *txd;
3261 uint32_t *WR, wr_hi, wr_lo, gen;
3262
3263 txq = arg1;
3264 qs = txq_to_qset(txq, TXQ_CTRL);
3265 if (txq->txq_dump_count == 0) {
3266 return (0);
3267 }
3268 if (txq->txq_dump_count > 256) {
3269 log(LOG_WARNING,
3270 "dump count is too large %d\n", txq->txq_dump_count);
3271 txq->txq_dump_count = 1;
3272 return (EINVAL);
3273 }
3274 if (txq->txq_dump_start > 255) {
3275 log(LOG_WARNING,
3276 "dump start of %d is greater than queue size\n",
3277 txq->txq_dump_start);
3278 txq->txq_dump_start = 0;
3279 return (EINVAL);
3280 }
3281
3282 err = sysctl_wire_old_buffer(req, 0);
3283 if (err != 0)
3284 return (err);
3285 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3286 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3287 txq->txq_dump_start,
3288 (txq->txq_dump_start + txq->txq_dump_count) & 255);
3289
3290 dump_end = txq->txq_dump_start + txq->txq_dump_count;
3291 for (i = txq->txq_dump_start; i < dump_end; i++) {
3292 txd = &txq->desc[i & (255)];
3293 WR = (uint32_t *)txd->flit;
3294 wr_hi = ntohl(WR[0]);
3295 wr_lo = ntohl(WR[1]);
3296 gen = G_WR_GEN(wr_lo);
3297
3298 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3299 wr_hi, wr_lo, gen);
3300 for (j = 2; j < 30; j += 4)
3301 sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3302 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3303
3304 }
3305 err = sbuf_finish(sb);
3306 sbuf_delete(sb);
3307 return (err);
3308 }
3309
3310 static int
3311 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3312 {
3313 adapter_t *sc = arg1;
3314 struct qset_params *qsp = &sc->params.sge.qset[0];
3315 int coalesce_usecs;
3316 struct sge_qset *qs;
3317 int i, j, err, nqsets = 0;
3318 struct mtx *lock;
3319
3320 if ((sc->flags & FULL_INIT_DONE) == 0)
3321 return (ENXIO);
3322
3323 coalesce_usecs = qsp->coalesce_usecs;
3324 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3325
3326 if (err != 0) {
3327 return (err);
3328 }
3329 if (coalesce_usecs == qsp->coalesce_usecs)
3330 return (0);
3331
3332 for (i = 0; i < sc->params.nports; i++)
3333 for (j = 0; j < sc->port[i].nqsets; j++)
3334 nqsets++;
3335
3336 coalesce_usecs = max(1, coalesce_usecs);
3337
3338 for (i = 0; i < nqsets; i++) {
3339 qs = &sc->sge.qs[i];
3340 qsp = &sc->params.sge.qset[i];
3341 qsp->coalesce_usecs = coalesce_usecs;
3342
3343 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3344 &sc->sge.qs[0].rspq.lock;
3345
3346 mtx_lock(lock);
3347 t3_update_qset_coalesce(qs, qsp);
3348 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3349 V_NEWTIMER(qs->rspq.holdoff_tmr));
3350 mtx_unlock(lock);
3351 }
3352
3353 return (0);
3354 }
3355
3356 static int
3357 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3358 {
3359 adapter_t *sc = arg1;
3360 int rc, timestamp;
3361
3362 if ((sc->flags & FULL_INIT_DONE) == 0)
3363 return (ENXIO);
3364
3365 timestamp = sc->timestamp;
3366 rc = sysctl_handle_int(oidp, ×tamp, arg2, req);
3367
3368 if (rc != 0)
3369 return (rc);
3370
3371 if (timestamp != sc->timestamp) {
3372 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3373 timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3374 sc->timestamp = timestamp;
3375 }
3376
3377 return (0);
3378 }
3379
3380 void
3381 t3_add_attach_sysctls(adapter_t *sc)
3382 {
3383 struct sysctl_ctx_list *ctx;
3384 struct sysctl_oid_list *children;
3385
3386 ctx = device_get_sysctl_ctx(sc->dev);
3387 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3388
3389 /* random information */
3390 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3391 "firmware_version",
3392 CTLFLAG_RD, sc->fw_version,
3393 0, "firmware version");
3394 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3395 "hw_revision",
3396 CTLFLAG_RD, &sc->params.rev,
3397 0, "chip model");
3398 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3399 "port_types",
3400 CTLFLAG_RD, sc->port_types,
3401 0, "type of ports");
3402 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3403 "enable_debug",
3404 CTLFLAG_RW, &cxgb_debug,
3405 0, "enable verbose debugging output");
3406 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3407 CTLFLAG_RD, &sc->tunq_coalesce,
3408 "#tunneled packets freed");
3409 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3410 "txq_overrun",
3411 CTLFLAG_RD, &txq_fills,
3412 0, "#times txq overrun");
3413 SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3414 "core_clock",
3415 CTLFLAG_RD, &sc->params.vpd.cclk,
3416 0, "core clock frequency (in KHz)");
3417 }
3418
3419
3420 static const char *rspq_name = "rspq";
3421 static const char *txq_names[] =
3422 {
3423 "txq_eth",
3424 "txq_ofld",
3425 "txq_ctrl"
3426 };
3427
3428 static int
3429 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3430 {
3431 struct port_info *p = arg1;
3432 uint64_t *parg;
3433
3434 if (!p)
3435 return (EINVAL);
3436
3437 cxgb_refresh_stats(p);
3438 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3439
3440 return (sysctl_handle_64(oidp, parg, 0, req));
3441 }
3442
3443 void
3444 t3_add_configured_sysctls(adapter_t *sc)
3445 {
3446 struct sysctl_ctx_list *ctx;
3447 struct sysctl_oid_list *children;
3448 int i, j;
3449
3450 ctx = device_get_sysctl_ctx(sc->dev);
3451 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3452
3453 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3454 "intr_coal",
3455 CTLTYPE_INT|CTLFLAG_RW, sc,
3456 0, t3_set_coalesce_usecs,
3457 "I", "interrupt coalescing timer (us)");
3458
3459 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3460 "pkt_timestamp",
3461 CTLTYPE_INT | CTLFLAG_RW, sc,
3462 0, t3_pkt_timestamp,
3463 "I", "provide packet timestamp instead of connection hash");
3464
3465 for (i = 0; i < sc->params.nports; i++) {
3466 struct port_info *pi = &sc->port[i];
3467 struct sysctl_oid *poid;
3468 struct sysctl_oid_list *poidlist;
3469 struct mac_stats *mstats = &pi->mac.stats;
3470
3471 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3472 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3473 pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3474 poidlist = SYSCTL_CHILDREN(poid);
3475 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3476 "nqsets", CTLFLAG_RD, &pi->nqsets,
3477 0, "#queue sets");
3478
3479 for (j = 0; j < pi->nqsets; j++) {
3480 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3481 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3482 *ctrlqpoid, *lropoid;
3483 struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3484 *txqpoidlist, *ctrlqpoidlist,
3485 *lropoidlist;
3486 struct sge_txq *txq = &qs->txq[TXQ_ETH];
3487
3488 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3489
3490 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3491 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3492 qspoidlist = SYSCTL_CHILDREN(qspoid);
3493
3494 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3495 CTLFLAG_RD, &qs->fl[0].empty, 0,
3496 "freelist #0 empty");
3497 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3498 CTLFLAG_RD, &qs->fl[1].empty, 0,
3499 "freelist #1 empty");
3500
3501 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3502 rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3503 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3504
3505 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3506 txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3507 txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3508
3509 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3510 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3511 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3512
3513 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3514 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3515 lropoidlist = SYSCTL_CHILDREN(lropoid);
3516
3517 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3518 CTLFLAG_RD, &qs->rspq.size,
3519 0, "#entries in response queue");
3520 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3521 CTLFLAG_RD, &qs->rspq.cidx,
3522 0, "consumer index");
3523 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3524 CTLFLAG_RD, &qs->rspq.credits,
3525 0, "#credits");
3526 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3527 CTLFLAG_RD, &qs->rspq.starved,
3528 0, "#times starved");
3529 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3530 CTLFLAG_RD, &qs->rspq.phys_addr,
3531 "physical_address_of the queue");
3532 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3533 CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3534 0, "start rspq dump entry");
3535 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3536 CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3537 0, "#rspq entries to dump");
3538 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3539 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3540 0, t3_dump_rspq, "A", "dump of the response queue");
3541
3542 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3543 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3544 "#tunneled packets dropped");
3545 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3546 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
3547 0, "#tunneled packets waiting to be sent");
3548 #if 0
3549 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3550 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3551 0, "#tunneled packets queue producer index");
3552 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3553 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3554 0, "#tunneled packets queue consumer index");
3555 #endif
3556 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3557 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3558 0, "#tunneled packets processed by the card");
3559 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3560 CTLFLAG_RD, &txq->cleaned,
3561 0, "#tunneled packets cleaned");
3562 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3563 CTLFLAG_RD, &txq->in_use,
3564 0, "#tunneled packet slots in use");
3565 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3566 CTLFLAG_RD, &txq->txq_frees,
3567 "#tunneled packets freed");
3568 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3569 CTLFLAG_RD, &txq->txq_skipped,
3570 0, "#tunneled packet descriptors skipped");
3571 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3572 CTLFLAG_RD, &txq->txq_coalesced,
3573 "#tunneled packets coalesced");
3574 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3575 CTLFLAG_RD, &txq->txq_enqueued,
3576 0, "#tunneled packets enqueued to hardware");
3577 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3578 CTLFLAG_RD, &qs->txq_stopped,
3579 0, "tx queues stopped");
3580 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3581 CTLFLAG_RD, &txq->phys_addr,
3582 "physical_address_of the queue");
3583 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3584 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3585 0, "txq generation");
3586 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3587 CTLFLAG_RD, &txq->cidx,
3588 0, "hardware queue cidx");
3589 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3590 CTLFLAG_RD, &txq->pidx,
3591 0, "hardware queue pidx");
3592 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3593 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3594 0, "txq start idx for dump");
3595 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3596 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3597 0, "txq #entries to dump");
3598 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3599 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3600 0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3601
3602 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3603 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3604 0, "ctrlq start idx for dump");
3605 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3606 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3607 0, "ctrl #entries to dump");
3608 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3609 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3610 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3611
3612 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
3613 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3614 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3615 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3616 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3617 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3618 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3619 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3620 }
3621
3622 /* Now add a node for mac stats. */
3623 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3624 CTLFLAG_RD, NULL, "MAC statistics");
3625 poidlist = SYSCTL_CHILDREN(poid);
3626
3627 /*
3628 * We (ab)use the length argument (arg2) to pass on the offset
3629 * of the data that we are interested in. This is only required
3630 * for the quad counters that are updated from the hardware (we
3631 * make sure that we return the latest value).
3632 * sysctl_handle_macstat first updates *all* the counters from
3633 * the hardware, and then returns the latest value of the
3634 * requested counter. Best would be to update only the
3635 * requested counter from hardware, but t3_mac_update_stats()
3636 * hides all the register details and we don't want to dive into
3637 * all that here.
3638 */
3639 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3640 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3641 sysctl_handle_macstat, "QU", 0)
3642 CXGB_SYSCTL_ADD_QUAD(tx_octets);
3643 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3644 CXGB_SYSCTL_ADD_QUAD(tx_frames);
3645 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3646 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3647 CXGB_SYSCTL_ADD_QUAD(tx_pause);
3648 CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3649 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3650 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3651 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3652 CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3653 CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3654 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3655 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3656 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3657 CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3658 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3659 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3660 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3661 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3662 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3663 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3664 CXGB_SYSCTL_ADD_QUAD(rx_octets);
3665 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3666 CXGB_SYSCTL_ADD_QUAD(rx_frames);
3667 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3668 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3669 CXGB_SYSCTL_ADD_QUAD(rx_pause);
3670 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3671 CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3672 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3673 CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3674 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3675 CXGB_SYSCTL_ADD_QUAD(rx_runt);
3676 CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3677 CXGB_SYSCTL_ADD_QUAD(rx_short);
3678 CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3679 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3680 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3681 CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3682 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3683 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3684 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3685 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3686 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3687 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3688 #undef CXGB_SYSCTL_ADD_QUAD
3689
3690 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3691 CTLFLAG_RD, &mstats->a, 0)
3692 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3693 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3694 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3695 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3696 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3697 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3698 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3699 CXGB_SYSCTL_ADD_ULONG(num_toggled);
3700 CXGB_SYSCTL_ADD_ULONG(num_resets);
3701 CXGB_SYSCTL_ADD_ULONG(link_faults);
3702 #undef CXGB_SYSCTL_ADD_ULONG
3703 }
3704 }
3705
3706 /**
3707 * t3_get_desc - dump an SGE descriptor for debugging purposes
3708 * @qs: the queue set
3709 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3710 * @idx: the descriptor index in the queue
3711 * @data: where to dump the descriptor contents
3712 *
3713 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3714 * size of the descriptor.
3715 */
3716 int
3717 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3718 unsigned char *data)
3719 {
3720 if (qnum >= 6)
3721 return (EINVAL);
3722
3723 if (qnum < 3) {
3724 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3725 return -EINVAL;
3726 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3727 return sizeof(struct tx_desc);
3728 }
3729
3730 if (qnum == 3) {
3731 if (!qs->rspq.desc || idx >= qs->rspq.size)
3732 return (EINVAL);
3733 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3734 return sizeof(struct rsp_desc);
3735 }
3736
3737 qnum -= 4;
3738 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3739 return (EINVAL);
3740 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3741 return sizeof(struct rx_desc);
3742 }
Cache object: 800e716d6fc0bddd459e0e371136e0e0
|