1 /**************************************************************************
2
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/conf.h>
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus_dma.h>
42 #include <sys/rman.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/taskqueue.h>
46
47 #include <sys/proc.h>
48 #include <sys/sched.h>
49 #include <sys/smp.h>
50 #include <sys/systm.h>
51
52 #include <netinet/in_systm.h>
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/tcp.h>
56
57 #include <dev/pci/pcireg.h>
58 #include <dev/pci/pcivar.h>
59
60 #ifdef CONFIG_DEFINED
61 #include <cxgb_include.h>
62 #else
63 #include <dev/cxgb/cxgb_include.h>
64 #endif
65
66 uint32_t collapse_free = 0;
67 uint32_t mb_free_vec_free = 0;
68 int txq_fills = 0;
69 int collapse_mbufs = 0;
70 static int bogus_imm = 0;
71 #ifndef DISABLE_MBUF_IOVEC
72 static int recycle_enable = 1;
73 #endif
74
75 #define USE_GTS 0
76
77 #define SGE_RX_SM_BUF_SIZE 1536
78 #define SGE_RX_DROP_THRES 16
79 #define SGE_RX_COPY_THRES 128
80
81 /*
82 * Period of the Tx buffer reclaim timer. This timer does not need to run
83 * frequently as Tx buffers are usually reclaimed by new Tx packets.
84 */
85 #define TX_RECLAIM_PERIOD (hz >> 1)
86
87 /*
88 * work request size in bytes
89 */
90 #define WR_LEN (WR_FLITS * 8)
91
92 /*
93 * Values for sge_txq.flags
94 */
95 enum {
96 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
97 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
98 };
99
100 struct tx_desc {
101 uint64_t flit[TX_DESC_FLITS];
102 } __packed;
103
104 struct rx_desc {
105 uint32_t addr_lo;
106 uint32_t len_gen;
107 uint32_t gen2;
108 uint32_t addr_hi;
109 } __packed;;
110
111 struct rsp_desc { /* response queue descriptor */
112 struct rss_header rss_hdr;
113 uint32_t flags;
114 uint32_t len_cq;
115 uint8_t imm_data[47];
116 uint8_t intr_gen;
117 } __packed;
118
119 #define RX_SW_DESC_MAP_CREATED (1 << 0)
120 #define TX_SW_DESC_MAP_CREATED (1 << 1)
121 #define RX_SW_DESC_INUSE (1 << 3)
122 #define TX_SW_DESC_MAPPED (1 << 4)
123
124 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
125 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
126 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
127 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
128
129 struct tx_sw_desc { /* SW state per Tx descriptor */
130 struct mbuf *m;
131 bus_dmamap_t map;
132 int flags;
133 };
134
135 struct rx_sw_desc { /* SW state per Rx descriptor */
136 void *cl;
137 bus_dmamap_t map;
138 int flags;
139 };
140
141 struct txq_state {
142 unsigned int compl;
143 unsigned int gen;
144 unsigned int pidx;
145 };
146
147 struct refill_fl_cb_arg {
148 int error;
149 bus_dma_segment_t seg;
150 int nseg;
151 };
152
153 /*
154 * Maps a number of flits to the number of Tx descriptors that can hold them.
155 * The formula is
156 *
157 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
158 *
159 * HW allows up to 4 descriptors to be combined into a WR.
160 */
161 static uint8_t flit_desc_map[] = {
162 0,
163 #if SGE_NUM_GENBITS == 1
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
166 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
167 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
168 #elif SGE_NUM_GENBITS == 2
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
172 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
173 #else
174 # error "SGE_NUM_GENBITS must be 1 or 2"
175 #endif
176 };
177
178
179 static int lro_default = 0;
180 int cxgb_debug = 0;
181
182 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
183 static void sge_timer_cb(void *arg);
184 static void sge_timer_reclaim(void *arg, int ncount);
185 static void sge_txq_reclaim_handler(void *arg, int ncount);
186 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
187
188 /**
189 * reclaim_completed_tx - reclaims completed Tx descriptors
190 * @adapter: the adapter
191 * @q: the Tx queue to reclaim completed descriptors from
192 *
193 * Reclaims Tx descriptors that the SGE has indicated it has processed,
194 * and frees the associated buffers if possible. Called with the Tx
195 * queue's lock held.
196 */
197 static __inline int
198 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
199 {
200 int reclaimed, reclaim = desc_reclaimable(q);
201 int n = 0;
202
203 mtx_assert(&q->lock, MA_OWNED);
204 if (reclaim > 0) {
205 n = free_tx_desc(q, min(reclaim, nbufs), mvec);
206 reclaimed = min(reclaim, nbufs);
207 q->cleaned += reclaimed;
208 q->in_use -= reclaimed;
209 }
210 return (n);
211 }
212
213 /**
214 * should_restart_tx - are there enough resources to restart a Tx queue?
215 * @q: the Tx queue
216 *
217 * Checks if there are enough descriptors to restart a suspended Tx queue.
218 */
219 static __inline int
220 should_restart_tx(const struct sge_txq *q)
221 {
222 unsigned int r = q->processed - q->cleaned;
223
224 return q->in_use - r < (q->size >> 1);
225 }
226
227 /**
228 * t3_sge_init - initialize SGE
229 * @adap: the adapter
230 * @p: the SGE parameters
231 *
232 * Performs SGE initialization needed every time after a chip reset.
233 * We do not initialize any of the queue sets here, instead the driver
234 * top-level must request those individually. We also do not enable DMA
235 * here, that should be done after the queues have been set up.
236 */
237 void
238 t3_sge_init(adapter_t *adap, struct sge_params *p)
239 {
240 u_int ctrl, ups;
241
242 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
243
244 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
245 F_CQCRDTCTRL |
246 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
247 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
248 #if SGE_NUM_GENBITS == 1
249 ctrl |= F_EGRGENCTRL;
250 #endif
251 if (adap->params.rev > 0) {
252 if (!(adap->flags & (USING_MSIX | USING_MSI)))
253 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
254 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
255 }
256 t3_write_reg(adap, A_SG_CONTROL, ctrl);
257 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
258 V_LORCQDRBTHRSH(512));
259 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
260 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
261 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
262 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
263 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
264 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
265 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
266 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
267 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
268 }
269
270
271 /**
272 * sgl_len - calculates the size of an SGL of the given capacity
273 * @n: the number of SGL entries
274 *
275 * Calculates the number of flits needed for a scatter/gather list that
276 * can hold the given number of entries.
277 */
278 static __inline unsigned int
279 sgl_len(unsigned int n)
280 {
281 return ((3 * n) / 2 + (n & 1));
282 }
283
284 /**
285 * get_imm_packet - return the next ingress packet buffer from a response
286 * @resp: the response descriptor containing the packet data
287 *
288 * Return a packet containing the immediate data of the given response.
289 */
290 #ifdef DISABLE_MBUF_IOVEC
291 static __inline int
292 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
293 {
294 struct mbuf *m;
295 int len;
296 uint32_t flags = ntohl(resp->flags);
297 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
298
299 /*
300 * would be a firmware bug
301 */
302 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
303 return (0);
304
305 m = m_gethdr(M_NOWAIT, MT_DATA);
306 len = G_RSPD_LEN(ntohl(resp->len_cq));
307
308 if (m) {
309 MH_ALIGN(m, IMMED_PKT_SIZE);
310 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
311 m->m_len = len;
312
313 switch (sopeop) {
314 case RSPQ_SOP_EOP:
315 mh->mh_head = mh->mh_tail = m;
316 m->m_pkthdr.len = len;
317 m->m_flags |= M_PKTHDR;
318 break;
319 case RSPQ_EOP:
320 m->m_flags &= ~M_PKTHDR;
321 mh->mh_head->m_pkthdr.len += len;
322 mh->mh_tail->m_next = m;
323 mh->mh_tail = m;
324 break;
325 }
326 }
327 return (m != NULL);
328 }
329
330 #else
331 static int
332 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
333 {
334 int len, error;
335 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
336
337 /*
338 * would be a firmware bug
339 */
340 len = G_RSPD_LEN(ntohl(resp->len_cq));
341 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
342 if (cxgb_debug)
343 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len);
344 bogus_imm++;
345 return (EINVAL);
346 }
347 error = 0;
348 switch (sopeop) {
349 case RSPQ_SOP_EOP:
350 m->m_len = m->m_pkthdr.len = len;
351 memcpy(mtod(m, uint8_t *), resp->imm_data, len);
352 break;
353 case RSPQ_EOP:
354 memcpy(cl, resp->imm_data, len);
355 m_iovappend(m, cl, MSIZE, len, 0);
356 break;
357 default:
358 bogus_imm++;
359 error = EINVAL;
360 }
361
362 return (error);
363 }
364 #endif
365
366 static __inline u_int
367 flits_to_desc(u_int n)
368 {
369 return (flit_desc_map[n]);
370 }
371
372 void
373 t3_sge_err_intr_handler(adapter_t *adapter)
374 {
375 unsigned int v, status;
376
377
378 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
379
380 if (status & F_RSPQCREDITOVERFOW)
381 CH_ALERT(adapter, "SGE response queue credit overflow\n");
382
383 if (status & F_RSPQDISABLED) {
384 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
385
386 CH_ALERT(adapter,
387 "packet delivered to disabled response queue (0x%x)\n",
388 (v >> S_RSPQ0DISABLED) & 0xff);
389 }
390
391 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
392 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
393 t3_fatal_err(adapter);
394 }
395
396 void
397 t3_sge_prep(adapter_t *adap, struct sge_params *p)
398 {
399 int i;
400
401 /* XXX Does ETHER_ALIGN need to be accounted for here? */
402 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
403
404 for (i = 0; i < SGE_QSETS; ++i) {
405 struct qset_params *q = p->qset + i;
406
407 q->polling = adap->params.rev > 0;
408
409 if (adap->params.nports > 2)
410 q->coalesce_nsecs = 50000;
411 else
412 q->coalesce_nsecs = 5000;
413
414 q->rspq_size = RSPQ_Q_SIZE;
415 q->fl_size = FL_Q_SIZE;
416 q->jumbo_size = JUMBO_Q_SIZE;
417 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
418 q->txq_size[TXQ_OFLD] = 1024;
419 q->txq_size[TXQ_CTRL] = 256;
420 q->cong_thres = 0;
421 }
422 }
423
424 int
425 t3_sge_alloc(adapter_t *sc)
426 {
427
428 /* The parent tag. */
429 if (bus_dma_tag_create( NULL, /* parent */
430 1, 0, /* algnmnt, boundary */
431 BUS_SPACE_MAXADDR, /* lowaddr */
432 BUS_SPACE_MAXADDR, /* highaddr */
433 NULL, NULL, /* filter, filterarg */
434 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
435 BUS_SPACE_UNRESTRICTED, /* nsegments */
436 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
437 0, /* flags */
438 NULL, NULL, /* lock, lockarg */
439 &sc->parent_dmat)) {
440 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
441 return (ENOMEM);
442 }
443
444 /*
445 * DMA tag for normal sized RX frames
446 */
447 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
448 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
449 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
450 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
451 return (ENOMEM);
452 }
453
454 /*
455 * DMA tag for jumbo sized RX frames.
456 */
457 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
458 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
459 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
460 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
461 return (ENOMEM);
462 }
463
464 /*
465 * DMA tag for TX frames.
466 */
467 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
468 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
469 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
470 NULL, NULL, &sc->tx_dmat)) {
471 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
472 return (ENOMEM);
473 }
474
475 return (0);
476 }
477
478 int
479 t3_sge_free(struct adapter * sc)
480 {
481
482 if (sc->tx_dmat != NULL)
483 bus_dma_tag_destroy(sc->tx_dmat);
484
485 if (sc->rx_jumbo_dmat != NULL)
486 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
487
488 if (sc->rx_dmat != NULL)
489 bus_dma_tag_destroy(sc->rx_dmat);
490
491 if (sc->parent_dmat != NULL)
492 bus_dma_tag_destroy(sc->parent_dmat);
493
494 return (0);
495 }
496
497 void
498 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
499 {
500
501 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
502 qs->rspq.polling = 0 /* p->polling */;
503 }
504
505 static void
506 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
507 {
508 struct refill_fl_cb_arg *cb_arg = arg;
509
510 cb_arg->error = error;
511 cb_arg->seg = segs[0];
512 cb_arg->nseg = nseg;
513
514 }
515
516 /**
517 * refill_fl - refill an SGE free-buffer list
518 * @sc: the controller softc
519 * @q: the free-list to refill
520 * @n: the number of new buffers to allocate
521 *
522 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
523 * The caller must assure that @n does not exceed the queue's capacity.
524 */
525 static void
526 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
527 {
528 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
529 struct rx_desc *d = &q->desc[q->pidx];
530 struct refill_fl_cb_arg cb_arg;
531 void *cl;
532 int err;
533
534 cb_arg.error = 0;
535 while (n--) {
536 /*
537 * We only allocate a cluster, mbuf allocation happens after rx
538 */
539 if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
540 log(LOG_WARNING, "Failed to allocate cluster\n");
541 goto done;
542 }
543 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
544 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
545 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
546 uma_zfree(q->zone, cl);
547 goto done;
548 }
549 sd->flags |= RX_SW_DESC_MAP_CREATED;
550 }
551 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
552 refill_fl_cb, &cb_arg, 0);
553
554 if (err != 0 || cb_arg.error) {
555 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
556 /*
557 * XXX free cluster
558 */
559 return;
560 }
561
562 sd->flags |= RX_SW_DESC_INUSE;
563 sd->cl = cl;
564 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
565 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
566 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
567 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
568
569 d++;
570 sd++;
571
572 if (++q->pidx == q->size) {
573 q->pidx = 0;
574 q->gen ^= 1;
575 sd = q->sdesc;
576 d = q->desc;
577 }
578 q->credits++;
579 }
580
581 done:
582 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
583 }
584
585
586 /**
587 * free_rx_bufs - free the Rx buffers on an SGE free list
588 * @sc: the controle softc
589 * @q: the SGE free list to clean up
590 *
591 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
592 * this queue should be stopped before calling this function.
593 */
594 static void
595 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
596 {
597 u_int cidx = q->cidx;
598
599 while (q->credits--) {
600 struct rx_sw_desc *d = &q->sdesc[cidx];
601
602 if (d->flags & RX_SW_DESC_INUSE) {
603 bus_dmamap_unload(q->entry_tag, d->map);
604 bus_dmamap_destroy(q->entry_tag, d->map);
605 uma_zfree(q->zone, d->cl);
606 }
607 d->cl = NULL;
608 if (++cidx == q->size)
609 cidx = 0;
610 }
611 }
612
613 static __inline void
614 __refill_fl(adapter_t *adap, struct sge_fl *fl)
615 {
616 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
617 }
618
619 #ifndef DISABLE_MBUF_IOVEC
620 /**
621 * recycle_rx_buf - recycle a receive buffer
622 * @adapter: the adapter
623 * @q: the SGE free list
624 * @idx: index of buffer to recycle
625 *
626 * Recycles the specified buffer on the given free list by adding it at
627 * the next available slot on the list.
628 */
629 static void
630 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
631 {
632 struct rx_desc *from = &q->desc[idx];
633 struct rx_desc *to = &q->desc[q->pidx];
634
635 q->sdesc[q->pidx] = q->sdesc[idx];
636 to->addr_lo = from->addr_lo; // already big endian
637 to->addr_hi = from->addr_hi; // likewise
638 wmb();
639 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
640 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
641 q->credits++;
642
643 if (++q->pidx == q->size) {
644 q->pidx = 0;
645 q->gen ^= 1;
646 }
647 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
648 }
649 #endif
650
651 static void
652 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
653 {
654 uint32_t *addr;
655
656 addr = arg;
657 *addr = segs[0].ds_addr;
658 }
659
660 static int
661 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
662 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
663 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
664 {
665 size_t len = nelem * elem_size;
666 void *s = NULL;
667 void *p = NULL;
668 int err;
669
670 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
671 BUS_SPACE_MAXADDR_32BIT,
672 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
673 len, 0, NULL, NULL, tag)) != 0) {
674 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
675 return (ENOMEM);
676 }
677
678 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
679 map)) != 0) {
680 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
681 return (ENOMEM);
682 }
683
684 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
685 bzero(p, len);
686 *(void **)desc = p;
687
688 if (sw_size) {
689 len = nelem * sw_size;
690 s = malloc(len, M_DEVBUF, M_WAITOK);
691 bzero(s, len);
692 *(void **)sdesc = s;
693 }
694 if (parent_entry_tag == NULL)
695 return (0);
696
697 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
698 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
699 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
700 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
701 NULL, NULL, entry_tag)) != 0) {
702 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
703 return (ENOMEM);
704 }
705 return (0);
706 }
707
708 static void
709 sge_slow_intr_handler(void *arg, int ncount)
710 {
711 adapter_t *sc = arg;
712
713 t3_slow_intr_handler(sc);
714 }
715
716 /**
717 * sge_timer_cb - perform periodic maintenance of an SGE qset
718 * @data: the SGE queue set to maintain
719 *
720 * Runs periodically from a timer to perform maintenance of an SGE queue
721 * set. It performs two tasks:
722 *
723 * a) Cleans up any completed Tx descriptors that may still be pending.
724 * Normal descriptor cleanup happens when new packets are added to a Tx
725 * queue so this timer is relatively infrequent and does any cleanup only
726 * if the Tx queue has not seen any new packets in a while. We make a
727 * best effort attempt to reclaim descriptors, in that we don't wait
728 * around if we cannot get a queue's lock (which most likely is because
729 * someone else is queueing new packets and so will also handle the clean
730 * up). Since control queues use immediate data exclusively we don't
731 * bother cleaning them up here.
732 *
733 * b) Replenishes Rx queues that have run out due to memory shortage.
734 * Normally new Rx buffers are added when existing ones are consumed but
735 * when out of memory a queue can become empty. We try to add only a few
736 * buffers here, the queue will be replenished fully as these new buffers
737 * are used up if memory shortage has subsided.
738 *
739 * c) Return coalesced response queue credits in case a response queue is
740 * starved.
741 *
742 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
743 * fifo overflows and the FW doesn't implement any recovery scheme yet.
744 */
745 static void
746 sge_timer_cb(void *arg)
747 {
748 adapter_t *sc = arg;
749 struct port_info *p;
750 struct sge_qset *qs;
751 struct sge_txq *txq;
752 int i, j;
753 int reclaim_eth, reclaim_ofl, refill_rx;
754
755 for (i = 0; i < sc->params.nports; i++)
756 for (j = 0; j < sc->port[i].nqsets; j++) {
757 qs = &sc->sge.qs[i + j];
758 txq = &qs->txq[0];
759 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
760 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
761 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
762 (qs->fl[1].credits < qs->fl[1].size));
763 if (reclaim_eth || reclaim_ofl || refill_rx) {
764 p = &sc->port[i];
765 taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
766 break;
767 }
768 }
769 if (sc->params.nports > 2) {
770 int i;
771
772 for_each_port(sc, i) {
773 struct port_info *pi = &sc->port[i];
774
775 t3_write_reg(sc, A_SG_KDOORBELL,
776 F_SELEGRCNTX |
777 (FW_TUNNEL_SGEEC_START + pi->first_qset));
778 }
779 }
780 if (sc->open_device_map != 0)
781 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
782 }
783
784 /*
785 * This is meant to be a catch-all function to keep sge state private
786 * to sge.c
787 *
788 */
789 int
790 t3_sge_init_adapter(adapter_t *sc)
791 {
792 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
793 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
794 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
795 return (0);
796 }
797
798 int
799 t3_sge_init_port(struct port_info *p)
800 {
801 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
802 return (0);
803 }
804
805 void
806 t3_sge_deinit_sw(adapter_t *sc)
807 {
808 int i;
809
810 callout_drain(&sc->sge_timer_ch);
811 if (sc->tq)
812 taskqueue_drain(sc->tq, &sc->slow_intr_task);
813 for (i = 0; i < sc->params.nports; i++)
814 if (sc->port[i].tq != NULL)
815 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
816 }
817
818 /**
819 * refill_rspq - replenish an SGE response queue
820 * @adapter: the adapter
821 * @q: the response queue to replenish
822 * @credits: how many new responses to make available
823 *
824 * Replenishes a response queue by making the supplied number of responses
825 * available to HW.
826 */
827 static __inline void
828 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
829 {
830
831 /* mbufs are allocated on demand when a rspq entry is processed. */
832 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
833 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
834 }
835
836 static __inline void
837 sge_txq_reclaim_(struct sge_txq *txq)
838 {
839 int reclaimable, i, n;
840 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
841 struct port_info *p;
842
843 p = txq->port;
844 reclaim_more:
845 n = 0;
846 reclaimable = desc_reclaimable(txq);
847 if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
848 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
849 mtx_unlock(&txq->lock);
850 }
851 if (n == 0)
852 return;
853
854 for (i = 0; i < n; i++) {
855 m_freem_vec(m_vec[i]);
856 }
857 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
858 txq->size - txq->in_use >= TX_START_MAX_DESC) {
859 txq_fills++;
860 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
861 taskqueue_enqueue(p->tq, &p->start_task);
862 }
863
864 if (n)
865 goto reclaim_more;
866 }
867
868 static void
869 sge_txq_reclaim_handler(void *arg, int ncount)
870 {
871 struct sge_txq *q = arg;
872
873 sge_txq_reclaim_(q);
874 }
875
876 static void
877 sge_timer_reclaim(void *arg, int ncount)
878 {
879 struct port_info *p = arg;
880 int i, nqsets = p->nqsets;
881 adapter_t *sc = p->adapter;
882 struct sge_qset *qs;
883 struct sge_txq *txq;
884 struct mtx *lock;
885
886 for (i = 0; i < nqsets; i++) {
887 qs = &sc->sge.qs[i];
888 txq = &qs->txq[TXQ_ETH];
889 sge_txq_reclaim_(txq);
890
891 txq = &qs->txq[TXQ_OFLD];
892 sge_txq_reclaim_(txq);
893
894 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
895 &sc->sge.qs[0].rspq.lock;
896
897 if (mtx_trylock(lock)) {
898 /* XXX currently assume that we are *NOT* polling */
899 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
900
901 if (qs->fl[0].credits < qs->fl[0].size - 16)
902 __refill_fl(sc, &qs->fl[0]);
903 if (qs->fl[1].credits < qs->fl[1].size - 16)
904 __refill_fl(sc, &qs->fl[1]);
905
906 if (status & (1 << qs->rspq.cntxt_id)) {
907 if (qs->rspq.credits) {
908 refill_rspq(sc, &qs->rspq, 1);
909 qs->rspq.credits--;
910 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
911 1 << qs->rspq.cntxt_id);
912 }
913 }
914 mtx_unlock(lock);
915 }
916 }
917 }
918
919 /**
920 * init_qset_cntxt - initialize an SGE queue set context info
921 * @qs: the queue set
922 * @id: the queue set id
923 *
924 * Initializes the TIDs and context ids for the queues of a queue set.
925 */
926 static void
927 init_qset_cntxt(struct sge_qset *qs, u_int id)
928 {
929
930 qs->rspq.cntxt_id = id;
931 qs->fl[0].cntxt_id = 2 * id;
932 qs->fl[1].cntxt_id = 2 * id + 1;
933 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
934 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
935 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
936 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
937 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
938 }
939
940
941 static void
942 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
943 {
944 txq->in_use += ndesc;
945 /*
946 * XXX we don't handle stopping of queue
947 * presumably start handles this when we bump against the end
948 */
949 txqs->gen = txq->gen;
950 txq->unacked += ndesc;
951 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
952 txq->unacked &= 7;
953 txqs->pidx = txq->pidx;
954 txq->pidx += ndesc;
955
956 if (txq->pidx >= txq->size) {
957 txq->pidx -= txq->size;
958 txq->gen ^= 1;
959 }
960
961 }
962
963 /**
964 * calc_tx_descs - calculate the number of Tx descriptors for a packet
965 * @m: the packet mbufs
966 * @nsegs: the number of segments
967 *
968 * Returns the number of Tx descriptors needed for the given Ethernet
969 * packet. Ethernet packets require addition of WR and CPL headers.
970 */
971 static __inline unsigned int
972 calc_tx_descs(const struct mbuf *m, int nsegs)
973 {
974 unsigned int flits;
975
976 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
977 return 1;
978
979 flits = sgl_len(nsegs) + 2;
980 #ifdef TSO_SUPPORTED
981 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
982 flits++;
983 #endif
984 return flits_to_desc(flits);
985 }
986
987 static unsigned int
988 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
989 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
990 {
991 struct mbuf *m0;
992 int err, pktlen;
993
994 m0 = *m;
995 pktlen = m0->m_pkthdr.len;
996
997 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
998 #ifdef DEBUG
999 if (err) {
1000 int n = 0;
1001 struct mbuf *mtmp = m0;
1002 while(mtmp) {
1003 n++;
1004 mtmp = mtmp->m_next;
1005 }
1006 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
1007 err, m0->m_pkthdr.len, n);
1008 }
1009 #endif
1010 if (err == EFBIG) {
1011 /* Too many segments, try to defrag */
1012 m0 = m_defrag(m0, M_DONTWAIT);
1013 if (m0 == NULL) {
1014 m_freem(*m);
1015 *m = NULL;
1016 return (ENOBUFS);
1017 }
1018 *m = m0;
1019 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
1020 }
1021
1022 if (err == ENOMEM) {
1023 return (err);
1024 }
1025
1026 if (err) {
1027 if (cxgb_debug)
1028 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1029 m_freem_vec(m0);
1030 *m = NULL;
1031 return (err);
1032 }
1033
1034 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
1035 stx->flags |= TX_SW_DESC_MAPPED;
1036
1037 return (0);
1038 }
1039
1040 /**
1041 * make_sgl - populate a scatter/gather list for a packet
1042 * @sgp: the SGL to populate
1043 * @segs: the packet dma segments
1044 * @nsegs: the number of segments
1045 *
1046 * Generates a scatter/gather list for the buffers that make up a packet
1047 * and returns the SGL size in 8-byte words. The caller must size the SGL
1048 * appropriately.
1049 */
1050 static __inline void
1051 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1052 {
1053 int i, idx;
1054
1055 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
1056 if (i && idx == 0)
1057 ++sgp;
1058
1059 sgp->len[idx] = htobe32(segs[i].ds_len);
1060 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1061 }
1062
1063 if (idx)
1064 sgp->len[idx] = 0;
1065 }
1066
1067 /**
1068 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1069 * @adap: the adapter
1070 * @q: the Tx queue
1071 *
1072 * Ring the doorbel if a Tx queue is asleep. There is a natural race,
1073 * where the HW is going to sleep just after we checked, however,
1074 * then the interrupt handler will detect the outstanding TX packet
1075 * and ring the doorbell for us.
1076 *
1077 * When GTS is disabled we unconditionally ring the doorbell.
1078 */
1079 static __inline void
1080 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1081 {
1082 #if USE_GTS
1083 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1084 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1085 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1086 #ifdef T3_TRACE
1087 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1088 q->cntxt_id);
1089 #endif
1090 t3_write_reg(adap, A_SG_KDOORBELL,
1091 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1092 }
1093 #else
1094 wmb(); /* write descriptors before telling HW */
1095 t3_write_reg(adap, A_SG_KDOORBELL,
1096 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1097 #endif
1098 }
1099
1100 static __inline void
1101 wr_gen2(struct tx_desc *d, unsigned int gen)
1102 {
1103 #if SGE_NUM_GENBITS == 2
1104 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1105 #endif
1106 }
1107
1108
1109
1110 /**
1111 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1112 * @ndesc: number of Tx descriptors spanned by the SGL
1113 * @txd: first Tx descriptor to be written
1114 * @txqs: txq state (generation and producer index)
1115 * @txq: the SGE Tx queue
1116 * @sgl: the SGL
1117 * @flits: number of flits to the start of the SGL in the first descriptor
1118 * @sgl_flits: the SGL size in flits
1119 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1120 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1121 *
1122 * Write a work request header and an associated SGL. If the SGL is
1123 * small enough to fit into one Tx descriptor it has already been written
1124 * and we just need to write the WR header. Otherwise we distribute the
1125 * SGL across the number of descriptors it spans.
1126 */
1127
1128 static void
1129 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1130 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1131 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1132 {
1133
1134 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1135 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1136
1137 if (__predict_true(ndesc == 1)) {
1138 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1139 V_WR_SGLSFLT(flits)) | wr_hi;
1140 wmb();
1141 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1142 V_WR_GEN(txqs->gen)) | wr_lo;
1143 /* XXX gen? */
1144 wr_gen2(txd, txqs->gen);
1145 } else {
1146 unsigned int ogen = txqs->gen;
1147 const uint64_t *fp = (const uint64_t *)sgl;
1148 struct work_request_hdr *wp = wrp;
1149
1150 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1151 V_WR_SGLSFLT(flits)) | wr_hi;
1152
1153 while (sgl_flits) {
1154 unsigned int avail = WR_FLITS - flits;
1155
1156 if (avail > sgl_flits)
1157 avail = sgl_flits;
1158 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1159 sgl_flits -= avail;
1160 ndesc--;
1161 if (!sgl_flits)
1162 break;
1163
1164 fp += avail;
1165 txd++;
1166 txsd++;
1167 if (++txqs->pidx == txq->size) {
1168 txqs->pidx = 0;
1169 txqs->gen ^= 1;
1170 txd = txq->desc;
1171 txsd = txq->sdesc;
1172 }
1173
1174 /*
1175 * when the head of the mbuf chain
1176 * is freed all clusters will be freed
1177 * with it
1178 */
1179 txsd->m = NULL;
1180 wrp = (struct work_request_hdr *)txd;
1181 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1182 V_WR_SGLSFLT(1)) | wr_hi;
1183 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1184 sgl_flits + 1)) |
1185 V_WR_GEN(txqs->gen)) | wr_lo;
1186 wr_gen2(txd, txqs->gen);
1187 flits = 1;
1188 }
1189 wrp->wr_hi |= htonl(F_WR_EOP);
1190 wmb();
1191 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1192 wr_gen2((struct tx_desc *)wp, ogen);
1193 }
1194 }
1195
1196
1197 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1198 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1199
1200 int
1201 t3_encap(struct port_info *p, struct mbuf **m, int *free)
1202 {
1203 adapter_t *sc;
1204 struct mbuf *m0;
1205 struct sge_qset *qs;
1206 struct sge_txq *txq;
1207 struct tx_sw_desc *stx;
1208 struct txq_state txqs;
1209 unsigned int ndesc, flits, cntrl, mlen;
1210 int err, nsegs, tso_info = 0;
1211
1212 struct work_request_hdr *wrp;
1213 struct tx_sw_desc *txsd;
1214 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1215 bus_dma_segment_t segs[TX_MAX_SEGS];
1216 uint32_t wr_hi, wr_lo, sgl_flits;
1217
1218 struct tx_desc *txd;
1219 struct cpl_tx_pkt *cpl;
1220
1221 m0 = *m;
1222 sc = p->adapter;
1223
1224 DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
1225
1226 /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
1227
1228 qs = &sc->sge.qs[p->first_qset];
1229
1230 txq = &qs->txq[TXQ_ETH];
1231 stx = &txq->sdesc[txq->pidx];
1232 txd = &txq->desc[txq->pidx];
1233 cpl = (struct cpl_tx_pkt *)txd;
1234 mlen = m0->m_pkthdr.len;
1235 cpl->len = htonl(mlen | 0x80000000);
1236
1237 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
1238 /*
1239 * XXX handle checksum, TSO, and VLAN here
1240 *
1241 */
1242 cntrl = V_TXPKT_INTF(p->txpkt_intf);
1243
1244 /*
1245 * XXX need to add VLAN support for 6.x
1246 */
1247 #ifdef VLAN_SUPPORTED
1248 if (m0->m_flags & M_VLANTAG)
1249 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1250 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1251 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1252 #endif
1253 if (tso_info) {
1254 int eth_type;
1255 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1256 struct ip *ip;
1257 struct tcphdr *tcp;
1258 char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1259
1260 txd->flit[2] = 0;
1261 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1262 hdr->cntrl = htonl(cntrl);
1263
1264 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1265 pkthdr = &tmp[0];
1266 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1267 } else {
1268 pkthdr = mtod(m0, char *);
1269 }
1270
1271 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1272 eth_type = CPL_ETH_II_VLAN;
1273 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1274 ETHER_VLAN_ENCAP_LEN);
1275 } else {
1276 eth_type = CPL_ETH_II;
1277 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1278 }
1279 tcp = (struct tcphdr *)((uint8_t *)ip +
1280 sizeof(*ip));
1281
1282 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1283 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1284 V_LSO_TCPHDR_WORDS(tcp->th_off);
1285 hdr->lso_info = htonl(tso_info);
1286 flits = 3;
1287 } else {
1288 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1289 cpl->cntrl = htonl(cntrl);
1290
1291 if (mlen <= WR_LEN - sizeof(*cpl)) {
1292 txq_prod(txq, 1, &txqs);
1293 txq->sdesc[txqs.pidx].m = NULL;
1294
1295 if (m0->m_len == m0->m_pkthdr.len)
1296 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1297 else
1298 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1299
1300 *free = 1;
1301 flits = (mlen + 7) / 8 + 2;
1302 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1303 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1304 F_WR_SOP | F_WR_EOP | txqs.compl);
1305 wmb();
1306 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1307 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1308
1309 wr_gen2(txd, txqs.gen);
1310 check_ring_tx_db(sc, txq);
1311 return (0);
1312 }
1313 flits = 2;
1314 }
1315
1316 wrp = (struct work_request_hdr *)txd;
1317
1318 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1319 return (err);
1320 }
1321 m0 = *m;
1322 ndesc = calc_tx_descs(m0, nsegs);
1323
1324 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1325 make_sgl(sgp, segs, nsegs);
1326
1327 sgl_flits = sgl_len(nsegs);
1328
1329 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1330 txq_prod(txq, ndesc, &txqs);
1331 txsd = &txq->sdesc[txqs.pidx];
1332 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1333 wr_lo = htonl(V_WR_TID(txq->token));
1334 txsd->m = m0;
1335 m_set_priority(m0, txqs.pidx);
1336
1337 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1338 check_ring_tx_db(p->adapter, txq);
1339
1340 return (0);
1341 }
1342
1343
1344 /**
1345 * write_imm - write a packet into a Tx descriptor as immediate data
1346 * @d: the Tx descriptor to write
1347 * @m: the packet
1348 * @len: the length of packet data to write as immediate data
1349 * @gen: the generation bit value to write
1350 *
1351 * Writes a packet as immediate data into a Tx descriptor. The packet
1352 * contains a work request at its beginning. We must write the packet
1353 * carefully so the SGE doesn't read accidentally before it's written in
1354 * its entirety.
1355 */
1356 static __inline void
1357 write_imm(struct tx_desc *d, struct mbuf *m,
1358 unsigned int len, unsigned int gen)
1359 {
1360 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1361 struct work_request_hdr *to = (struct work_request_hdr *)d;
1362
1363 memcpy(&to[1], &from[1], len - sizeof(*from));
1364 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1365 V_WR_BCNTLFLT(len & 7));
1366 wmb();
1367 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1368 V_WR_LEN((len + 7) / 8));
1369 wr_gen2(d, gen);
1370 m_freem(m);
1371 }
1372
1373 /**
1374 * check_desc_avail - check descriptor availability on a send queue
1375 * @adap: the adapter
1376 * @q: the TX queue
1377 * @m: the packet needing the descriptors
1378 * @ndesc: the number of Tx descriptors needed
1379 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1380 *
1381 * Checks if the requested number of Tx descriptors is available on an
1382 * SGE send queue. If the queue is already suspended or not enough
1383 * descriptors are available the packet is queued for later transmission.
1384 * Must be called with the Tx queue locked.
1385 *
1386 * Returns 0 if enough descriptors are available, 1 if there aren't
1387 * enough descriptors and the packet has been queued, and 2 if the caller
1388 * needs to retry because there weren't enough descriptors at the
1389 * beginning of the call but some freed up in the mean time.
1390 */
1391 static __inline int
1392 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1393 struct mbuf *m, unsigned int ndesc,
1394 unsigned int qid)
1395 {
1396 /*
1397 * XXX We currently only use this for checking the control queue
1398 * the control queue is only used for binding qsets which happens
1399 * at init time so we are guaranteed enough descriptors
1400 */
1401 if (__predict_false(!mbufq_empty(&q->sendq))) {
1402 addq_exit: mbufq_tail(&q->sendq, m);
1403 return 1;
1404 }
1405 if (__predict_false(q->size - q->in_use < ndesc)) {
1406
1407 struct sge_qset *qs = txq_to_qset(q, qid);
1408
1409 setbit(&qs->txq_stopped, qid);
1410 smp_mb();
1411
1412 if (should_restart_tx(q) &&
1413 test_and_clear_bit(qid, &qs->txq_stopped))
1414 return 2;
1415
1416 q->stops++;
1417 goto addq_exit;
1418 }
1419 return 0;
1420 }
1421
1422
1423 /**
1424 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1425 * @q: the SGE control Tx queue
1426 *
1427 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1428 * that send only immediate data (presently just the control queues) and
1429 * thus do not have any mbufs
1430 */
1431 static __inline void
1432 reclaim_completed_tx_imm(struct sge_txq *q)
1433 {
1434 unsigned int reclaim = q->processed - q->cleaned;
1435
1436 mtx_assert(&q->lock, MA_OWNED);
1437
1438 q->in_use -= reclaim;
1439 q->cleaned += reclaim;
1440 }
1441
1442 static __inline int
1443 immediate(const struct mbuf *m)
1444 {
1445 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1446 }
1447
1448 /**
1449 * ctrl_xmit - send a packet through an SGE control Tx queue
1450 * @adap: the adapter
1451 * @q: the control queue
1452 * @m: the packet
1453 *
1454 * Send a packet through an SGE control Tx queue. Packets sent through
1455 * a control queue must fit entirely as immediate data in a single Tx
1456 * descriptor and have no page fragments.
1457 */
1458 static int
1459 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1460 {
1461 int ret;
1462 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1463
1464 if (__predict_false(!immediate(m))) {
1465 m_freem(m);
1466 return 0;
1467 }
1468
1469 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1470 wrp->wr_lo = htonl(V_WR_TID(q->token));
1471
1472 mtx_lock(&q->lock);
1473 again: reclaim_completed_tx_imm(q);
1474
1475 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1476 if (__predict_false(ret)) {
1477 if (ret == 1) {
1478 mtx_unlock(&q->lock);
1479 return (-1);
1480 }
1481 goto again;
1482 }
1483
1484 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1485
1486 q->in_use++;
1487 if (++q->pidx >= q->size) {
1488 q->pidx = 0;
1489 q->gen ^= 1;
1490 }
1491 mtx_unlock(&q->lock);
1492 wmb();
1493 t3_write_reg(adap, A_SG_KDOORBELL,
1494 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1495 return (0);
1496 }
1497
1498
1499 /**
1500 * restart_ctrlq - restart a suspended control queue
1501 * @qs: the queue set cotaining the control queue
1502 *
1503 * Resumes transmission on a suspended Tx control queue.
1504 */
1505 static void
1506 restart_ctrlq(void *data, int npending)
1507 {
1508 struct mbuf *m;
1509 struct sge_qset *qs = (struct sge_qset *)data;
1510 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1511 adapter_t *adap = qs->port->adapter;
1512
1513 mtx_lock(&q->lock);
1514 again: reclaim_completed_tx_imm(q);
1515
1516 while (q->in_use < q->size &&
1517 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1518
1519 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1520
1521 if (++q->pidx >= q->size) {
1522 q->pidx = 0;
1523 q->gen ^= 1;
1524 }
1525 q->in_use++;
1526 }
1527 if (!mbufq_empty(&q->sendq)) {
1528 setbit(&qs->txq_stopped, TXQ_CTRL);
1529 smp_mb();
1530
1531 if (should_restart_tx(q) &&
1532 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1533 goto again;
1534 q->stops++;
1535 }
1536 mtx_unlock(&q->lock);
1537 t3_write_reg(adap, A_SG_KDOORBELL,
1538 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1539 }
1540
1541
1542 /*
1543 * Send a management message through control queue 0
1544 */
1545 int
1546 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1547 {
1548 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1549 }
1550
1551 /**
1552 * free_qset - free the resources of an SGE queue set
1553 * @sc: the controller owning the queue set
1554 * @q: the queue set
1555 *
1556 * Release the HW and SW resources associated with an SGE queue set, such
1557 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1558 * queue set must be quiesced prior to calling this.
1559 */
1560 static void
1561 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1562 {
1563 int i;
1564
1565 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1566 if (q->fl[i].desc) {
1567 mtx_lock(&sc->sge.reg_lock);
1568 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1569 mtx_unlock(&sc->sge.reg_lock);
1570 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1571 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1572 q->fl[i].desc_map);
1573 bus_dma_tag_destroy(q->fl[i].desc_tag);
1574 bus_dma_tag_destroy(q->fl[i].entry_tag);
1575 }
1576 if (q->fl[i].sdesc) {
1577 free_rx_bufs(sc, &q->fl[i]);
1578 free(q->fl[i].sdesc, M_DEVBUF);
1579 }
1580 }
1581
1582 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1583 if (q->txq[i].desc) {
1584 mtx_lock(&sc->sge.reg_lock);
1585 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1586 mtx_unlock(&sc->sge.reg_lock);
1587 bus_dmamap_unload(q->txq[i].desc_tag,
1588 q->txq[i].desc_map);
1589 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1590 q->txq[i].desc_map);
1591 bus_dma_tag_destroy(q->txq[i].desc_tag);
1592 bus_dma_tag_destroy(q->txq[i].entry_tag);
1593 MTX_DESTROY(&q->txq[i].lock);
1594 }
1595 if (q->txq[i].sdesc) {
1596 free(q->txq[i].sdesc, M_DEVBUF);
1597 }
1598 }
1599
1600 if (q->rspq.desc) {
1601 mtx_lock(&sc->sge.reg_lock);
1602 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1603 mtx_unlock(&sc->sge.reg_lock);
1604
1605 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1606 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1607 q->rspq.desc_map);
1608 bus_dma_tag_destroy(q->rspq.desc_tag);
1609 MTX_DESTROY(&q->rspq.lock);
1610 }
1611
1612 bzero(q, sizeof(*q));
1613 }
1614
1615 /**
1616 * t3_free_sge_resources - free SGE resources
1617 * @sc: the adapter softc
1618 *
1619 * Frees resources used by the SGE queue sets.
1620 */
1621 void
1622 t3_free_sge_resources(adapter_t *sc)
1623 {
1624 int i, nqsets;
1625
1626 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1627 nqsets += sc->port[i].nqsets;
1628
1629 for (i = 0; i < nqsets; ++i)
1630 t3_free_qset(sc, &sc->sge.qs[i]);
1631 }
1632
1633 /**
1634 * t3_sge_start - enable SGE
1635 * @sc: the controller softc
1636 *
1637 * Enables the SGE for DMAs. This is the last step in starting packet
1638 * transfers.
1639 */
1640 void
1641 t3_sge_start(adapter_t *sc)
1642 {
1643 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1644 }
1645
1646 /**
1647 * t3_sge_stop - disable SGE operation
1648 * @sc: the adapter
1649 *
1650 * Disables the DMA engine. This can be called in emeregencies (e.g.,
1651 * from error interrupts) or from normal process context. In the latter
1652 * case it also disables any pending queue restart tasklets. Note that
1653 * if it is called in interrupt context it cannot disable the restart
1654 * tasklets as it cannot wait, however the tasklets will have no effect
1655 * since the doorbells are disabled and the driver will call this again
1656 * later from process context, at which time the tasklets will be stopped
1657 * if they are still running.
1658 */
1659 void
1660 t3_sge_stop(adapter_t *sc)
1661 {
1662 int i, nqsets;
1663
1664 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1665
1666 if (sc->tq == NULL)
1667 return;
1668
1669 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1670 nqsets += sc->port[i].nqsets;
1671
1672 for (i = 0; i < nqsets; ++i) {
1673 struct sge_qset *qs = &sc->sge.qs[i];
1674
1675 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1676 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1677 }
1678 }
1679
1680
1681 /**
1682 * free_tx_desc - reclaims Tx descriptors and their buffers
1683 * @adapter: the adapter
1684 * @q: the Tx queue to reclaim descriptors from
1685 * @n: the number of descriptors to reclaim
1686 *
1687 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1688 * Tx buffers. Called with the Tx queue lock held.
1689 */
1690 int
1691 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
1692 {
1693 struct tx_sw_desc *d;
1694 unsigned int cidx = q->cidx;
1695 int nbufs = 0;
1696
1697 #ifdef T3_TRACE
1698 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1699 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1700 #endif
1701 d = &q->sdesc[cidx];
1702
1703 while (n-- > 0) {
1704 DPRINTF("cidx=%d d=%p\n", cidx, d);
1705 if (d->m) {
1706 if (d->flags & TX_SW_DESC_MAPPED) {
1707 bus_dmamap_unload(q->entry_tag, d->map);
1708 bus_dmamap_destroy(q->entry_tag, d->map);
1709 d->flags &= ~TX_SW_DESC_MAPPED;
1710 }
1711 if (m_get_priority(d->m) == cidx) {
1712 m_vec[nbufs] = d->m;
1713 d->m = NULL;
1714 nbufs++;
1715 } else {
1716 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1717 }
1718 }
1719 ++d;
1720 if (++cidx == q->size) {
1721 cidx = 0;
1722 d = q->sdesc;
1723 }
1724 }
1725 q->cidx = cidx;
1726
1727 return (nbufs);
1728 }
1729
1730 /**
1731 * is_new_response - check if a response is newly written
1732 * @r: the response descriptor
1733 * @q: the response queue
1734 *
1735 * Returns true if a response descriptor contains a yet unprocessed
1736 * response.
1737 */
1738 static __inline int
1739 is_new_response(const struct rsp_desc *r,
1740 const struct sge_rspq *q)
1741 {
1742 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1743 }
1744
1745 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1746 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1747 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1748 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1749 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1750
1751 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1752 #define NOMEM_INTR_DELAY 2500
1753
1754 /**
1755 * write_ofld_wr - write an offload work request
1756 * @adap: the adapter
1757 * @m: the packet to send
1758 * @q: the Tx queue
1759 * @pidx: index of the first Tx descriptor to write
1760 * @gen: the generation value to use
1761 * @ndesc: number of descriptors the packet will occupy
1762 *
1763 * Write an offload work request to send the supplied packet. The packet
1764 * data already carry the work request with most fields populated.
1765 */
1766 static void
1767 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1768 struct sge_txq *q, unsigned int pidx,
1769 unsigned int gen, unsigned int ndesc,
1770 bus_dma_segment_t *segs, unsigned int nsegs)
1771 {
1772 unsigned int sgl_flits, flits;
1773 struct work_request_hdr *from;
1774 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1775 struct tx_desc *d = &q->desc[pidx];
1776 struct txq_state txqs;
1777
1778 if (immediate(m)) {
1779 q->sdesc[pidx].m = NULL;
1780 write_imm(d, m, m->m_len, gen);
1781 return;
1782 }
1783
1784 /* Only TX_DATA builds SGLs */
1785
1786 from = mtod(m, struct work_request_hdr *);
1787 memcpy(&d->flit[1], &from[1],
1788 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));
1789
1790 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1791 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1792
1793 make_sgl(sgp, segs, nsegs);
1794 sgl_flits = sgl_len(nsegs);
1795
1796 txqs.gen = q->gen;
1797 txqs.pidx = q->pidx;
1798 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1799 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1800 from->wr_hi, from->wr_lo);
1801 }
1802
1803 /**
1804 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1805 * @m: the packet
1806 *
1807 * Returns the number of Tx descriptors needed for the given offload
1808 * packet. These packets are already fully constructed.
1809 */
1810 static __inline unsigned int
1811 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1812 {
1813 unsigned int flits, cnt = 0;
1814
1815
1816 if (m->m_len <= WR_LEN)
1817 return 1; /* packet fits as immediate data */
1818
1819 if (m->m_flags & M_IOVEC)
1820 cnt = mtomv(m)->mv_count;
1821
1822 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */
1823
1824 return flits_to_desc(flits + sgl_len(cnt));
1825 }
1826
1827 /**
1828 * ofld_xmit - send a packet through an offload queue
1829 * @adap: the adapter
1830 * @q: the Tx offload queue
1831 * @m: the packet
1832 *
1833 * Send an offload packet through an SGE offload queue.
1834 */
1835 static int
1836 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1837 {
1838 int ret, nsegs;
1839 unsigned int ndesc;
1840 unsigned int pidx, gen;
1841 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1842 bus_dma_segment_t segs[TX_MAX_SEGS];
1843 int i, cleaned;
1844 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1845
1846 mtx_lock(&q->lock);
1847 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1848 mtx_unlock(&q->lock);
1849 return (ret);
1850 }
1851 ndesc = calc_tx_descs_ofld(m, nsegs);
1852 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1853
1854 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1855 if (__predict_false(ret)) {
1856 if (ret == 1) {
1857 m_set_priority(m, ndesc); /* save for restart */
1858 mtx_unlock(&q->lock);
1859 return EINTR;
1860 }
1861 goto again;
1862 }
1863
1864 gen = q->gen;
1865 q->in_use += ndesc;
1866 pidx = q->pidx;
1867 q->pidx += ndesc;
1868 if (q->pidx >= q->size) {
1869 q->pidx -= q->size;
1870 q->gen ^= 1;
1871 }
1872 #ifdef T3_TRACE
1873 T3_TRACE5(adap->tb[q->cntxt_id & 7],
1874 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
1875 ndesc, pidx, skb->len, skb->len - skb->data_len,
1876 skb_shinfo(skb)->nr_frags);
1877 #endif
1878 mtx_unlock(&q->lock);
1879
1880 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1881 check_ring_tx_db(adap, q);
1882
1883 for (i = 0; i < cleaned; i++) {
1884 m_freem_vec(m_vec[i]);
1885 }
1886 return (0);
1887 }
1888
1889 /**
1890 * restart_offloadq - restart a suspended offload queue
1891 * @qs: the queue set cotaining the offload queue
1892 *
1893 * Resumes transmission on a suspended Tx offload queue.
1894 */
1895 static void
1896 restart_offloadq(void *data, int npending)
1897 {
1898
1899 struct mbuf *m;
1900 struct sge_qset *qs = data;
1901 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1902 adapter_t *adap = qs->port->adapter;
1903 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1904 bus_dma_segment_t segs[TX_MAX_SEGS];
1905 int nsegs, i, cleaned;
1906 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1907
1908 mtx_lock(&q->lock);
1909 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1910
1911 while ((m = mbufq_peek(&q->sendq)) != NULL) {
1912 unsigned int gen, pidx;
1913 unsigned int ndesc = m_get_priority(m);
1914
1915 if (__predict_false(q->size - q->in_use < ndesc)) {
1916 setbit(&qs->txq_stopped, TXQ_OFLD);
1917 smp_mb();
1918
1919 if (should_restart_tx(q) &&
1920 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1921 goto again;
1922 q->stops++;
1923 break;
1924 }
1925
1926 gen = q->gen;
1927 q->in_use += ndesc;
1928 pidx = q->pidx;
1929 q->pidx += ndesc;
1930 if (q->pidx >= q->size) {
1931 q->pidx -= q->size;
1932 q->gen ^= 1;
1933 }
1934
1935 (void)mbufq_dequeue(&q->sendq);
1936 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1937 mtx_unlock(&q->lock);
1938 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1939 mtx_lock(&q->lock);
1940 }
1941 mtx_unlock(&q->lock);
1942
1943 #if USE_GTS
1944 set_bit(TXQ_RUNNING, &q->flags);
1945 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1946 #endif
1947 t3_write_reg(adap, A_SG_KDOORBELL,
1948 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1949
1950 for (i = 0; i < cleaned; i++) {
1951 m_freem_vec(m_vec[i]);
1952 }
1953 }
1954
1955 /**
1956 * queue_set - return the queue set a packet should use
1957 * @m: the packet
1958 *
1959 * Maps a packet to the SGE queue set it should use. The desired queue
1960 * set is carried in bits 1-3 in the packet's priority.
1961 */
1962 static __inline int
1963 queue_set(const struct mbuf *m)
1964 {
1965 return m_get_priority(m) >> 1;
1966 }
1967
1968 /**
1969 * is_ctrl_pkt - return whether an offload packet is a control packet
1970 * @m: the packet
1971 *
1972 * Determines whether an offload packet should use an OFLD or a CTRL
1973 * Tx queue. This is indicated by bit 0 in the packet's priority.
1974 */
1975 static __inline int
1976 is_ctrl_pkt(const struct mbuf *m)
1977 {
1978 return m_get_priority(m) & 1;
1979 }
1980
1981 /**
1982 * t3_offload_tx - send an offload packet
1983 * @tdev: the offload device to send to
1984 * @m: the packet
1985 *
1986 * Sends an offload packet. We use the packet priority to select the
1987 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1988 * should be sent as regular or control, bits 1-3 select the queue set.
1989 */
1990 int
1991 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
1992 {
1993 adapter_t *adap = tdev2adap(tdev);
1994 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
1995
1996 if (__predict_false(is_ctrl_pkt(m)))
1997 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
1998
1999 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2000 }
2001
2002 /**
2003 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2004 * @tdev: the offload device that will be receiving the packets
2005 * @q: the SGE response queue that assembled the bundle
2006 * @m: the partial bundle
2007 * @n: the number of packets in the bundle
2008 *
2009 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2010 */
2011 static __inline void
2012 deliver_partial_bundle(struct toedev *tdev,
2013 struct sge_rspq *q,
2014 struct mbuf *mbufs[], int n)
2015 {
2016 if (n) {
2017 q->offload_bundles++;
2018 cxgb_ofld_recv(tdev, mbufs, n);
2019 }
2020 }
2021
2022 static __inline int
2023 rx_offload(struct toedev *tdev, struct sge_rspq *rq,
2024 struct mbuf *m, struct mbuf *rx_gather[],
2025 unsigned int gather_idx)
2026 {
2027 rq->offload_pkts++;
2028 m->m_pkthdr.header = mtod(m, void *);
2029
2030 rx_gather[gather_idx++] = m;
2031 if (gather_idx == RX_BUNDLE_SIZE) {
2032 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2033 gather_idx = 0;
2034 rq->offload_bundles++;
2035 }
2036 return (gather_idx);
2037 }
2038
2039 static void
2040 restart_tx(struct sge_qset *qs)
2041 {
2042 struct adapter *sc = qs->port->adapter;
2043
2044 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2045 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2046 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2047 qs->txq[TXQ_OFLD].restarts++;
2048 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2049 }
2050 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2051 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2052 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2053 qs->txq[TXQ_CTRL].restarts++;
2054 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2055 }
2056 }
2057
2058 /**
2059 * t3_sge_alloc_qset - initialize an SGE queue set
2060 * @sc: the controller softc
2061 * @id: the queue set id
2062 * @nports: how many Ethernet ports will be using this queue set
2063 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2064 * @p: configuration parameters for this queue set
2065 * @ntxq: number of Tx queues for the queue set
2066 * @pi: port info for queue set
2067 *
2068 * Allocate resources and initialize an SGE queue set. A queue set
2069 * comprises a response queue, two Rx free-buffer queues, and up to 3
2070 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2071 * queue, offload queue, and control queue.
2072 */
2073 int
2074 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2075 const struct qset_params *p, int ntxq, struct port_info *pi)
2076 {
2077 struct sge_qset *q = &sc->sge.qs[id];
2078 int i, ret = 0;
2079
2080 init_qset_cntxt(q, id);
2081
2082 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2083 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2084 &q->fl[0].desc, &q->fl[0].sdesc,
2085 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2086 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2087 printf("error %d from alloc ring fl0\n", ret);
2088 goto err;
2089 }
2090
2091 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2092 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2093 &q->fl[1].desc, &q->fl[1].sdesc,
2094 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2095 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2096 printf("error %d from alloc ring fl1\n", ret);
2097 goto err;
2098 }
2099
2100 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2101 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2102 &q->rspq.desc_tag, &q->rspq.desc_map,
2103 NULL, NULL)) != 0) {
2104 printf("error %d from alloc ring rspq\n", ret);
2105 goto err;
2106 }
2107
2108 for (i = 0; i < ntxq; ++i) {
2109 /*
2110 * The control queue always uses immediate data so does not
2111 * need to keep track of any mbufs.
2112 * XXX Placeholder for future TOE support.
2113 */
2114 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2115
2116 if ((ret = alloc_ring(sc, p->txq_size[i],
2117 sizeof(struct tx_desc), sz,
2118 &q->txq[i].phys_addr, &q->txq[i].desc,
2119 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2120 &q->txq[i].desc_map,
2121 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2122 printf("error %d from alloc ring tx %i\n", ret, i);
2123 goto err;
2124 }
2125 mbufq_init(&q->txq[i].sendq);
2126 q->txq[i].gen = 1;
2127 q->txq[i].size = p->txq_size[i];
2128 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2129 device_get_unit(sc->dev), irq_vec_idx, i);
2130 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2131 }
2132
2133 q->txq[TXQ_ETH].port = pi;
2134
2135 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2136 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2137 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2138 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2139
2140 q->fl[0].gen = q->fl[1].gen = 1;
2141 q->fl[0].size = p->fl_size;
2142 q->fl[1].size = p->jumbo_size;
2143
2144 q->rspq.gen = 1;
2145 q->rspq.cidx = 0;
2146 q->rspq.size = p->rspq_size;
2147
2148 q->txq[TXQ_ETH].stop_thres = nports *
2149 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2150
2151 q->fl[0].buf_size = MCLBYTES;
2152 q->fl[0].zone = zone_clust;
2153 q->fl[0].type = EXT_CLUSTER;
2154 q->fl[1].buf_size = MJUMPAGESIZE;
2155 q->fl[1].zone = zone_jumbop;
2156 q->fl[1].type = EXT_JUMBOP;
2157
2158 q->lro.enabled = lro_default;
2159
2160 mtx_lock(&sc->sge.reg_lock);
2161 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2162 q->rspq.phys_addr, q->rspq.size,
2163 q->fl[0].buf_size, 1, 0);
2164 if (ret) {
2165 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2166 goto err_unlock;
2167 }
2168
2169 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2170 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2171 q->fl[i].phys_addr, q->fl[i].size,
2172 q->fl[i].buf_size, p->cong_thres, 1,
2173 0);
2174 if (ret) {
2175 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2176 goto err_unlock;
2177 }
2178 }
2179
2180 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2181 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2182 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2183 1, 0);
2184 if (ret) {
2185 printf("error %d from t3_sge_init_ecntxt\n", ret);
2186 goto err_unlock;
2187 }
2188
2189 if (ntxq > 1) {
2190 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2191 USE_GTS, SGE_CNTXT_OFLD, id,
2192 q->txq[TXQ_OFLD].phys_addr,
2193 q->txq[TXQ_OFLD].size, 0, 1, 0);
2194 if (ret) {
2195 printf("error %d from t3_sge_init_ecntxt\n", ret);
2196 goto err_unlock;
2197 }
2198 }
2199
2200 if (ntxq > 2) {
2201 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2202 SGE_CNTXT_CTRL, id,
2203 q->txq[TXQ_CTRL].phys_addr,
2204 q->txq[TXQ_CTRL].size,
2205 q->txq[TXQ_CTRL].token, 1, 0);
2206 if (ret) {
2207 printf("error %d from t3_sge_init_ecntxt\n", ret);
2208 goto err_unlock;
2209 }
2210 }
2211
2212 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2213 device_get_unit(sc->dev), irq_vec_idx);
2214 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2215
2216 mtx_unlock(&sc->sge.reg_lock);
2217 t3_update_qset_coalesce(q, p);
2218 q->port = pi;
2219
2220 refill_fl(sc, &q->fl[0], q->fl[0].size);
2221 refill_fl(sc, &q->fl[1], q->fl[1].size);
2222 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2223
2224 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2225 V_NEWTIMER(q->rspq.holdoff_tmr));
2226
2227 return (0);
2228
2229 err_unlock:
2230 mtx_unlock(&sc->sge.reg_lock);
2231 err:
2232 t3_free_qset(sc, q);
2233
2234 return (ret);
2235 }
2236
2237 void
2238 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2239 {
2240 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2241 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2242 struct ifnet *ifp = pi->ifp;
2243
2244 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2245
2246 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2247 cpl->csum_valid && cpl->csum == 0xffff) {
2248 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2249 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2250 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2251 m->m_pkthdr.csum_data = 0xffff;
2252 }
2253 /*
2254 * XXX need to add VLAN support for 6.x
2255 */
2256 #ifdef VLAN_SUPPORTED
2257 if (__predict_false(cpl->vlan_valid)) {
2258 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2259 m->m_flags |= M_VLANTAG;
2260 }
2261 #endif
2262
2263 m->m_pkthdr.rcvif = ifp;
2264 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2265 m_explode(m);
2266 /*
2267 * adjust after conversion to mbuf chain
2268 */
2269 m_adj(m, sizeof(*cpl) + ethpad);
2270
2271 (*ifp->if_input)(ifp, m);
2272 }
2273
2274 /**
2275 * get_packet - return the next ingress packet buffer from a free list
2276 * @adap: the adapter that received the packet
2277 * @drop_thres: # of remaining buffers before we start dropping packets
2278 * @qs: the qset that the SGE free list holding the packet belongs to
2279 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2280 * @r: response descriptor
2281 *
2282 * Get the next packet from a free list and complete setup of the
2283 * sk_buff. If the packet is small we make a copy and recycle the
2284 * original buffer, otherwise we use the original buffer itself. If a
2285 * positive drop threshold is supplied packets are dropped and their
2286 * buffers recycled if (a) the number of remaining buffers is under the
2287 * threshold and the packet is too big to copy, or (b) the packet should
2288 * be copied but there is no memory for the copy.
2289 */
2290 #ifdef DISABLE_MBUF_IOVEC
2291
2292 static int
2293 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2294 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
2295 {
2296
2297 unsigned int len_cq = ntohl(r->len_cq);
2298 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2299 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2300 uint32_t len = G_RSPD_LEN(len_cq);
2301 uint32_t flags = ntohl(r->flags);
2302 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2303 int ret = 0;
2304
2305 prefetch(sd->cl);
2306
2307 fl->credits--;
2308 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2309 bus_dmamap_unload(fl->entry_tag, sd->map);
2310
2311 m_cljset(m, sd->cl, fl->type);
2312 m->m_len = len;
2313
2314 switch(sopeop) {
2315 case RSPQ_SOP_EOP:
2316 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2317 mh->mh_head = mh->mh_tail = m;
2318 m->m_pkthdr.len = len;
2319 m->m_flags |= M_PKTHDR;
2320 ret = 1;
2321 break;
2322 case RSPQ_NSOP_NEOP:
2323 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2324 m->m_flags &= ~M_PKTHDR;
2325 if (mh->mh_tail == NULL) {
2326 if (cxgb_debug)
2327 printf("discarding intermediate descriptor entry\n");
2328 m_freem(m);
2329 break;
2330 }
2331 mh->mh_tail->m_next = m;
2332 mh->mh_tail = m;
2333 mh->mh_head->m_pkthdr.len += len;
2334 ret = 0;
2335 break;
2336 case RSPQ_SOP:
2337 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2338 m->m_pkthdr.len = len;
2339 mh->mh_head = mh->mh_tail = m;
2340 m->m_flags |= M_PKTHDR;
2341 ret = 0;
2342 break;
2343 case RSPQ_EOP:
2344 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2345 m->m_flags &= ~M_PKTHDR;
2346 mh->mh_head->m_pkthdr.len += len;
2347 mh->mh_tail->m_next = m;
2348 mh->mh_tail = m;
2349 ret = 1;
2350 break;
2351 }
2352 if (++fl->cidx == fl->size)
2353 fl->cidx = 0;
2354
2355 return (ret);
2356 }
2357
2358 #else
2359 static int
2360 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2361 struct mbuf *m, struct rsp_desc *r)
2362 {
2363
2364 unsigned int len_cq = ntohl(r->len_cq);
2365 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2366 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2367 uint32_t len = G_RSPD_LEN(len_cq);
2368 uint32_t flags = ntohl(r->flags);
2369 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2370 void *cl;
2371 int ret = 0;
2372
2373 prefetch(sd->cl);
2374
2375 fl->credits--;
2376 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2377
2378 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2379 cl = mtod(m, void *);
2380 memcpy(cl, sd->cl, len);
2381 recycle_rx_buf(adap, fl, fl->cidx);
2382 } else {
2383 cl = sd->cl;
2384 bus_dmamap_unload(fl->entry_tag, sd->map);
2385 }
2386 switch(sopeop) {
2387 case RSPQ_SOP_EOP:
2388 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2389 if (cl == sd->cl)
2390 m_cljset(m, cl, fl->type);
2391 m->m_len = m->m_pkthdr.len = len;
2392 ret = 1;
2393 goto done;
2394 break;
2395 case RSPQ_NSOP_NEOP:
2396 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2397 ret = 0;
2398 break;
2399 case RSPQ_SOP:
2400 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2401 m_iovinit(m);
2402 ret = 0;
2403 break;
2404 case RSPQ_EOP:
2405 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2406 ret = 1;
2407 break;
2408 }
2409 m_iovappend(m, cl, fl->buf_size, len, 0);
2410
2411 done:
2412 if (++fl->cidx == fl->size)
2413 fl->cidx = 0;
2414
2415 return (ret);
2416 }
2417 #endif
2418 /**
2419 * handle_rsp_cntrl_info - handles control information in a response
2420 * @qs: the queue set corresponding to the response
2421 * @flags: the response control flags
2422 *
2423 * Handles the control information of an SGE response, such as GTS
2424 * indications and completion credits for the queue set's Tx queues.
2425 * HW coalesces credits, we don't do any extra SW coalescing.
2426 */
2427 static __inline void
2428 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2429 {
2430 unsigned int credits;
2431
2432 #if USE_GTS
2433 if (flags & F_RSPD_TXQ0_GTS)
2434 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2435 #endif
2436 credits = G_RSPD_TXQ0_CR(flags);
2437 if (credits) {
2438 qs->txq[TXQ_ETH].processed += credits;
2439 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2440 taskqueue_enqueue(qs->port->adapter->tq,
2441 &qs->port->timer_reclaim_task);
2442 }
2443
2444 credits = G_RSPD_TXQ2_CR(flags);
2445 if (credits)
2446 qs->txq[TXQ_CTRL].processed += credits;
2447
2448 # if USE_GTS
2449 if (flags & F_RSPD_TXQ1_GTS)
2450 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2451 # endif
2452 credits = G_RSPD_TXQ1_CR(flags);
2453 if (credits)
2454 qs->txq[TXQ_OFLD].processed += credits;
2455 }
2456
2457 static void
2458 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2459 unsigned int sleeping)
2460 {
2461 ;
2462 }
2463
2464 /**
2465 * process_responses - process responses from an SGE response queue
2466 * @adap: the adapter
2467 * @qs: the queue set to which the response queue belongs
2468 * @budget: how many responses can be processed in this round
2469 *
2470 * Process responses from an SGE response queue up to the supplied budget.
2471 * Responses include received packets as well as credits and other events
2472 * for the queues that belong to the response queue's queue set.
2473 * A negative budget is effectively unlimited.
2474 *
2475 * Additionally choose the interrupt holdoff time for the next interrupt
2476 * on this queue. If the system is under memory shortage use a fairly
2477 * long delay to help recovery.
2478 */
2479 static int
2480 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2481 {
2482 struct sge_rspq *rspq = &qs->rspq;
2483 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2484 int budget_left = budget;
2485 unsigned int sleeping = 0;
2486 int lro = qs->lro.enabled;
2487 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2488 int ngathered = 0;
2489 #ifdef DEBUG
2490 static int last_holdoff = 0;
2491 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2492 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2493 last_holdoff = rspq->holdoff_tmr;
2494 }
2495 #endif
2496 rspq->next_holdoff = rspq->holdoff_tmr;
2497
2498 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2499 int eth, eop = 0, ethpad = 0;
2500 uint32_t flags = ntohl(r->flags);
2501 uint32_t rss_csum = *(const uint32_t *)r;
2502 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2503
2504 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2505
2506 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2507 /* XXX */
2508 printf("async notification\n");
2509
2510 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2511 #ifdef DISABLE_MBUF_IOVEC
2512
2513 if (cxgb_debug)
2514 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
2515
2516 if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
2517 rspq->next_holdoff = NOMEM_INTR_DELAY;
2518 budget_left--;
2519 break;
2520 } else {
2521 eop = 1;
2522 }
2523 #else
2524 struct mbuf *m = NULL;
2525
2526 if (rspq->rspq_mbuf == NULL)
2527 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2528 else
2529 m = m_gethdr(M_DONTWAIT, MT_DATA);
2530
2531 /*
2532 * XXX revisit me
2533 */
2534 if (rspq->rspq_mbuf == NULL && m == NULL) {
2535 rspq->next_holdoff = NOMEM_INTR_DELAY;
2536 budget_left--;
2537 break;
2538 }
2539 if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
2540 goto skip;
2541 eop = 1;
2542 #endif
2543 rspq->imm_data++;
2544 } else if (r->len_cq) {
2545 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2546
2547 #ifdef DISABLE_MBUF_IOVEC
2548 struct mbuf *m;
2549 m = m_gethdr(M_NOWAIT, MT_DATA);
2550
2551 if (m == NULL) {
2552 log(LOG_WARNING, "failed to get mbuf for packet\n");
2553 break;
2554 }
2555
2556 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
2557 #else
2558 if (rspq->rspq_mbuf == NULL)
2559 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2560 if (rspq->rspq_mbuf == NULL) {
2561 log(LOG_WARNING, "failed to get mbuf for packet\n");
2562 break;
2563 }
2564 eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
2565 #endif
2566 ethpad = 2;
2567 } else {
2568 DPRINTF("pure response\n");
2569 rspq->pure_rsps++;
2570 }
2571
2572 if (flags & RSPD_CTRL_MASK) {
2573 sleeping |= flags & RSPD_GTS_MASK;
2574 handle_rsp_cntrl_info(qs, flags);
2575 }
2576 #ifndef DISABLE_MBUF_IOVEC
2577 skip:
2578 #endif
2579 r++;
2580 if (__predict_false(++rspq->cidx == rspq->size)) {
2581 rspq->cidx = 0;
2582 rspq->gen ^= 1;
2583 r = rspq->desc;
2584 }
2585
2586 prefetch(r);
2587 if (++rspq->credits >= (rspq->size / 4)) {
2588 refill_rspq(adap, rspq, rspq->credits);
2589 rspq->credits = 0;
2590 }
2591
2592 if (eop) {
2593 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2594 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2595
2596 if (eth) {
2597 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2598 rss_hash, rss_csum, lro);
2599
2600 rspq->rspq_mh.mh_head = NULL;
2601 } else {
2602 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2603 /*
2604 * XXX size mismatch
2605 */
2606 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2607
2608 ngathered = rx_offload(&adap->tdev, rspq,
2609 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2610 }
2611 __refill_fl(adap, &qs->fl[0]);
2612 __refill_fl(adap, &qs->fl[1]);
2613
2614 }
2615 --budget_left;
2616 }
2617
2618 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2619 t3_lro_flush(adap, qs, &qs->lro);
2620
2621 if (sleeping)
2622 check_ring_db(adap, qs, sleeping);
2623
2624 smp_mb(); /* commit Tx queue processed updates */
2625 if (__predict_false(qs->txq_stopped != 0))
2626 restart_tx(qs);
2627
2628 budget -= budget_left;
2629 return (budget);
2630 }
2631
2632 /*
2633 * A helper function that processes responses and issues GTS.
2634 */
2635 static __inline int
2636 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2637 {
2638 int work;
2639 static int last_holdoff = 0;
2640
2641 work = process_responses(adap, rspq_to_qset(rq), -1);
2642
2643 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2644 printf("next_holdoff=%d\n", rq->next_holdoff);
2645 last_holdoff = rq->next_holdoff;
2646 }
2647 if (work)
2648 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2649 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2650 return work;
2651 }
2652
2653
2654 /*
2655 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2656 * Handles data events from SGE response queues as well as error and other
2657 * async events as they all use the same interrupt pin. We use one SGE
2658 * response queue per port in this mode and protect all response queues with
2659 * queue 0's lock.
2660 */
2661 void
2662 t3b_intr(void *data)
2663 {
2664 uint32_t i, map;
2665 adapter_t *adap = data;
2666 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2667
2668 t3_write_reg(adap, A_PL_CLI, 0);
2669 map = t3_read_reg(adap, A_SG_DATA_INTR);
2670
2671 if (!map)
2672 return;
2673
2674 if (__predict_false(map & F_ERRINTR))
2675 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2676
2677 mtx_lock(&q0->lock);
2678 for_each_port(adap, i)
2679 if (map & (1 << i))
2680 process_responses_gts(adap, &adap->sge.qs[i].rspq);
2681 mtx_unlock(&q0->lock);
2682 }
2683
2684 /*
2685 * The MSI interrupt handler. This needs to handle data events from SGE
2686 * response queues as well as error and other async events as they all use
2687 * the same MSI vector. We use one SGE response queue per port in this mode
2688 * and protect all response queues with queue 0's lock.
2689 */
2690 void
2691 t3_intr_msi(void *data)
2692 {
2693 adapter_t *adap = data;
2694 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2695 int i, new_packets = 0;
2696
2697 mtx_lock(&q0->lock);
2698
2699 for_each_port(adap, i)
2700 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2701 new_packets = 1;
2702 mtx_unlock(&q0->lock);
2703 if (new_packets == 0)
2704 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2705 }
2706
2707 void
2708 t3_intr_msix(void *data)
2709 {
2710 struct sge_qset *qs = data;
2711 adapter_t *adap = qs->port->adapter;
2712 struct sge_rspq *rspq = &qs->rspq;
2713
2714 mtx_lock(&rspq->lock);
2715 if (process_responses_gts(adap, rspq) == 0)
2716 rspq->unhandled_irqs++;
2717 mtx_unlock(&rspq->lock);
2718 }
2719
2720 /*
2721 * broken by recent mbuf changes
2722 */
2723 static int
2724 t3_lro_enable(SYSCTL_HANDLER_ARGS)
2725 {
2726 adapter_t *sc;
2727 int i, j, enabled, err, nqsets = 0;
2728
2729 #ifndef LRO_WORKING
2730 return (0);
2731 #endif
2732
2733 sc = arg1;
2734 enabled = sc->sge.qs[0].lro.enabled;
2735 err = sysctl_handle_int(oidp, &enabled, arg2, req);
2736
2737 if (err != 0)
2738 return (err);
2739 if (enabled == sc->sge.qs[0].lro.enabled)
2740 return (0);
2741
2742 for (i = 0; i < sc->params.nports; i++)
2743 for (j = 0; j < sc->port[i].nqsets; j++)
2744 nqsets++;
2745
2746 for (i = 0; i < nqsets; i++)
2747 sc->sge.qs[i].lro.enabled = enabled;
2748
2749 return (0);
2750 }
2751
2752 static int
2753 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
2754 {
2755 adapter_t *sc = arg1;
2756 struct qset_params *qsp = &sc->params.sge.qset[0];
2757 int coalesce_nsecs;
2758 struct sge_qset *qs;
2759 int i, j, err, nqsets = 0;
2760 struct mtx *lock;
2761
2762 coalesce_nsecs = qsp->coalesce_nsecs;
2763 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
2764
2765 if (err != 0) {
2766 return (err);
2767 }
2768 if (coalesce_nsecs == qsp->coalesce_nsecs)
2769 return (0);
2770
2771 for (i = 0; i < sc->params.nports; i++)
2772 for (j = 0; j < sc->port[i].nqsets; j++)
2773 nqsets++;
2774
2775 coalesce_nsecs = max(100, coalesce_nsecs);
2776
2777 for (i = 0; i < nqsets; i++) {
2778 qs = &sc->sge.qs[i];
2779 qsp = &sc->params.sge.qset[i];
2780 qsp->coalesce_nsecs = coalesce_nsecs;
2781
2782 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
2783 &sc->sge.qs[0].rspq.lock;
2784
2785 mtx_lock(lock);
2786 t3_update_qset_coalesce(qs, qsp);
2787 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2788 V_NEWTIMER(qs->rspq.holdoff_tmr));
2789 mtx_unlock(lock);
2790 }
2791
2792 return (0);
2793 }
2794
2795
2796 void
2797 t3_add_sysctls(adapter_t *sc)
2798 {
2799 struct sysctl_ctx_list *ctx;
2800 struct sysctl_oid_list *children;
2801
2802 ctx = device_get_sysctl_ctx(sc->dev);
2803 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
2804
2805 /* random information */
2806 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
2807 "firmware_version",
2808 CTLFLAG_RD, &sc->fw_version,
2809 0, "firmware version");
2810
2811 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2812 "enable_lro",
2813 CTLTYPE_INT|CTLFLAG_RW, sc,
2814 0, t3_lro_enable,
2815 "I", "enable large receive offload");
2816
2817 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2818 "intr_coal",
2819 CTLTYPE_INT|CTLFLAG_RW, sc,
2820 0, t3_set_coalesce_nsecs,
2821 "I", "interrupt coalescing timer (ns)");
2822 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2823 "enable_debug",
2824 CTLFLAG_RW, &cxgb_debug,
2825 0, "enable verbose debugging output");
2826
2827 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2828 "collapse_free",
2829 CTLFLAG_RD, &collapse_free,
2830 0, "frees during collapse");
2831 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2832 "mb_free_vec_free",
2833 CTLFLAG_RD, &mb_free_vec_free,
2834 0, "frees during mb_free_vec");
2835 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2836 "collapse_mbufs",
2837 CTLFLAG_RW, &collapse_mbufs,
2838 0, "collapse mbuf chains into iovecs");
2839 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2840 "txq_overrun",
2841 CTLFLAG_RD, &txq_fills,
2842 0, "#times txq overrun");
2843 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2844 "bogus_imm",
2845 CTLFLAG_RD, &bogus_imm,
2846 0, "#times a bogus immediate response was seen");
2847 }
2848
2849 /**
2850 * t3_get_desc - dump an SGE descriptor for debugging purposes
2851 * @qs: the queue set
2852 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2853 * @idx: the descriptor index in the queue
2854 * @data: where to dump the descriptor contents
2855 *
2856 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2857 * size of the descriptor.
2858 */
2859 int
2860 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2861 unsigned char *data)
2862 {
2863 if (qnum >= 6)
2864 return (EINVAL);
2865
2866 if (qnum < 3) {
2867 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2868 return -EINVAL;
2869 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2870 return sizeof(struct tx_desc);
2871 }
2872
2873 if (qnum == 3) {
2874 if (!qs->rspq.desc || idx >= qs->rspq.size)
2875 return (EINVAL);
2876 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2877 return sizeof(struct rsp_desc);
2878 }
2879
2880 qnum -= 4;
2881 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2882 return (EINVAL);
2883 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2884 return sizeof(struct rx_desc);
2885 }
Cache object: 456b76738f6fdf63bddc02b44404a32a
|