1 /**************************************************************************
2
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: releng/6.4/sys/dev/cxgb/cxgb_sge.c 174319 2007-12-05 22:05:49Z kmacy $");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/conf.h>
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus_dma.h>
42 #include <sys/rman.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/taskqueue.h>
46
47 #include <sys/proc.h>
48 #include <sys/sched.h>
49 #include <sys/smp.h>
51
52 #include <netinet/in_systm.h>
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/tcp.h>
56
57 #include <dev/pci/pcireg.h>
58 #include <dev/pci/pcivar.h>
59
60 #ifdef CONFIG_DEFINED
61 #include <cxgb_include.h>
62 #else
63 #include <dev/cxgb/cxgb_include.h>
64 #endif
65
66 uint32_t collapse_free = 0;
67 uint32_t mb_free_vec_free = 0;
68 int txq_fills = 0;
69 int collapse_mbufs = 0;
70 static int bogus_imm = 0;
71 #ifndef DISABLE_MBUF_IOVEC
72 static int recycle_enable = 1;
73 #endif
74
75 #define USE_GTS 0
76
77 #define SGE_RX_SM_BUF_SIZE 1536
78 #define SGE_RX_DROP_THRES 16
79 #define SGE_RX_COPY_THRES 128
80
81 /*
82 * Period of the Tx buffer reclaim timer. This timer does not need to run
83 * frequently as Tx buffers are usually reclaimed by new Tx packets.
84 */
85 #define TX_RECLAIM_PERIOD (hz >> 1)
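/*
 * E.g., assuming hz = 1000, TX_RECLAIM_PERIOD is 500 ticks, i.e. the
 * reclaim timer fires roughly every half second.
 */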
86
87 /*
88 * work request size in bytes
89 */
90 #define WR_LEN (WR_FLITS * 8)
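/*
 * A flit is one 64-bit word of a work request, so a WR spans
 * WR_FLITS * 8 bytes; e.g., assuming WR_FLITS == 15, WR_LEN is 120 bytes.
 */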
91
92 /*
93 * Values for sge_txq.flags
94 */
95 enum {
96 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
97 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
98 };
99
100 struct tx_desc {
101 uint64_t flit[TX_DESC_FLITS];
102 } __packed;
103
104 struct rx_desc {
105 uint32_t addr_lo;
106 uint32_t len_gen;
107 uint32_t gen2;
108 uint32_t addr_hi;
} __packed;
110
111 struct rsp_desc { /* response queue descriptor */
112 struct rss_header rss_hdr;
113 uint32_t flags;
114 uint32_t len_cq;
115 uint8_t imm_data[47];
116 uint8_t intr_gen;
117 } __packed;
118
119 #define RX_SW_DESC_MAP_CREATED (1 << 0)
120 #define TX_SW_DESC_MAP_CREATED (1 << 1)
121 #define RX_SW_DESC_INUSE (1 << 3)
122 #define TX_SW_DESC_MAPPED (1 << 4)
123
124 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
125 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
126 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
127 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
128
129 struct tx_sw_desc { /* SW state per Tx descriptor */
130 struct mbuf *m;
131 bus_dmamap_t map;
132 int flags;
133 };
134
135 struct rx_sw_desc { /* SW state per Rx descriptor */
136 void *cl;
137 bus_dmamap_t map;
138 int flags;
139 };
140
141 struct txq_state {
142 unsigned int compl;
143 unsigned int gen;
144 unsigned int pidx;
145 };
146
147 struct refill_fl_cb_arg {
148 int error;
149 bus_dma_segment_t seg;
150 int nseg;
151 };
152
153 /*
154 * Maps a number of flits to the number of Tx descriptors that can hold them.
155 * The formula is
156 *
157 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
158 *
159 * HW allows up to 4 descriptors to be combined into a WR.
160 */
161 static uint8_t flit_desc_map[] = {
162 0,
163 #if SGE_NUM_GENBITS == 1
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
166 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
167 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
168 #elif SGE_NUM_GENBITS == 2
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
172 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
173 #else
174 # error "SGE_NUM_GENBITS must be 1 or 2"
175 #endif
176 };
177
178
179 static int lro_default = 0;
180 int cxgb_debug = 0;
181
182 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
183 static void sge_timer_cb(void *arg);
184 static void sge_timer_reclaim(void *arg, int ncount);
185 static void sge_txq_reclaim_handler(void *arg, int ncount);
186 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
187
188 /**
189 * reclaim_completed_tx - reclaims completed Tx descriptors
190 * @adapter: the adapter
191 * @q: the Tx queue to reclaim completed descriptors from
192 *
193 * Reclaims Tx descriptors that the SGE has indicated it has processed,
194 * and frees the associated buffers if possible. Called with the Tx
195 * queue's lock held.
196 */
197 static __inline int
198 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
199 {
200 int reclaimed, reclaim = desc_reclaimable(q);
201 int n = 0;
202
203 mtx_assert(&q->lock, MA_OWNED);
204 if (reclaim > 0) {
205 n = free_tx_desc(q, min(reclaim, nbufs), mvec);
206 reclaimed = min(reclaim, nbufs);
207 q->cleaned += reclaimed;
208 q->in_use -= reclaimed;
209 }
210 return (n);
211 }
212
213 /**
214 * should_restart_tx - are there enough resources to restart a Tx queue?
215 * @q: the Tx queue
216 *
217 * Checks if there are enough descriptors to restart a suspended Tx queue.
218 */
219 static __inline int
220 should_restart_tx(const struct sge_txq *q)
221 {
222 unsigned int r = q->processed - q->cleaned;
223
224 return q->in_use - r < (q->size >> 1);
225 }
226
227 /**
228 * t3_sge_init - initialize SGE
229 * @adap: the adapter
230 * @p: the SGE parameters
231 *
232 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here; instead, the driver
 * top level must request those individually. We also do not enable DMA
235 * here, that should be done after the queues have been set up.
236 */
237 void
238 t3_sge_init(adapter_t *adap, struct sge_params *p)
239 {
240 u_int ctrl, ups;
241
242 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
243
244 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
245 F_CQCRDTCTRL |
246 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
247 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
248 #if SGE_NUM_GENBITS == 1
249 ctrl |= F_EGRGENCTRL;
250 #endif
251 if (adap->params.rev > 0) {
252 if (!(adap->flags & (USING_MSIX | USING_MSI)))
253 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
254 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
255 }
256 t3_write_reg(adap, A_SG_CONTROL, ctrl);
257 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
258 V_LORCQDRBTHRSH(512));
259 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
260 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
261 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
262 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
263 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
264 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
265 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
266 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
267 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
268 }
269
270
271 /**
272 * sgl_len - calculates the size of an SGL of the given capacity
273 * @n: the number of SGL entries
274 *
275 * Calculates the number of flits needed for a scatter/gather list that
276 * can hold the given number of entries.
277 */
278 static __inline unsigned int
279 sgl_len(unsigned int n)
280 {
281 return ((3 * n) / 2 + (n & 1));
282 }
283
284 /**
285 * get_imm_packet - return the next ingress packet buffer from a response
286 * @resp: the response descriptor containing the packet data
287 *
288 * Return a packet containing the immediate data of the given response.
289 */
290 #ifdef DISABLE_MBUF_IOVEC
291 static __inline int
292 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
293 {
294 struct mbuf *m;
295 int len;
296 uint32_t flags = ntohl(resp->flags);
297 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
298
299 /*
300 * would be a firmware bug
301 */
302 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
303 return (0);
304
305 m = m_gethdr(M_NOWAIT, MT_DATA);
306 len = G_RSPD_LEN(ntohl(resp->len_cq));
307
308 if (m) {
309 MH_ALIGN(m, IMMED_PKT_SIZE);
310 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
311 m->m_len = len;
312
313 switch (sopeop) {
314 case RSPQ_SOP_EOP:
315 mh->mh_head = mh->mh_tail = m;
316 m->m_pkthdr.len = len;
317 m->m_flags |= M_PKTHDR;
318 break;
319 case RSPQ_EOP:
320 m->m_flags &= ~M_PKTHDR;
321 mh->mh_head->m_pkthdr.len += len;
322 mh->mh_tail->m_next = m;
323 mh->mh_tail = m;
324 break;
325 }
326 }
327 return (m != NULL);
328 }
329
330 #else
331 static int
332 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
333 {
334 int len, error;
335 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
336
337 /*
338 * would be a firmware bug
339 */
340 len = G_RSPD_LEN(ntohl(resp->len_cq));
341 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
342 if (cxgb_debug)
			device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%d in get_imm_packet\n", sopeop, flags, len);
344 bogus_imm++;
345 return (EINVAL);
346 }
347 error = 0;
348 switch (sopeop) {
349 case RSPQ_SOP_EOP:
350 m->m_len = m->m_pkthdr.len = len;
351 memcpy(mtod(m, uint8_t *), resp->imm_data, len);
352 break;
353 case RSPQ_EOP:
354 memcpy(cl, resp->imm_data, len);
355 m_iovappend(m, cl, MSIZE, len, 0);
356 break;
357 default:
358 bogus_imm++;
359 error = EINVAL;
360 }
361
362 return (error);
363 }
364 #endif
365
366 static __inline u_int
367 flits_to_desc(u_int n)
368 {
369 return (flit_desc_map[n]);
370 }
371
372 void
373 t3_sge_err_intr_handler(adapter_t *adapter)
374 {
375 unsigned int v, status;
376
377
378 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
379
380 if (status & F_RSPQCREDITOVERFOW)
381 CH_ALERT(adapter, "SGE response queue credit overflow\n");
382
383 if (status & F_RSPQDISABLED) {
384 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
385
386 CH_ALERT(adapter,
387 "packet delivered to disabled response queue (0x%x)\n",
388 (v >> S_RSPQ0DISABLED) & 0xff);
389 }
390
391 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
392 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
393 t3_fatal_err(adapter);
394 }
395
396 void
397 t3_sge_prep(adapter_t *adap, struct sge_params *p)
398 {
399 int i;
400
401 /* XXX Does ETHER_ALIGN need to be accounted for here? */
402 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
403
404 for (i = 0; i < SGE_QSETS; ++i) {
405 struct qset_params *q = p->qset + i;
406
407 q->polling = adap->params.rev > 0;
408
409 if (adap->params.nports > 2)
410 q->coalesce_nsecs = 50000;
411 else
412 q->coalesce_nsecs = 5000;
413
414 q->rspq_size = RSPQ_Q_SIZE;
415 q->fl_size = FL_Q_SIZE;
416 q->jumbo_size = JUMBO_Q_SIZE;
417 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
418 q->txq_size[TXQ_OFLD] = 1024;
419 q->txq_size[TXQ_CTRL] = 256;
420 q->cong_thres = 0;
421 }
422 }
423
424 int
425 t3_sge_alloc(adapter_t *sc)
426 {
427
428 /* The parent tag. */
429 if (bus_dma_tag_create( NULL, /* parent */
430 1, 0, /* algnmnt, boundary */
431 BUS_SPACE_MAXADDR, /* lowaddr */
432 BUS_SPACE_MAXADDR, /* highaddr */
433 NULL, NULL, /* filter, filterarg */
434 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
435 BUS_SPACE_UNRESTRICTED, /* nsegments */
436 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
437 0, /* flags */
438 NULL, NULL, /* lock, lockarg */
439 &sc->parent_dmat)) {
440 device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
441 return (ENOMEM);
442 }
443
444 /*
445 * DMA tag for normal sized RX frames
446 */
447 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
448 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
449 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
450 device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
451 return (ENOMEM);
452 }
453
454 /*
455 * DMA tag for jumbo sized RX frames.
456 */
457 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
458 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
459 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
460 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
461 return (ENOMEM);
462 }
463
464 /*
465 * DMA tag for TX frames.
466 */
467 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
468 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
469 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
470 NULL, NULL, &sc->tx_dmat)) {
471 device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
472 return (ENOMEM);
473 }
474
475 return (0);
476 }
477
478 int
479 t3_sge_free(struct adapter * sc)
480 {
481
482 if (sc->tx_dmat != NULL)
483 bus_dma_tag_destroy(sc->tx_dmat);
484
485 if (sc->rx_jumbo_dmat != NULL)
486 bus_dma_tag_destroy(sc->rx_jumbo_dmat);
487
488 if (sc->rx_dmat != NULL)
489 bus_dma_tag_destroy(sc->rx_dmat);
490
491 if (sc->parent_dmat != NULL)
492 bus_dma_tag_destroy(sc->parent_dmat);
493
494 return (0);
495 }
496
497 void
498 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
499 {
500
501 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
502 qs->rspq.polling = 0 /* p->polling */;
503 }
504
505 static void
506 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
507 {
508 struct refill_fl_cb_arg *cb_arg = arg;
509
510 cb_arg->error = error;
511 cb_arg->seg = segs[0];
512 cb_arg->nseg = nseg;
513
514 }
515
516 /**
517 * refill_fl - refill an SGE free-buffer list
518 * @sc: the controller softc
519 * @q: the free-list to refill
520 * @n: the number of new buffers to allocate
521 *
522 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must ensure that @n does not exceed the queue's capacity.
524 */
525 static void
526 refill_fl(adapter_t *sc, struct sge_fl *q, int n, int blocking)
527 {
528 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
529 struct rx_desc *d = &q->desc[q->pidx];
530 struct refill_fl_cb_arg cb_arg;
531 void *cl;
532 int err;
533 #ifdef notyet
534 int flags = blocking ? M_TRYWAIT : M_DONTWAIT;
535 #else
536 int flags = M_DONTWAIT;
537 #endif
538
539 cb_arg.error = 0;
540 while (n--) {
541 /*
542 * We only allocate a cluster, mbuf allocation happens after rx
543 */
544 if ((cl = m_cljget(NULL, flags, q->buf_size)) == NULL) {
545 log(LOG_WARNING, "Failed to allocate cluster\n");
546 goto done;
547 }
548 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
549 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
550 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
551 uma_zfree(q->zone, cl);
552 goto done;
553 }
554 sd->flags |= RX_SW_DESC_MAP_CREATED;
555 }
556 err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
557 refill_fl_cb, &cb_arg, 0);
558
559 if (err != 0 || cb_arg.error) {
560 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
561 /*
562 * XXX free cluster
563 */
564 return;
565 }
566
567 sd->flags |= RX_SW_DESC_INUSE;
568 sd->cl = cl;
569 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
571 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
572 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
573
574 d++;
575 sd++;
576
577 if (++q->pidx == q->size) {
578 q->pidx = 0;
579 q->gen ^= 1;
580 sd = q->sdesc;
581 d = q->desc;
582 }
583 q->credits++;
584 }
585
586 done:
587 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
588 }
589
590
591 /**
592 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
594 * @q: the SGE free list to clean up
595 *
596 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
597 * this queue should be stopped before calling this function.
598 */
599 static void
600 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
601 {
602 u_int cidx = q->cidx;
603
604 while (q->credits--) {
605 struct rx_sw_desc *d = &q->sdesc[cidx];
606
607 if (d->flags & RX_SW_DESC_INUSE) {
608 bus_dmamap_unload(q->entry_tag, d->map);
609 bus_dmamap_destroy(q->entry_tag, d->map);
610 uma_zfree(q->zone, d->cl);
611 }
612 d->cl = NULL;
613 if (++cidx == q->size)
614 cidx = 0;
615 }
616 }
617
618 static __inline void
619 __refill_fl(adapter_t *adap, struct sge_fl *fl)
620 {
621 refill_fl(adap, fl, min(16U, fl->size - fl->credits), 0);
622 }
623
624 #ifndef DISABLE_MBUF_IOVEC
625 /**
626 * recycle_rx_buf - recycle a receive buffer
627 * @adapter: the adapter
628 * @q: the SGE free list
629 * @idx: index of buffer to recycle
630 *
631 * Recycles the specified buffer on the given free list by adding it at
632 * the next available slot on the list.
633 */
634 static void
635 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
636 {
637 struct rx_desc *from = &q->desc[idx];
638 struct rx_desc *to = &q->desc[q->pidx];
639
640 q->sdesc[q->pidx] = q->sdesc[idx];
641 to->addr_lo = from->addr_lo; // already big endian
642 to->addr_hi = from->addr_hi; // likewise
643 wmb();
644 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
645 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
646 q->credits++;
647
648 if (++q->pidx == q->size) {
649 q->pidx = 0;
650 q->gen ^= 1;
651 }
652 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
653 }
654 #endif
655
656 static void
657 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
658 {
659 uint32_t *addr;
660
661 addr = arg;
662 *addr = segs[0].ds_addr;
663 }
664
665 static int
666 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
667 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
668 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
669 {
670 size_t len = nelem * elem_size;
671 void *s = NULL;
672 void *p = NULL;
673 int err;
674
675 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
676 BUS_SPACE_MAXADDR_32BIT,
677 BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
678 len, 0, NULL, NULL, tag)) != 0) {
679 device_printf(sc->dev, "Cannot allocate descriptor tag\n");
680 return (ENOMEM);
681 }
682
683 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
684 map)) != 0) {
685 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
686 return (ENOMEM);
687 }
688
689 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
690 bzero(p, len);
691 *(void **)desc = p;
692
693 if (sw_size) {
694 len = nelem * sw_size;
695 s = malloc(len, M_DEVBUF, M_WAITOK);
696 bzero(s, len);
697 *(void **)sdesc = s;
698 }
699 if (parent_entry_tag == NULL)
700 return (0);
701
702 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
703 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
704 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
705 TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
706 NULL, NULL, entry_tag)) != 0) {
707 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
708 return (ENOMEM);
709 }
710 return (0);
711 }
712
713 static void
714 sge_slow_intr_handler(void *arg, int ncount)
715 {
716 adapter_t *sc = arg;
717
718 t3_slow_intr_handler(sc);
719 }
720
721 /**
 * sge_timer_cb - perform periodic maintenance of SGE queue sets
 * @arg: the adapter
 *
 * Runs periodically from a timer to perform maintenance of the adapter's
 * SGE queue sets. It performs the following tasks:
727 *
728 * a) Cleans up any completed Tx descriptors that may still be pending.
729 * Normal descriptor cleanup happens when new packets are added to a Tx
730 * queue so this timer is relatively infrequent and does any cleanup only
731 * if the Tx queue has not seen any new packets in a while. We make a
732 * best effort attempt to reclaim descriptors, in that we don't wait
733 * around if we cannot get a queue's lock (which most likely is because
734 * someone else is queueing new packets and so will also handle the clean
735 * up). Since control queues use immediate data exclusively we don't
736 * bother cleaning them up here.
737 *
738 * b) Replenishes Rx queues that have run out due to memory shortage.
739 * Normally new Rx buffers are added when existing ones are consumed but
740 * when out of memory a queue can become empty. We try to add only a few
741 * buffers here, the queue will be replenished fully as these new buffers
742 * are used up if memory shortage has subsided.
743 *
744 * c) Return coalesced response queue credits in case a response queue is
745 * starved.
746 *
747 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
748 * fifo overflows and the FW doesn't implement any recovery scheme yet.
749 */
750 static void
751 sge_timer_cb(void *arg)
752 {
753 adapter_t *sc = arg;
754 struct port_info *p;
755 struct sge_qset *qs;
756 struct sge_txq *txq;
757 int i, j;
758 int reclaim_eth, reclaim_ofl, refill_rx;
759
760 for (i = 0; i < sc->params.nports; i++)
761 for (j = 0; j < sc->port[i].nqsets; j++) {
762 qs = &sc->sge.qs[i + j];
763 txq = &qs->txq[0];
764 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
765 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
766 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
767 (qs->fl[1].credits < qs->fl[1].size));
768 if (reclaim_eth || reclaim_ofl || refill_rx) {
769 p = &sc->port[i];
770 taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
771 break;
772 }
773 }
774 if (sc->params.nports > 2) {
775 int i;
776
777 for_each_port(sc, i) {
778 struct port_info *pi = &sc->port[i];
779
780 t3_write_reg(sc, A_SG_KDOORBELL,
781 F_SELEGRCNTX |
782 (FW_TUNNEL_SGEEC_START + pi->first_qset));
783 }
784 }
785 if (sc->open_device_map != 0)
786 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
787 }
788
789 /*
790 * This is meant to be a catch-all function to keep sge state private
791 * to sge.c
792 *
793 */
794 int
795 t3_sge_init_adapter(adapter_t *sc)
796 {
797 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
798 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
799 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
800 return (0);
801 }
802
803 int
804 t3_sge_init_port(struct port_info *p)
805 {
806 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
807 return (0);
808 }
809
810 void
811 t3_sge_deinit_sw(adapter_t *sc)
812 {
813 int i;
814
815 callout_drain(&sc->sge_timer_ch);
816 if (sc->tq)
817 taskqueue_drain(sc->tq, &sc->slow_intr_task);
818 for (i = 0; i < sc->params.nports; i++)
819 if (sc->port[i].tq != NULL)
820 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
821 }
822
823 /**
824 * refill_rspq - replenish an SGE response queue
825 * @adapter: the adapter
826 * @q: the response queue to replenish
827 * @credits: how many new responses to make available
828 *
829 * Replenishes a response queue by making the supplied number of responses
830 * available to HW.
831 */
832 static __inline void
833 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
834 {
835
836 /* mbufs are allocated on demand when a rspq entry is processed. */
837 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
838 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
839 }
840
841 static __inline void
842 sge_txq_reclaim_(struct sge_txq *txq)
843 {
844 int reclaimable, i, n;
845 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
846 struct port_info *p;
847
848 p = txq->port;
849 reclaim_more:
850 n = 0;
851 reclaimable = desc_reclaimable(txq);
852 if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
853 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
854 mtx_unlock(&txq->lock);
855 }
856 if (n == 0)
857 return;
858
859 for (i = 0; i < n; i++) {
860 m_freem_vec(m_vec[i]);
861 }
862 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
863 txq->size - txq->in_use >= TX_START_MAX_DESC) {
864 txq_fills++;
865 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
866 taskqueue_enqueue(p->tq, &p->start_task);
867 }
868
869 if (n)
870 goto reclaim_more;
871 }
872
873 static void
874 sge_txq_reclaim_handler(void *arg, int ncount)
875 {
876 struct sge_txq *q = arg;
877
878 sge_txq_reclaim_(q);
879 }
880
881 static void
882 sge_timer_reclaim(void *arg, int ncount)
883 {
884 struct port_info *p = arg;
885 int i, nqsets = p->nqsets;
886 adapter_t *sc = p->adapter;
887 struct sge_qset *qs;
888 struct sge_txq *txq;
889 struct mtx *lock;
890
891 for (i = 0; i < nqsets; i++) {
892 qs = &sc->sge.qs[i];
893 txq = &qs->txq[TXQ_ETH];
894 sge_txq_reclaim_(txq);
895
896 txq = &qs->txq[TXQ_OFLD];
897 sge_txq_reclaim_(txq);
898
899 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
900 &sc->sge.qs[0].rspq.lock;
901
902 retry:
903 if (mtx_trylock(lock)) {
904 /* XXX currently assume that we are *NOT* polling */
905 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
906
907 if (qs->fl[0].credits < qs->fl[0].size - 16)
908 __refill_fl(sc, &qs->fl[0]);
909 if (qs->fl[1].credits < qs->fl[1].size - 16)
910 __refill_fl(sc, &qs->fl[1]);
911
912 if (status & (1 << qs->rspq.cntxt_id)) {
913 if (qs->rspq.credits) {
914 refill_rspq(sc, &qs->rspq, 1);
915 qs->rspq.credits--;
916 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
917 1 << qs->rspq.cntxt_id);
918 }
919 }
920 mtx_unlock(lock);
921 }
922 if ((qs->fl[0].credits < qs->fl[0].size - 16) ||
923 (qs->fl[1].credits < qs->fl[1].size - 16))
924 goto retry;
925 }
926 }
927
928 /**
929 * init_qset_cntxt - initialize an SGE queue set context info
930 * @qs: the queue set
931 * @id: the queue set id
932 *
933 * Initializes the TIDs and context ids for the queues of a queue set.
934 */
935 static void
936 init_qset_cntxt(struct sge_qset *qs, u_int id)
937 {
938
939 qs->rspq.cntxt_id = id;
940 qs->fl[0].cntxt_id = 2 * id;
941 qs->fl[1].cntxt_id = 2 * id + 1;
942 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
943 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
944 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
945 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
946 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
947 }
948
949
950 static void
951 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
952 {
953 txq->in_use += ndesc;
954 /*
955 * XXX we don't handle stopping of queue
956 * presumably start handles this when we bump against the end
957 */
958 txqs->gen = txq->gen;
959 txq->unacked += ndesc;
960 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
961 txq->unacked &= 7;
962 txqs->pidx = txq->pidx;
963 txq->pidx += ndesc;
964
965 if (txq->pidx >= txq->size) {
966 txq->pidx -= txq->size;
967 txq->gen ^= 1;
968 }
969
970 }
971
972 /**
973 * calc_tx_descs - calculate the number of Tx descriptors for a packet
974 * @m: the packet mbufs
975 * @nsegs: the number of segments
976 *
977 * Returns the number of Tx descriptors needed for the given Ethernet
978 * packet. Ethernet packets require addition of WR and CPL headers.
979 */
980 static __inline unsigned int
981 calc_tx_descs(const struct mbuf *m, int nsegs)
982 {
983 unsigned int flits;
984
985 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
986 return 1;
987
988 flits = sgl_len(nsegs) + 2;
989 #ifdef TSO_SUPPORTED
990 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
991 flits++;
992 #endif
993 return flits_to_desc(flits);
994 }
995
996 static unsigned int
997 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
998 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
999 {
1000 struct mbuf *m0;
1001 int err, pktlen;
1002
1003 m0 = *m;
1004 pktlen = m0->m_pkthdr.len;
1005
1006 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
1007 #ifdef DEBUG
1008 if (err) {
1009 int n = 0;
1010 struct mbuf *mtmp = m0;
1011 while(mtmp) {
1012 n++;
1013 mtmp = mtmp->m_next;
1014 }
1015 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
1016 err, m0->m_pkthdr.len, n);
1017 }
1018 #endif
1019 if (err == EFBIG) {
1020 /* Too many segments, try to defrag */
1021 m0 = m_defrag(m0, M_DONTWAIT);
1022 if (m0 == NULL) {
1023 m_freem(*m);
1024 *m = NULL;
1025 return (ENOBUFS);
1026 }
1027 *m = m0;
1028 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
1029 }
1030
1031 if (err == ENOMEM) {
1032 return (err);
1033 }
1034
1035 if (err) {
1036 if (cxgb_debug)
1037 printf("map failure err=%d pktlen=%d\n", err, pktlen);
1038 m_freem_vec(m0);
1039 *m = NULL;
1040 return (err);
1041 }
1042
1043 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
1044 stx->flags |= TX_SW_DESC_MAPPED;
1045
1046 return (0);
1047 }
1048
1049 /**
1050 * make_sgl - populate a scatter/gather list for a packet
1051 * @sgp: the SGL to populate
1052 * @segs: the packet dma segments
1053 * @nsegs: the number of segments
1054 *
 * Generates a scatter/gather list for the buffers that make up a packet.
 * The caller must size the SGL appropriately; its length in flits can be
 * computed with sgl_len().
1058 */
1059 static __inline void
1060 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1061 {
1062 int i, idx;
1063
1064 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
1065 if (i && idx == 0)
1066 ++sgp;
1067
1068 sgp->len[idx] = htobe32(segs[i].ds_len);
1069 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1070 }
1071
1072 if (idx)
1073 sgp->len[idx] = 0;
1074 }
1075
1076 /**
1077 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1078 * @adap: the adapter
1079 * @q: the Tx queue
1080 *
 * Ring the doorbell if a Tx queue is asleep. There is a natural race
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding Tx packet and ring the
 * doorbell for us.
1085 *
1086 * When GTS is disabled we unconditionally ring the doorbell.
1087 */
1088 static __inline void
1089 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1090 {
1091 #if USE_GTS
1092 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1093 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1094 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1095 #ifdef T3_TRACE
1096 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1097 q->cntxt_id);
1098 #endif
1099 t3_write_reg(adap, A_SG_KDOORBELL,
1100 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1101 }
1102 #else
1103 wmb(); /* write descriptors before telling HW */
1104 t3_write_reg(adap, A_SG_KDOORBELL,
1105 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1106 #endif
1107 }
1108
1109 static __inline void
1110 wr_gen2(struct tx_desc *d, unsigned int gen)
1111 {
1112 #if SGE_NUM_GENBITS == 2
1113 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1114 #endif
1115 }
1116
1117
1118
1119 /**
1120 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1121 * @ndesc: number of Tx descriptors spanned by the SGL
1122 * @txd: first Tx descriptor to be written
1123 * @txqs: txq state (generation and producer index)
1124 * @txq: the SGE Tx queue
1125 * @sgl: the SGL
1126 * @flits: number of flits to the start of the SGL in the first descriptor
1127 * @sgl_flits: the SGL size in flits
1128 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1129 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1130 *
1131 * Write a work request header and an associated SGL. If the SGL is
1132 * small enough to fit into one Tx descriptor it has already been written
1133 * and we just need to write the WR header. Otherwise we distribute the
1134 * SGL across the number of descriptors it spans.
1135 */
1136
1137 static void
1138 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1139 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1140 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1141 {
1142
1143 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1144 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1145
1146 if (__predict_true(ndesc == 1)) {
1147 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1148 V_WR_SGLSFLT(flits)) | wr_hi;
1149 wmb();
1150 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1151 V_WR_GEN(txqs->gen)) | wr_lo;
1152 /* XXX gen? */
1153 wr_gen2(txd, txqs->gen);
1154 } else {
1155 unsigned int ogen = txqs->gen;
1156 const uint64_t *fp = (const uint64_t *)sgl;
1157 struct work_request_hdr *wp = wrp;
1158
1159 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1160 V_WR_SGLSFLT(flits)) | wr_hi;
1161
1162 while (sgl_flits) {
1163 unsigned int avail = WR_FLITS - flits;
1164
1165 if (avail > sgl_flits)
1166 avail = sgl_flits;
1167 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1168 sgl_flits -= avail;
1169 ndesc--;
1170 if (!sgl_flits)
1171 break;
1172
1173 fp += avail;
1174 txd++;
1175 txsd++;
1176 if (++txqs->pidx == txq->size) {
1177 txqs->pidx = 0;
1178 txqs->gen ^= 1;
1179 txd = txq->desc;
1180 txsd = txq->sdesc;
1181 }
1182
1183 /*
1184 * when the head of the mbuf chain
1185 * is freed all clusters will be freed
1186 * with it
1187 */
1188 txsd->m = NULL;
1189 wrp = (struct work_request_hdr *)txd;
1190 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1191 V_WR_SGLSFLT(1)) | wr_hi;
1192 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1193 sgl_flits + 1)) |
1194 V_WR_GEN(txqs->gen)) | wr_lo;
1195 wr_gen2(txd, txqs->gen);
1196 flits = 1;
1197 }
1198 wrp->wr_hi |= htonl(F_WR_EOP);
1199 wmb();
1200 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1201 wr_gen2((struct tx_desc *)wp, ogen);
1202 }
1203 }
1204
1205
1206 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1207 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1208
1209 int
1210 t3_encap(struct port_info *p, struct mbuf **m, int *free)
1211 {
1212 adapter_t *sc;
1213 struct mbuf *m0;
1214 struct sge_qset *qs;
1215 struct sge_txq *txq;
1216 struct tx_sw_desc *stx;
1217 struct txq_state txqs;
1218 unsigned int ndesc, flits, cntrl, mlen;
1219 int err, nsegs, tso_info = 0;
1220
1221 struct work_request_hdr *wrp;
1222 struct tx_sw_desc *txsd;
1223 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1224 bus_dma_segment_t segs[TX_MAX_SEGS];
1225 uint32_t wr_hi, wr_lo, sgl_flits;
1226
1227 struct tx_desc *txd;
1228 struct cpl_tx_pkt *cpl;
1229
1230 m0 = *m;
1231 sc = p->adapter;
1232
1233 DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
1234
1235 /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
1236
1237 qs = &sc->sge.qs[p->first_qset];
1238
1239 txq = &qs->txq[TXQ_ETH];
1240 stx = &txq->sdesc[txq->pidx];
1241 txd = &txq->desc[txq->pidx];
1242 cpl = (struct cpl_tx_pkt *)txd;
1243 mlen = m0->m_pkthdr.len;
1244 cpl->len = htonl(mlen | 0x80000000);
1245
1246 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
1247 /*
1248 * XXX handle checksum, TSO, and VLAN here
1249 *
1250 */
1251 cntrl = V_TXPKT_INTF(p->txpkt_intf);
1252
1253 /*
1254 * XXX need to add VLAN support for 6.x
1255 */
1256 #ifdef VLAN_SUPPORTED
1257 if (m0->m_flags & M_VLANTAG)
1258 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1259 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1260 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1261 #endif
1262 if (tso_info) {
1263 int eth_type;
1264 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1265 struct ip *ip;
1266 struct tcphdr *tcp;
1267 char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1268
1269 txd->flit[2] = 0;
1270 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1271 hdr->cntrl = htonl(cntrl);
1272
1273 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1274 pkthdr = &tmp[0];
1275 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1276 } else {
1277 pkthdr = mtod(m0, char *);
1278 }
1279
1280 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1281 eth_type = CPL_ETH_II_VLAN;
1282 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1283 ETHER_VLAN_ENCAP_LEN);
1284 } else {
1285 eth_type = CPL_ETH_II;
1286 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1287 }
1288 tcp = (struct tcphdr *)((uint8_t *)ip +
1289 sizeof(*ip));
1290
1291 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1292 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1293 V_LSO_TCPHDR_WORDS(tcp->th_off);
1294 hdr->lso_info = htonl(tso_info);
1295 flits = 3;
1296 } else {
1297 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1298 cpl->cntrl = htonl(cntrl);
1299
1300 if (mlen <= WR_LEN - sizeof(*cpl)) {
1301 txq_prod(txq, 1, &txqs);
1302 txq->sdesc[txqs.pidx].m = NULL;
1303
1304 if (m0->m_len == m0->m_pkthdr.len)
1305 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1306 else
1307 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1308
1309 *free = 1;
1310 flits = (mlen + 7) / 8 + 2;
1311 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1312 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1313 F_WR_SOP | F_WR_EOP | txqs.compl);
1314 wmb();
1315 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1316 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1317
1318 wr_gen2(txd, txqs.gen);
1319 check_ring_tx_db(sc, txq);
1320 return (0);
1321 }
1322 flits = 2;
1323 }
1324
1325 wrp = (struct work_request_hdr *)txd;
1326
1327 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1328 return (err);
1329 }
1330 m0 = *m;
1331 ndesc = calc_tx_descs(m0, nsegs);
1332
1333 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1334 make_sgl(sgp, segs, nsegs);
1335
1336 sgl_flits = sgl_len(nsegs);
1337
1338 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1339 txq_prod(txq, ndesc, &txqs);
1340 txsd = &txq->sdesc[txqs.pidx];
1341 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1342 wr_lo = htonl(V_WR_TID(txq->token));
1343 txsd->m = m0;
1344 m_set_priority(m0, txqs.pidx);
1345
1346 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1347 check_ring_tx_db(p->adapter, txq);
1348
1349 return (0);
1350 }
1351
1352
1353 /**
1354 * write_imm - write a packet into a Tx descriptor as immediate data
1355 * @d: the Tx descriptor to write
1356 * @m: the packet
1357 * @len: the length of packet data to write as immediate data
1358 * @gen: the generation bit value to write
1359 *
1360 * Writes a packet as immediate data into a Tx descriptor. The packet
 * contains a work request at its beginning. We must write the packet
 * carefully so that the SGE doesn't read it before it has been written
 * in its entirety.
1364 */
1365 static __inline void
1366 write_imm(struct tx_desc *d, struct mbuf *m,
1367 unsigned int len, unsigned int gen)
1368 {
1369 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1370 struct work_request_hdr *to = (struct work_request_hdr *)d;
1371
1372 memcpy(&to[1], &from[1], len - sizeof(*from));
1373 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1374 V_WR_BCNTLFLT(len & 7));
1375 wmb();
1376 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1377 V_WR_LEN((len + 7) / 8));
1378 wr_gen2(d, gen);
1379 m_freem(m);
1380 }
1381
1382 /**
1383 * check_desc_avail - check descriptor availability on a send queue
1384 * @adap: the adapter
1385 * @q: the TX queue
1386 * @m: the packet needing the descriptors
1387 * @ndesc: the number of Tx descriptors needed
1388 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1389 *
1390 * Checks if the requested number of Tx descriptors is available on an
1391 * SGE send queue. If the queue is already suspended or not enough
1392 * descriptors are available the packet is queued for later transmission.
1393 * Must be called with the Tx queue locked.
1394 *
1395 * Returns 0 if enough descriptors are available, 1 if there aren't
1396 * enough descriptors and the packet has been queued, and 2 if the caller
1397 * needs to retry because there weren't enough descriptors at the
1398 * beginning of the call but some freed up in the mean time.
1399 */
1400 static __inline int
1401 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1402 struct mbuf *m, unsigned int ndesc,
1403 unsigned int qid)
1404 {
1405 /*
 * XXX We currently only use this for checking the control queue;
 * the control queue is only used for binding qsets, which happens
 * at init time, so we are guaranteed enough descriptors.
1409 */
1410 if (__predict_false(!mbufq_empty(&q->sendq))) {
1411 addq_exit: mbufq_tail(&q->sendq, m);
1412 return 1;
1413 }
1414 if (__predict_false(q->size - q->in_use < ndesc)) {
1415
1416 struct sge_qset *qs = txq_to_qset(q, qid);
1417
1418 setbit(&qs->txq_stopped, qid);
1419 smp_mb();
1420
1421 if (should_restart_tx(q) &&
1422 test_and_clear_bit(qid, &qs->txq_stopped))
1423 return 2;
1424
1425 q->stops++;
1426 goto addq_exit;
1427 }
1428 return 0;
1429 }
1430
1431
1432 /**
1433 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1434 * @q: the SGE control Tx queue
1435 *
1436 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1437 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs.
1439 */
1440 static __inline void
1441 reclaim_completed_tx_imm(struct sge_txq *q)
1442 {
1443 unsigned int reclaim = q->processed - q->cleaned;
1444
1445 mtx_assert(&q->lock, MA_OWNED);
1446
1447 q->in_use -= reclaim;
1448 q->cleaned += reclaim;
1449 }
1450
1451 static __inline int
1452 immediate(const struct mbuf *m)
1453 {
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
1455 }
1456
1457 /**
1458 * ctrl_xmit - send a packet through an SGE control Tx queue
1459 * @adap: the adapter
1460 * @q: the control queue
1461 * @m: the packet
1462 *
1463 * Send a packet through an SGE control Tx queue. Packets sent through
1464 * a control queue must fit entirely as immediate data in a single Tx
1465 * descriptor and have no page fragments.
1466 */
1467 static int
1468 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1469 {
1470 int ret;
1471 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1472
1473 if (__predict_false(!immediate(m))) {
1474 m_freem(m);
1475 return 0;
1476 }
1477
1478 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1479 wrp->wr_lo = htonl(V_WR_TID(q->token));
1480
1481 mtx_lock(&q->lock);
1482 again: reclaim_completed_tx_imm(q);
1483
1484 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1485 if (__predict_false(ret)) {
1486 if (ret == 1) {
1487 mtx_unlock(&q->lock);
1488 return (-1);
1489 }
1490 goto again;
1491 }
1492
1493 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1494
1495 q->in_use++;
1496 if (++q->pidx >= q->size) {
1497 q->pidx = 0;
1498 q->gen ^= 1;
1499 }
1500 mtx_unlock(&q->lock);
1501 wmb();
1502 t3_write_reg(adap, A_SG_KDOORBELL,
1503 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1504 return (0);
1505 }
1506
1507
1508 /**
1509 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
1511 *
1512 * Resumes transmission on a suspended Tx control queue.
1513 */
1514 static void
1515 restart_ctrlq(void *data, int npending)
1516 {
1517 struct mbuf *m;
1518 struct sge_qset *qs = (struct sge_qset *)data;
1519 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1520 adapter_t *adap = qs->port->adapter;
1521
1522 mtx_lock(&q->lock);
1523 again: reclaim_completed_tx_imm(q);
1524
1525 while (q->in_use < q->size &&
1526 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1527
1528 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1529
1530 if (++q->pidx >= q->size) {
1531 q->pidx = 0;
1532 q->gen ^= 1;
1533 }
1534 q->in_use++;
1535 }
1536 if (!mbufq_empty(&q->sendq)) {
1537 setbit(&qs->txq_stopped, TXQ_CTRL);
1538 smp_mb();
1539
1540 if (should_restart_tx(q) &&
1541 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1542 goto again;
1543 q->stops++;
1544 }
1545 mtx_unlock(&q->lock);
1546 t3_write_reg(adap, A_SG_KDOORBELL,
1547 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1548 }
1549
1550
1551 /*
1552 * Send a management message through control queue 0
1553 */
1554 int
1555 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1556 {
1557 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1558 }
1559
1560 /**
1561 * free_qset - free the resources of an SGE queue set
1562 * @sc: the controller owning the queue set
1563 * @q: the queue set
1564 *
1565 * Release the HW and SW resources associated with an SGE queue set, such
1566 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1567 * queue set must be quiesced prior to calling this.
1568 */
1569 static void
1570 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1571 {
1572 int i;
1573
1574 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1575 if (q->fl[i].desc) {
1576 mtx_lock(&sc->sge.reg_lock);
1577 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1578 mtx_unlock(&sc->sge.reg_lock);
1579 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1580 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1581 q->fl[i].desc_map);
1582 bus_dma_tag_destroy(q->fl[i].desc_tag);
1583 bus_dma_tag_destroy(q->fl[i].entry_tag);
1584 }
1585 if (q->fl[i].sdesc) {
1586 free_rx_bufs(sc, &q->fl[i]);
1587 free(q->fl[i].sdesc, M_DEVBUF);
1588 }
1589 }
1590
1591 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1592 if (q->txq[i].desc) {
1593 mtx_lock(&sc->sge.reg_lock);
1594 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1595 mtx_unlock(&sc->sge.reg_lock);
1596 bus_dmamap_unload(q->txq[i].desc_tag,
1597 q->txq[i].desc_map);
1598 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1599 q->txq[i].desc_map);
1600 bus_dma_tag_destroy(q->txq[i].desc_tag);
1601 bus_dma_tag_destroy(q->txq[i].entry_tag);
1602 MTX_DESTROY(&q->txq[i].lock);
1603 }
1604 if (q->txq[i].sdesc) {
1605 free(q->txq[i].sdesc, M_DEVBUF);
1606 }
1607 }
1608
1609 if (q->rspq.desc) {
1610 mtx_lock(&sc->sge.reg_lock);
1611 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1612 mtx_unlock(&sc->sge.reg_lock);
1613
1614 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1615 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1616 q->rspq.desc_map);
1617 bus_dma_tag_destroy(q->rspq.desc_tag);
1618 MTX_DESTROY(&q->rspq.lock);
1619 }
1620
1621 bzero(q, sizeof(*q));
1622 }
1623
1624 /**
1625 * t3_free_sge_resources - free SGE resources
1626 * @sc: the adapter softc
1627 *
1628 * Frees resources used by the SGE queue sets.
1629 */
1630 void
1631 t3_free_sge_resources(adapter_t *sc)
1632 {
1633 int i, nqsets;
1634
1635 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1636 nqsets += sc->port[i].nqsets;
1637
1638 for (i = 0; i < nqsets; ++i)
1639 t3_free_qset(sc, &sc->sge.qs[i]);
1640 }
1641
1642 /**
1643 * t3_sge_start - enable SGE
1644 * @sc: the controller softc
1645 *
1646 * Enables the SGE for DMAs. This is the last step in starting packet
1647 * transfers.
1648 */
1649 void
1650 t3_sge_start(adapter_t *sc)
1651 {
1652 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1653 }
1654
1655 /**
1656 * t3_sge_stop - disable SGE operation
1657 * @sc: the adapter
1658 *
 * Disables the DMA engine. This can be called in emergencies (e.g.,
1660 * from error interrupts) or from normal process context. In the latter
1661 * case it also disables any pending queue restart tasklets. Note that
1662 * if it is called in interrupt context it cannot disable the restart
1663 * tasklets as it cannot wait, however the tasklets will have no effect
1664 * since the doorbells are disabled and the driver will call this again
1665 * later from process context, at which time the tasklets will be stopped
1666 * if they are still running.
1667 */
1668 void
1669 t3_sge_stop(adapter_t *sc)
1670 {
1671 int i, nqsets;
1672
1673 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1674
1675 if (sc->tq == NULL)
1676 return;
1677
1678 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1679 nqsets += sc->port[i].nqsets;
1680
1681 for (i = 0; i < nqsets; ++i) {
1682 struct sge_qset *qs = &sc->sge.qs[i];
1683
1684 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1685 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1686 }
1687 }
1688
1689
1690 /**
 * free_tx_desc - reclaims Tx descriptors and their buffers
 * @q: the Tx queue to reclaim descriptors from
 * @n: the number of descriptors to reclaim
 * @m_vec: array in which to return the reclaimed mbufs
 *
 * Reclaims Tx descriptors from an SGE Tx queue and collects the associated
 * mbufs in @m_vec for the caller to free. Returns the number of mbufs
 * collected. Called with the Tx queue lock held.
1698 */
1699 int
1700 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
1701 {
1702 struct tx_sw_desc *d;
1703 unsigned int cidx = q->cidx;
1704 int nbufs = 0;
1705
1706 #ifdef T3_TRACE
1707 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1708 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1709 #endif
1710 d = &q->sdesc[cidx];
1711
1712 while (n-- > 0) {
1713 DPRINTF("cidx=%d d=%p\n", cidx, d);
1714 if (d->m) {
1715 if (d->flags & TX_SW_DESC_MAPPED) {
1716 bus_dmamap_unload(q->entry_tag, d->map);
1717 bus_dmamap_destroy(q->entry_tag, d->map);
1718 d->flags &= ~TX_SW_DESC_MAPPED;
1719 }
1720 if (m_get_priority(d->m) == cidx) {
1721 m_vec[nbufs] = d->m;
1722 d->m = NULL;
1723 nbufs++;
1724 } else {
1725 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1726 }
1727 }
1728 ++d;
1729 if (++cidx == q->size) {
1730 cidx = 0;
1731 d = q->sdesc;
1732 }
1733 }
1734 q->cidx = cidx;
1735
1736 return (nbufs);
1737 }
1738
1739 /**
1740 * is_new_response - check if a response is newly written
1741 * @r: the response descriptor
1742 * @q: the response queue
1743 *
1744 * Returns true if a response descriptor contains a yet unprocessed
1745 * response.
1746 */
1747 static __inline int
1748 is_new_response(const struct rsp_desc *r,
1749 const struct sge_rspq *q)
1750 {
1751 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1752 }
1753
1754 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1755 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1756 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1757 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1758 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1759
1760 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1761 #define NOMEM_INTR_DELAY 2500
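/* I.e., 2500 * 0.1us = 250us. */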
1762
1763 /**
1764 * write_ofld_wr - write an offload work request
1765 * @adap: the adapter
1766 * @m: the packet to send
1767 * @q: the Tx queue
1768 * @pidx: index of the first Tx descriptor to write
1769 * @gen: the generation value to use
1770 * @ndesc: number of descriptors the packet will occupy
1771 *
1772 * Write an offload work request to send the supplied packet. The packet
1773 * data already carry the work request with most fields populated.
1774 */
1775 static void
1776 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1777 struct sge_txq *q, unsigned int pidx,
1778 unsigned int gen, unsigned int ndesc,
1779 bus_dma_segment_t *segs, unsigned int nsegs)
1780 {
1781 unsigned int sgl_flits, flits;
1782 struct work_request_hdr *from;
1783 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1784 struct tx_desc *d = &q->desc[pidx];
1785 struct txq_state txqs;
1786
1787 if (immediate(m)) {
1788 q->sdesc[pidx].m = NULL;
1789 write_imm(d, m, m->m_len, gen);
1790 return;
1791 }
1792
1793 /* Only TX_DATA builds SGLs */
1794
1795 from = mtod(m, struct work_request_hdr *);
1796 memcpy(&d->flit[1], &from[1],
1797 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));
1798
1799 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1800 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1801
1802 make_sgl(sgp, segs, nsegs);
1803 sgl_flits = sgl_len(nsegs);
1804
1805 txqs.gen = q->gen;
1806 txqs.pidx = q->pidx;
1807 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1808 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1809 from->wr_hi, from->wr_lo);
1810 }
1811
1812 /**
1813 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1814 * @m: the packet
1815 *
1816 * Returns the number of Tx descriptors needed for the given offload
1817 * packet. These packets are already fully constructed.
1818 */
1819 static __inline unsigned int
1820 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1821 {
1822 unsigned int flits, cnt = 0;
1823
1824
1825 if (m->m_len <= WR_LEN)
1826 return 1; /* packet fits as immediate data */
1827
1828 if (m->m_flags & M_IOVEC)
1829 cnt = mtomv(m)->mv_count;
1830
1831 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */
1832
1833 return flits_to_desc(flits + sgl_len(cnt));
1834 }
1835
1836 /**
1837 * ofld_xmit - send a packet through an offload queue
1838 * @adap: the adapter
1839 * @q: the Tx offload queue
1840 * @m: the packet
1841 *
1842 * Send an offload packet through an SGE offload queue.
1843 */
1844 static int
1845 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1846 {
1847 int ret, nsegs;
1848 unsigned int ndesc;
1849 unsigned int pidx, gen;
1850 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1851 bus_dma_segment_t segs[TX_MAX_SEGS];
1852 int i, cleaned;
1853 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1854
1855 mtx_lock(&q->lock);
1856 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1857 mtx_unlock(&q->lock);
1858 return (ret);
1859 }
1860 ndesc = calc_tx_descs_ofld(m, nsegs);
1861 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1862
1863 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1864 if (__predict_false(ret)) {
1865 if (ret == 1) {
1866 m_set_priority(m, ndesc); /* save for restart */
1867 mtx_unlock(&q->lock);
1868 return EINTR;
1869 }
1870 goto again;
1871 }
1872
1873 gen = q->gen;
1874 q->in_use += ndesc;
1875 pidx = q->pidx;
1876 q->pidx += ndesc;
1877 if (q->pidx >= q->size) {
1878 q->pidx -= q->size;
1879 q->gen ^= 1;
1880 }
1881 #ifdef T3_TRACE
1882 T3_TRACE5(adap->tb[q->cntxt_id & 7],
1883 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
1886 #endif
1887 mtx_unlock(&q->lock);
1888
1889 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1890 check_ring_tx_db(adap, q);
1891
1892 for (i = 0; i < cleaned; i++) {
1893 m_freem_vec(m_vec[i]);
1894 }
1895 return (0);
1896 }
1897
1898 /**
1899 * restart_offloadq - restart a suspended offload queue
 * @qs: the queue set containing the offload queue
1901 *
1902 * Resumes transmission on a suspended Tx offload queue.
1903 */
1904 static void
1905 restart_offloadq(void *data, int npending)
1906 {
1907
1908 struct mbuf *m;
1909 struct sge_qset *qs = data;
1910 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1911 adapter_t *adap = qs->port->adapter;
1912 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1913 bus_dma_segment_t segs[TX_MAX_SEGS];
1914 int nsegs, i, cleaned;
1915 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1916
1917 mtx_lock(&q->lock);
1918 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1919
1920 while ((m = mbufq_peek(&q->sendq)) != NULL) {
1921 unsigned int gen, pidx;
1922 unsigned int ndesc = m_get_priority(m);
1923
1924 if (__predict_false(q->size - q->in_use < ndesc)) {
1925 setbit(&qs->txq_stopped, TXQ_OFLD);
1926 smp_mb();
1927
1928 if (should_restart_tx(q) &&
1929 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1930 goto again;
1931 q->stops++;
1932 break;
1933 }
1934
1935 gen = q->gen;
1936 q->in_use += ndesc;
1937 pidx = q->pidx;
1938 q->pidx += ndesc;
1939 if (q->pidx >= q->size) {
1940 q->pidx -= q->size;
1941 q->gen ^= 1;
1942 }
1943
1944 (void)mbufq_dequeue(&q->sendq);
1945 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1946 mtx_unlock(&q->lock);
1947 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1948 mtx_lock(&q->lock);
1949 }
1950 mtx_unlock(&q->lock);
1951
1952 #if USE_GTS
1953 set_bit(TXQ_RUNNING, &q->flags);
1954 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1955 #endif
1956 t3_write_reg(adap, A_SG_KDOORBELL,
1957 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1958
1959 for (i = 0; i < cleaned; i++) {
1960 m_freem_vec(m_vec[i]);
1961 }
1962 }
1963
1964 /**
1965 * queue_set - return the queue set a packet should use
1966 * @m: the packet
1967 *
1968 * Maps a packet to the SGE queue set it should use. The desired queue
1969 * set is carried in bits 1-3 in the packet's priority.
1970 */
1971 static __inline int
1972 queue_set(const struct mbuf *m)
1973 {
1974 return m_get_priority(m) >> 1;
1975 }
1976
1977 /**
1978 * is_ctrl_pkt - return whether an offload packet is a control packet
1979 * @m: the packet
1980 *
1981 * Determines whether an offload packet should use an OFLD or a CTRL
1982 * Tx queue. This is indicated by bit 0 in the packet's priority.
1983 */
1984 static __inline int
1985 is_ctrl_pkt(const struct mbuf *m)
1986 {
1987 return m_get_priority(m) & 1;
1988 }
1989
1990 /**
1991 * t3_offload_tx - send an offload packet
1992 * @tdev: the offload device to send to
1993 * @m: the packet
1994 *
1995 * Sends an offload packet. We use the packet priority to select the
1996 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1997 * should be sent as regular or control, bits 1-3 select the queue set.
1998 */
1999 int
2000 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
2001 {
2002 adapter_t *adap = tdev2adap(tdev);
2003 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2004
2005 if (__predict_false(is_ctrl_pkt(m)))
2006 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2007
2008 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2009 }
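
/*
 * A minimal sketch (compiled out) of how a caller might encode the
 * queue selection described above before calling t3_offload_tx().
 * The helper name and its parameters are hypothetical; only the bit
 * layout (bit 0 = control, bits 1-3 = queue set) comes from the
 * comments above.
 */
#if 0
static __inline void
set_offload_txq(struct mbuf *m, unsigned int qset, int ctrl)
{
	/* bit 0 selects CTRL vs. OFLD, bits 1-3 select the queue set */
	m_set_priority(m, (qset << 1) | (ctrl ? 1 : 0));
}
#endif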
2010
2011 /**
2012 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2013 * @tdev: the offload device that will be receiving the packets
2014 * @q: the SGE response queue that assembled the bundle
 * @mbufs: the partial bundle
2016 * @n: the number of packets in the bundle
2017 *
2018 * Delivers a (partial) bundle of Rx offload packets to an offload device.
2019 */
2020 static __inline void
2021 deliver_partial_bundle(struct toedev *tdev,
2022 struct sge_rspq *q,
2023 struct mbuf *mbufs[], int n)
2024 {
2025 if (n) {
2026 q->offload_bundles++;
2027 cxgb_ofld_recv(tdev, mbufs, n);
2028 }
2029 }
2030
2031 static __inline int
2032 rx_offload(struct toedev *tdev, struct sge_rspq *rq,
2033 struct mbuf *m, struct mbuf *rx_gather[],
2034 unsigned int gather_idx)
2035 {
2036 rq->offload_pkts++;
2037 m->m_pkthdr.header = mtod(m, void *);
2038
2039 rx_gather[gather_idx++] = m;
2040 if (gather_idx == RX_BUNDLE_SIZE) {
2041 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2042 gather_idx = 0;
2043 rq->offload_bundles++;
2044 }
2045 return (gather_idx);
2046 }
2047
2048 static void
2049 restart_tx(struct sge_qset *qs)
2050 {
2051 struct adapter *sc = qs->port->adapter;
2052
2053 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2054 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2055 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2056 qs->txq[TXQ_OFLD].restarts++;
2057 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2058 }
2059 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2060 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2061 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2062 qs->txq[TXQ_CTRL].restarts++;
2063 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2064 }
2065 }
2066
2067 /**
2068 * t3_sge_alloc_qset - initialize an SGE queue set
2069 * @sc: the controller softc
2070 * @id: the queue set id
2071 * @nports: how many Ethernet ports will be using this queue set
2072 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2073 * @p: configuration parameters for this queue set
2074 * @ntxq: number of Tx queues for the queue set
2075 * @pi: port info for queue set
2076 *
2077 * Allocate resources and initialize an SGE queue set. A queue set
2078 * comprises a response queue, two Rx free-buffer queues, and up to 3
2079 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2080 * queue, offload queue, and control queue.
2081 */
2082 int
2083 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2084 const struct qset_params *p, int ntxq, struct port_info *pi)
2085 {
2086 struct sge_qset *q = &sc->sge.qs[id];
2087 int i, ret = 0;
2088
2089 init_qset_cntxt(q, id);
2090
2091 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2092 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2093 &q->fl[0].desc, &q->fl[0].sdesc,
2094 &q->fl[0].desc_tag, &q->fl[0].desc_map,
2095 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2096 printf("error %d from alloc ring fl0\n", ret);
2097 goto err;
2098 }
2099
2100 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2101 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2102 &q->fl[1].desc, &q->fl[1].sdesc,
2103 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2104 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2105 printf("error %d from alloc ring fl1\n", ret);
2106 goto err;
2107 }
2108
2109 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2110 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2111 &q->rspq.desc_tag, &q->rspq.desc_map,
2112 NULL, NULL)) != 0) {
2113 printf("error %d from alloc ring rspq\n", ret);
2114 goto err;
2115 }
2116
2117 for (i = 0; i < ntxq; ++i) {
2118 /*
2119 * The control queue always uses immediate data so does not
2120 * need to keep track of any mbufs.
2121 * XXX Placeholder for future TOE support.
2122 */
2123 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2124
2125 if ((ret = alloc_ring(sc, p->txq_size[i],
2126 sizeof(struct tx_desc), sz,
2127 &q->txq[i].phys_addr, &q->txq[i].desc,
2128 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2129 &q->txq[i].desc_map,
2130 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			printf("error %d from alloc ring tx %d\n", ret, i);
2132 goto err;
2133 }
2134 mbufq_init(&q->txq[i].sendq);
2135 q->txq[i].gen = 1;
2136 q->txq[i].size = p->txq_size[i];
2137 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2138 device_get_unit(sc->dev), irq_vec_idx, i);
2139 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2140 }
2141
2142 q->txq[TXQ_ETH].port = pi;
2143
2144 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2145 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2146 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2147 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2148
2149 q->fl[0].gen = q->fl[1].gen = 1;
2150 q->fl[0].size = p->fl_size;
2151 q->fl[1].size = p->jumbo_size;
2152
2153 q->rspq.gen = 1;
2154 q->rspq.cidx = 0;
2155 q->rspq.size = p->rspq_size;
2156
2157 q->txq[TXQ_ETH].stop_thres = nports *
2158 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2159
2160 q->fl[0].buf_size = MCLBYTES;
2161 q->fl[0].zone = zone_clust;
2162 q->fl[0].type = EXT_CLUSTER;
2163 q->fl[1].buf_size = MJUMPAGESIZE;
2164 q->fl[1].zone = zone_jumbop;
2165 q->fl[1].type = EXT_JUMBOP;
2166
2167 q->lro.enabled = lro_default;
2168
2169 mtx_lock(&sc->sge.reg_lock);
2170 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2171 q->rspq.phys_addr, q->rspq.size,
2172 q->fl[0].buf_size, 1, 0);
2173 if (ret) {
2174 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2175 goto err_unlock;
2176 }
2177
2178 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2179 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2180 q->fl[i].phys_addr, q->fl[i].size,
2181 q->fl[i].buf_size, p->cong_thres, 1,
2182 0);
2183 if (ret) {
			printf("error %d from t3_sge_init_flcntxt for fl%d\n", ret, i);
2185 goto err_unlock;
2186 }
2187 }
2188
2189 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2190 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2191 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2192 1, 0);
2193 if (ret) {
2194 printf("error %d from t3_sge_init_ecntxt\n", ret);
2195 goto err_unlock;
2196 }
2197
2198 if (ntxq > 1) {
2199 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2200 USE_GTS, SGE_CNTXT_OFLD, id,
2201 q->txq[TXQ_OFLD].phys_addr,
2202 q->txq[TXQ_OFLD].size, 0, 1, 0);
2203 if (ret) {
2204 printf("error %d from t3_sge_init_ecntxt\n", ret);
2205 goto err_unlock;
2206 }
2207 }
2208
2209 if (ntxq > 2) {
2210 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2211 SGE_CNTXT_CTRL, id,
2212 q->txq[TXQ_CTRL].phys_addr,
2213 q->txq[TXQ_CTRL].size,
2214 q->txq[TXQ_CTRL].token, 1, 0);
2215 if (ret) {
2216 printf("error %d from t3_sge_init_ecntxt\n", ret);
2217 goto err_unlock;
2218 }
2219 }
2220
2221 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2222 device_get_unit(sc->dev), irq_vec_idx);
2223 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2224
2225 mtx_unlock(&sc->sge.reg_lock);
2226 t3_update_qset_coalesce(q, p);
2227 q->port = pi;
2228
2229 refill_fl(sc, &q->fl[0], q->fl[0].size, 1);
2230 refill_fl(sc, &q->fl[1], q->fl[1].size, 1);
2231 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2232
2233 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2234 V_NEWTIMER(q->rspq.holdoff_tmr));
2235
2236 return (0);
2237
2238 err_unlock:
2239 mtx_unlock(&sc->sge.reg_lock);
2240 err:
2241 t3_free_qset(sc, q);
2242
2243 return (ret);
2244 }
2245
2246 void
2247 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2248 {
2249 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2250 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2251 struct ifnet *ifp = pi->ifp;
2252
2253 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2254
	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
	    cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}
2262 /*
2263 * XXX need to add VLAN support for 6.x
2264 */
2265 #ifdef VLAN_SUPPORTED
2266 if (__predict_false(cpl->vlan_valid)) {
2267 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2268 m->m_flags |= M_VLANTAG;
2269 }
2270 #endif
2271
2272 m->m_pkthdr.rcvif = ifp;
2273 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2274 #ifndef DISABLE_MBUF_IOVEC
2275 m_explode(m);
2276 #endif
2277 /*
2278 * adjust after conversion to mbuf chain
2279 */
2280 m_adj(m, sizeof(*cpl) + ethpad);
2281
2282 (*ifp->if_input)(ifp, m);
2283 }
2284
2285 /**
2286 * get_packet - return the next ingress packet buffer from a free list
2287 * @adap: the adapter that received the packet
2288 * @drop_thres: # of remaining buffers before we start dropping packets
2289 * @qs: the qset that the SGE free list holding the packet belongs to
 * @mh: the mbuf header, which contains pointers to the head and tail of the mbuf chain
2291 * @r: response descriptor
2292 *
2293 * Get the next packet from a free list and complete setup of the
 * mbuf. If the packet is small we make a copy and recycle the
2295 * original buffer, otherwise we use the original buffer itself. If a
2296 * positive drop threshold is supplied packets are dropped and their
2297 * buffers recycled if (a) the number of remaining buffers is under the
2298 * threshold and the packet is too big to copy, or (b) the packet should
2299 * be copied but there is no memory for the copy.
2300 */
2301 #ifdef DISABLE_MBUF_IOVEC
2302
2303 static int
2304 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2305 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
2306 {
2308 unsigned int len_cq = ntohl(r->len_cq);
2309 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2310 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2311 uint32_t len = G_RSPD_LEN(len_cq);
2312 uint32_t flags = ntohl(r->flags);
2313 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2314 int ret = 0;
2315
2316 prefetch(sd->cl);
2317
2318 fl->credits--;
2319 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2320 bus_dmamap_unload(fl->entry_tag, sd->map);
2321
2322 m_cljset(m, sd->cl, fl->type);
2323 m->m_len = len;
2324
	switch (sopeop) {
2326 case RSPQ_SOP_EOP:
2327 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2328 mh->mh_head = mh->mh_tail = m;
2329 m->m_pkthdr.len = len;
2330 m->m_flags |= M_PKTHDR;
2331 ret = 1;
2332 break;
2333 case RSPQ_NSOP_NEOP:
2334 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2335 m->m_flags &= ~M_PKTHDR;
2336 if (mh->mh_tail == NULL) {
2337 if (cxgb_debug)
2338 printf("discarding intermediate descriptor entry\n");
2339 m_freem(m);
2340 break;
2341 }
2342 mh->mh_tail->m_next = m;
2343 mh->mh_tail = m;
2344 mh->mh_head->m_pkthdr.len += len;
2345 ret = 0;
2346 break;
2347 case RSPQ_SOP:
2348 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2349 m->m_pkthdr.len = len;
2350 mh->mh_head = mh->mh_tail = m;
2351 m->m_flags |= M_PKTHDR;
2352 ret = 0;
2353 break;
2354 case RSPQ_EOP:
2355 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2356 m->m_flags &= ~M_PKTHDR;
2357 mh->mh_head->m_pkthdr.len += len;
2358 mh->mh_tail->m_next = m;
2359 mh->mh_tail = m;
2360 ret = 1;
2361 break;
2362 }
2363 if (++fl->cidx == fl->size)
2364 fl->cidx = 0;
2365
2366 return (ret);
2367 }
2368
2369 #else
2370 static int
2371 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2372 struct mbuf *m, struct rsp_desc *r)
2373 {
2375 unsigned int len_cq = ntohl(r->len_cq);
2376 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2377 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2378 uint32_t len = G_RSPD_LEN(len_cq);
2379 uint32_t flags = ntohl(r->flags);
2380 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2381 void *cl;
2382 int ret = 0;
2383
2384 prefetch(sd->cl);
2385
2386 fl->credits--;
2387 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2388
2389 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2390 cl = mtod(m, void *);
2391 memcpy(cl, sd->cl, len);
2392 recycle_rx_buf(adap, fl, fl->cidx);
2393 } else {
2394 cl = sd->cl;
2395 bus_dmamap_unload(fl->entry_tag, sd->map);
2396 }
	switch (sopeop) {
2398 case RSPQ_SOP_EOP:
2399 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2400 if (cl == sd->cl)
2401 m_cljset(m, cl, fl->type);
2402 m->m_len = m->m_pkthdr.len = len;
2403 ret = 1;
		goto done;
2406 case RSPQ_NSOP_NEOP:
2407 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2408 ret = 0;
2409 break;
2410 case RSPQ_SOP:
2411 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2412 m_iovinit(m);
2413 ret = 0;
2414 break;
2415 case RSPQ_EOP:
2416 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2417 ret = 1;
2418 break;
2419 }
2420 m_iovappend(m, cl, fl->buf_size, len, 0);
2421
2422 done:
2423 if (++fl->cidx == fl->size)
2424 fl->cidx = 0;
2425
2426 return (ret);
2427 }
2428 #endif
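
/*
 * Note: the two get_packet() variants above are chosen at compile time.
 * With DISABLE_MBUF_IOVEC defined, fragments are linked into a plain
 * mbuf chain tracked by a t3_mbuf_hdr; otherwise they are appended to a
 * single mbuf as iovecs (m_iovappend) and only expanded into a chain by
 * m_explode() shortly before delivery (see t3_rx_eth() above).
 */
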
2429 /**
2430 * handle_rsp_cntrl_info - handles control information in a response
2431 * @qs: the queue set corresponding to the response
2432 * @flags: the response control flags
2433 *
2434 * Handles the control information of an SGE response, such as GTS
2435 * indications and completion credits for the queue set's Tx queues.
2436 * HW coalesces credits, we don't do any extra SW coalescing.
2437 */
2438 static __inline void
2439 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2440 {
2441 unsigned int credits;
2442
2443 #if USE_GTS
2444 if (flags & F_RSPD_TXQ0_GTS)
2445 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2446 #endif
2447 credits = G_RSPD_TXQ0_CR(flags);
2448 if (credits) {
2449 qs->txq[TXQ_ETH].processed += credits;
2450 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2451 taskqueue_enqueue(qs->port->adapter->tq,
2452 &qs->port->timer_reclaim_task);
2453 }
2454
2455 credits = G_RSPD_TXQ2_CR(flags);
2456 if (credits)
2457 qs->txq[TXQ_CTRL].processed += credits;
2458
#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
2463 credits = G_RSPD_TXQ1_CR(flags);
2464 if (credits)
2465 qs->txq[TXQ_OFLD].processed += credits;
2466 }
2467
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	/*
	 * Stub: GTS-based doorbell coalescing is compiled out in this
	 * driver (see USE_GTS), so there are no sleeping Tx queues to
	 * ring doorbells for here.
	 */
}
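
/*
 * A minimal sketch of what a GTS-aware check_ring_db() might do,
 * modeled on the doorbell write in restart_offloadq() above.  This is
 * illustrative only and compiled out: the helper name is hypothetical,
 * the sleeping-bit tests mirror handle_rsp_cntrl_info() below, and
 * test_and_set_bit() is assumed to be available alongside the
 * test_and_clear_bit() used elsewhere in this file.
 */
#if 0
static void
check_ring_db_gts(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	if (sleeping & F_RSPD_TXQ0_GTS) {
		struct sge_txq *txq = &qs->txq[TXQ_ETH];

		/* ring the doorbell only if the queue is truly idle */
		if (!test_and_set_bit(TXQ_RUNNING, &txq->flags))
			t3_write_reg(adap, A_SG_KDOORBELL,
			    F_SELEGRCNTX | V_EGRCNTX(txq->cntxt_id));
	}
	if (sleeping & F_RSPD_TXQ1_GTS) {
		struct sge_txq *txq = &qs->txq[TXQ_OFLD];

		if (!test_and_set_bit(TXQ_RUNNING, &txq->flags))
			t3_write_reg(adap, A_SG_KDOORBELL,
			    F_SELEGRCNTX | V_EGRCNTX(txq->cntxt_id));
	}
}
#endif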
2474
2475 /**
2476 * process_responses - process responses from an SGE response queue
2477 * @adap: the adapter
2478 * @qs: the queue set to which the response queue belongs
2479 * @budget: how many responses can be processed in this round
2480 *
2481 * Process responses from an SGE response queue up to the supplied budget.
2482 * Responses include received packets as well as credits and other events
2483 * for the queues that belong to the response queue's queue set.
2484 * A negative budget is effectively unlimited.
2485 *
2486 * Additionally choose the interrupt holdoff time for the next interrupt
2487 * on this queue. If the system is under memory shortage use a fairly
2488 * long delay to help recovery.
2489 */
2490 static int
2491 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2492 {
2493 struct sge_rspq *rspq = &qs->rspq;
2494 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2495 int budget_left = budget;
2496 unsigned int sleeping = 0;
2497 int lro = qs->lro.enabled;
2498 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2499 int ngathered = 0;
2500 #ifdef DEBUG
2501 static int last_holdoff = 0;
2502 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2503 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2504 last_holdoff = rspq->holdoff_tmr;
2505 }
2506 #endif
2507 rspq->next_holdoff = rspq->holdoff_tmr;
2508
2509 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2510 int eth, eop = 0, ethpad = 0;
2511 uint32_t flags = ntohl(r->flags);
2512 uint32_t rss_csum = *(const uint32_t *)r;
2513 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2514
2515 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2516
2517 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2518 /* XXX */
2519 printf("async notification\n");
2520
2521 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2522 #ifdef DISABLE_MBUF_IOVEC
2523
2524 if (cxgb_debug)
2525 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
2526
			if (get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
2528 rspq->next_holdoff = NOMEM_INTR_DELAY;
2529 budget_left--;
2530 break;
2531 } else {
2532 eop = 1;
2533 }
2534 #else
2535 struct mbuf *m = NULL;
2536
2537 if (rspq->rspq_mbuf == NULL)
2538 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2539 else
2540 m = m_gethdr(M_DONTWAIT, MT_DATA);
2541
2542 /*
2543 * XXX revisit me
2544 */
2545 if (rspq->rspq_mbuf == NULL && m == NULL) {
2546 rspq->next_holdoff = NOMEM_INTR_DELAY;
2547 budget_left--;
2548 break;
2549 }
2550 if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
2551 goto skip;
2552 eop = 1;
2553 #endif
2554 rspq->imm_data++;
2555 } else if (r->len_cq) {
2556 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2557
2558 #ifdef DISABLE_MBUF_IOVEC
2559 struct mbuf *m;
2560 m = m_gethdr(M_NOWAIT, MT_DATA);
2561
2562 if (m == NULL) {
2563 log(LOG_WARNING, "failed to get mbuf for packet\n");
2564 break;
2565 }
2566
2567 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
2568 #else
2569 if (rspq->rspq_mbuf == NULL)
2570 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2571 if (rspq->rspq_mbuf == NULL) {
2572 log(LOG_WARNING, "failed to get mbuf for packet\n");
2573 break;
2574 }
2575 eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
2576 #endif
2577 ethpad = 2;
2578 } else {
2579 DPRINTF("pure response\n");
2580 rspq->pure_rsps++;
2581 }
2582
2583 if (flags & RSPD_CTRL_MASK) {
2584 sleeping |= flags & RSPD_GTS_MASK;
2585 handle_rsp_cntrl_info(qs, flags);
2586 }
2587 #ifndef DISABLE_MBUF_IOVEC
2588 skip:
2589 #endif
2590 r++;
2591 if (__predict_false(++rspq->cidx == rspq->size)) {
2592 rspq->cidx = 0;
2593 rspq->gen ^= 1;
2594 r = rspq->desc;
2595 }
2596
2597 prefetch(r);
2598 if (++rspq->credits >= (rspq->size / 4)) {
2599 refill_rspq(adap, rspq, rspq->credits);
2600 rspq->credits = 0;
2601 }
2602
2603 if (eop) {
2604 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2605 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2606
2607 if (eth) {
2608 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2609 rss_hash, rss_csum, lro);
2610
2611 rspq->rspq_mh.mh_tail = rspq->rspq_mh.mh_head = NULL;
2612 } else {
2613 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2614 /*
2615 * XXX size mismatch
2616 */
2617 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2618
2619 ngathered = rx_offload(&adap->tdev, rspq,
2620 rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2621 }
2622
2623 __refill_fl(adap, &qs->fl[0]);
2624 __refill_fl(adap, &qs->fl[1]);
2625 }
2626
2628 --budget_left;
2629 }
2630
2631 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2632 t3_lro_flush(adap, qs, &qs->lro);
2633
2634 if (sleeping)
2635 check_ring_db(adap, qs, sleeping);
2636
2637 smp_mb(); /* commit Tx queue processed updates */
2638 if (__predict_false(qs->txq_stopped != 0))
2639 restart_tx(qs);
2640
2641 budget -= budget_left;
2642 #if 0
	refill_fl(adap, &qs->fl[0], qs->fl[0].size - qs->fl[0].credits, 1);
	refill_fl(adap, &qs->fl[1], qs->fl[1].size - qs->fl[1].credits, 1);
2645 #endif
2646 return (budget);
2647 }
2648
2649 /*
2650 * A helper function that processes responses and issues GTS.
2651 */
2652 static __inline int
2653 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2654 {
2655 int work;
2656 static int last_holdoff = 0;
2657
2658 work = process_responses(adap, rspq_to_qset(rq), -1);
2659
2660 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2661 printf("next_holdoff=%d\n", rq->next_holdoff);
2662 last_holdoff = rq->next_holdoff;
2663 }
2664 if (work)
2665 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2666 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
2668 }
2669
2671 /*
2672 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2673 * Handles data events from SGE response queues as well as error and other
2674 * async events as they all use the same interrupt pin. We use one SGE
2675 * response queue per port in this mode and protect all response queues with
2676 * queue 0's lock.
2677 */
2678 void
2679 t3b_intr(void *data)
2680 {
2681 uint32_t i, map;
2682 adapter_t *adap = data;
2683 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2684
2685 t3_write_reg(adap, A_PL_CLI, 0);
2686 map = t3_read_reg(adap, A_SG_DATA_INTR);
2687
2688 if (!map)
2689 return;
2690
2691 if (__predict_false(map & F_ERRINTR))
2692 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2693
2694 mtx_lock(&q0->lock);
2695 for_each_port(adap, i)
2696 if (map & (1 << i))
2697 process_responses_gts(adap, &adap->sge.qs[i].rspq);
2698 mtx_unlock(&q0->lock);
2699 }
2700
2701 /*
2702 * The MSI interrupt handler. This needs to handle data events from SGE
2703 * response queues as well as error and other async events as they all use
2704 * the same MSI vector. We use one SGE response queue per port in this mode
2705 * and protect all response queues with queue 0's lock.
2706 */
2707 void
2708 t3_intr_msi(void *data)
2709 {
2710 adapter_t *adap = data;
2711 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2712 int i, new_packets = 0;
2713
2714 mtx_lock(&q0->lock);
2715
2716 for_each_port(adap, i)
2717 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2718 new_packets = 1;
2719 mtx_unlock(&q0->lock);
2720 if (new_packets == 0)
2721 taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2722 }
2723
2724 void
2725 t3_intr_msix(void *data)
2726 {
2727 struct sge_qset *qs = data;
2728 adapter_t *adap = qs->port->adapter;
2729 struct sge_rspq *rspq = &qs->rspq;
2730
2731 mtx_lock(&rspq->lock);
2732 if (process_responses_gts(adap, rspq) == 0)
2733 rspq->unhandled_irqs++;
2734 mtx_unlock(&rspq->lock);
2735 }
2736
2737 /*
2738 * broken by recent mbuf changes
2739 */
2740 static int
2741 t3_lro_enable(SYSCTL_HANDLER_ARGS)
2742 {
2743 adapter_t *sc;
2744 int i, j, enabled, err, nqsets = 0;
2745
2746 #ifndef LRO_WORKING
2747 return (0);
2748 #endif
2749
2750 sc = arg1;
2751 enabled = sc->sge.qs[0].lro.enabled;
2752 err = sysctl_handle_int(oidp, &enabled, arg2, req);
2753
2754 if (err != 0)
2755 return (err);
2756 if (enabled == sc->sge.qs[0].lro.enabled)
2757 return (0);
2758
2759 for (i = 0; i < sc->params.nports; i++)
2760 for (j = 0; j < sc->port[i].nqsets; j++)
2761 nqsets++;
2762
2763 for (i = 0; i < nqsets; i++)
2764 sc->sge.qs[i].lro.enabled = enabled;
2765
2766 return (0);
2767 }
2768
2769 static int
2770 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
2771 {
2772 adapter_t *sc = arg1;
2773 struct qset_params *qsp = &sc->params.sge.qset[0];
2774 int coalesce_nsecs;
2775 struct sge_qset *qs;
2776 int i, j, err, nqsets = 0;
2777 struct mtx *lock;
2778
2779 coalesce_nsecs = qsp->coalesce_nsecs;
2780 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
2781
2782 if (err != 0) {
2783 return (err);
2784 }
2785 if (coalesce_nsecs == qsp->coalesce_nsecs)
2786 return (0);
2787
2788 for (i = 0; i < sc->params.nports; i++)
2789 for (j = 0; j < sc->port[i].nqsets; j++)
2790 nqsets++;
2791
2792 coalesce_nsecs = max(100, coalesce_nsecs);
2793
2794 for (i = 0; i < nqsets; i++) {
2795 qs = &sc->sge.qs[i];
2796 qsp = &sc->params.sge.qset[i];
2797 qsp->coalesce_nsecs = coalesce_nsecs;
2798
2799 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
2800 &sc->sge.qs[0].rspq.lock;
2801
2802 mtx_lock(lock);
2803 t3_update_qset_coalesce(qs, qsp);
2804 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2805 V_NEWTIMER(qs->rspq.holdoff_tmr));
2806 mtx_unlock(lock);
2807 }
2808
2809 return (0);
2810 }
2811
2813 void
2814 t3_add_sysctls(adapter_t *sc)
2815 {
2816 struct sysctl_ctx_list *ctx;
2817 struct sysctl_oid_list *children;
2818
2819 ctx = device_get_sysctl_ctx(sc->dev);
2820 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
2821
2822 /* random information */
2823 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
2824 "firmware_version",
2825 CTLFLAG_RD, &sc->fw_version,
2826 0, "firmware version");
2827
2828 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2829 "enable_lro",
2830 CTLTYPE_INT|CTLFLAG_RW, sc,
2831 0, t3_lro_enable,
2832 "I", "enable large receive offload");
2833
2834 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
2835 "intr_coal",
2836 CTLTYPE_INT|CTLFLAG_RW, sc,
2837 0, t3_set_coalesce_nsecs,
2838 "I", "interrupt coalescing timer (ns)");
2839 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2840 "enable_debug",
2841 CTLFLAG_RW, &cxgb_debug,
2842 0, "enable verbose debugging output");
2843
2844 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2845 "collapse_free",
2846 CTLFLAG_RD, &collapse_free,
2847 0, "frees during collapse");
2848 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2849 "mb_free_vec_free",
2850 CTLFLAG_RD, &mb_free_vec_free,
2851 0, "frees during mb_free_vec");
2852 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2853 "collapse_mbufs",
2854 CTLFLAG_RW, &collapse_mbufs,
2855 0, "collapse mbuf chains into iovecs");
2856 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2857 "txq_overrun",
2858 CTLFLAG_RD, &txq_fills,
2859 0, "#times txq overrun");
2860 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
2861 "bogus_imm",
2862 CTLFLAG_RD, &bogus_imm,
2863 0, "#times a bogus immediate response was seen");
2864 }
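
/*
 * The knobs above land under the controller's sysctl tree at
 * dev.<driver>.<unit>; assuming the controller attaches as cxgbc0,
 * usage would look like (the node names here are illustrative):
 *
 *	sysctl dev.cxgbc.0.enable_lro=1
 *	sysctl dev.cxgbc.0.intr_coal=50000
 */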
2865
2866 /**
2867 * t3_get_desc - dump an SGE descriptor for debugging purposes
2868 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
2870 * @idx: the descriptor index in the queue
2871 * @data: where to dump the descriptor contents
2872 *
2873 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2874 * size of the descriptor.
2875 */
2876 int
2877 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2878 unsigned char *data)
2879 {
2880 if (qnum >= 6)
2881 return (EINVAL);
2882
2883 if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
2888 }
2889
2890 if (qnum == 3) {
2891 if (!qs->rspq.desc || idx >= qs->rspq.size)
2892 return (EINVAL);
2893 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
2895 }
2896
2897 qnum -= 4;
2898 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2899 return (EINVAL);
2900 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
2902 }
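
/*
 * Illustrative (compiled-out) use of t3_get_desc(): dump the response
 * descriptor at the queue's current consumer index.  The helper below
 * is hypothetical; the qnum encoding (0..2: Tx, 3: response, 4..5: Rx)
 * is taken from the function's comment above.
 */
#if 0
static void
dump_cur_rsp_desc(const struct sge_qset *qs)
{
	unsigned char buf[sizeof(struct rsp_desc)];
	int len;

	len = t3_get_desc(qs, 3, qs->rspq.cidx, buf);
	if (len > 0)
		printf("rspq descriptor: %d bytes\n", len);
}
#endif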