1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_rss.h"
#include "ena.h"
#include "ena_datapath.h"
#ifdef DEV_NETMAP
#include "ena_netmap.h"
#endif /* DEV_NETMAP */

#include <netinet/ip6.h>
39
40 /*********************************************************************
41 * Static functions prototypes
42 *********************************************************************/
43
44 static int ena_tx_cleanup(struct ena_ring *);
45 static int ena_rx_cleanup(struct ena_ring *);
46 static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
47 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
48 struct mbuf *);
49 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
50 struct ena_com_rx_ctx *, uint16_t *);
51 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
52 struct mbuf *);
53 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
54 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
55 struct mbuf **mbuf);
56 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
57 static void ena_start_xmit(struct ena_ring *);
58
59 /*********************************************************************
60 * Global functions
61 *********************************************************************/
62
63 void
64 ena_cleanup(void *arg, int pending)
65 {
66 struct ena_que *que = arg;
67 struct ena_adapter *adapter = que->adapter;
68 if_t ifp = adapter->ifp;
69 struct ena_ring *tx_ring;
70 struct ena_ring *rx_ring;
71 struct ena_com_io_cq* io_cq;
72 struct ena_eth_io_intr_reg intr_reg;
73 int qid, ena_qid;
74 int txc, rxc, i;
75
76 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
77 return;
78
79 ena_trace(NULL, ENA_DBG, "MSI-X TX/RX routine\n");
80
81 tx_ring = que->tx_ring;
82 rx_ring = que->rx_ring;
83 qid = que->id;
84 ena_qid = ENA_IO_TXQ_IDX(qid);
85 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
86
87 tx_ring->first_interrupt = true;
88 rx_ring->first_interrupt = true;
89
90 for (i = 0; i < CLEAN_BUDGET; ++i) {
91 rxc = ena_rx_cleanup(rx_ring);
92 txc = ena_tx_cleanup(tx_ring);
93
94 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
95 return;
96
97 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
98 break;
99 }
100
101 /* Signal that work is done and unmask interrupt */
102 ena_com_update_intr_reg(&intr_reg,
103 RX_IRQ_INTERVAL,
104 TX_IRQ_INTERVAL,
105 true);
106 ena_com_unmask_intr(io_cq, &intr_reg);
107 }
108
109 void
110 ena_deferred_mq_start(void *arg, int pending)
111 {
112 struct ena_ring *tx_ring = (struct ena_ring *)arg;
113 struct ifnet *ifp = tx_ring->adapter->ifp;
114
115 while (!drbr_empty(ifp, tx_ring->br) &&
116 tx_ring->running &&
117 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
118 ENA_RING_MTX_LOCK(tx_ring);
119 ena_start_xmit(tx_ring);
120 ENA_RING_MTX_UNLOCK(tx_ring);
121 }
122 }
123
124 int
125 ena_mq_start(if_t ifp, struct mbuf *m)
126 {
127 struct ena_adapter *adapter = ifp->if_softc;
128 struct ena_ring *tx_ring;
129 int ret, is_drbr_empty;
130 uint32_t i;
131
132 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
133 return (ENODEV);
134
135 /* Which queue to use */
136 /*
137 * If everything is setup correctly, it should be the
138 * same bucket that the current CPU we're on is.
139 * It should improve performance.
140 */
141 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
142 i = m->m_pkthdr.flowid % adapter->num_io_queues;
143 } else {
144 i = curcpu % adapter->num_io_queues;
145 }
146 tx_ring = &adapter->tx_ring[i];
147
148 /* Check if drbr is empty before putting packet */
149 is_drbr_empty = drbr_empty(ifp, tx_ring->br);
150 ret = drbr_enqueue(ifp, tx_ring->br, m);
151 if (unlikely(ret != 0)) {
152 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
153 return (ret);
154 }
155
156 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
157 ena_start_xmit(tx_ring);
158 ENA_RING_MTX_UNLOCK(tx_ring);
159 } else {
160 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
161 }
162
163 return (0);
164 }
165
166 void
167 ena_qflush(if_t ifp)
168 {
169 struct ena_adapter *adapter = ifp->if_softc;
170 struct ena_ring *tx_ring = adapter->tx_ring;
171 int i;
172
173 for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
174 if (!drbr_empty(ifp, tx_ring->br)) {
175 ENA_RING_MTX_LOCK(tx_ring);
176 drbr_flush(ifp, tx_ring->br);
177 ENA_RING_MTX_UNLOCK(tx_ring);
178 }
179
180 if_qflush(ifp);
181 }
182
183 /*********************************************************************
184 * Static functions
185 *********************************************************************/
186
187 static inline int
188 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
189 {
190 struct ena_adapter *adapter = tx_ring->adapter;
191 struct ena_tx_buffer *tx_info = NULL;
192
193 if (likely(req_id < tx_ring->ring_size)) {
194 tx_info = &tx_ring->tx_buffer_info[req_id];
195 if (tx_info->mbuf != NULL)
196 return (0);
197 device_printf(adapter->pdev,
198 "tx_info doesn't have valid mbuf\n");
199 }
200
201 device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
202 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
203
204 /* Trigger device reset */
205 ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
206
207 return (EFAULT);
208 }
209
210 /**
211 * ena_tx_cleanup - clear sent packets and corresponding descriptors
212 * @tx_ring: ring for which we want to clean packets
213 *
214 * Once packets are sent, we ask the device in a loop for no longer used
215 * descriptors. We find the related mbuf chain in a map (index in an array)
216 * and free it, then update ring state.
217 * This is performed in "endless" loop, updating ring pointers every
218 * TX_COMMIT. The first check of free descriptor is performed before the actual
219 * loop, then repeated at the loop end.
220 **/
221 static int
222 ena_tx_cleanup(struct ena_ring *tx_ring)
223 {
224 struct ena_adapter *adapter;
225 struct ena_com_io_cq* io_cq;
226 uint16_t next_to_clean;
227 uint16_t req_id;
228 uint16_t ena_qid;
229 unsigned int total_done = 0;
230 int rc;
231 int commit = TX_COMMIT;
232 int budget = TX_BUDGET;
233 int work_done;
234 bool above_thresh;
235
236 adapter = tx_ring->que->adapter;
237 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
238 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
239 next_to_clean = tx_ring->next_to_clean;
240
241 #ifdef DEV_NETMAP
242 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
243 return (0);
244 #endif /* DEV_NETMAP */
245
246 do {
247 struct ena_tx_buffer *tx_info;
248 struct mbuf *mbuf;
249
250 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
251 if (unlikely(rc != 0))
252 break;
253
254 rc = validate_tx_req_id(tx_ring, req_id);
255 if (unlikely(rc != 0))
256 break;
257
258 tx_info = &tx_ring->tx_buffer_info[req_id];
259
260 mbuf = tx_info->mbuf;
261
262 tx_info->mbuf = NULL;
263 bintime_clear(&tx_info->timestamp);
264
265 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
266 BUS_DMASYNC_POSTWRITE);
267 bus_dmamap_unload(adapter->tx_buf_tag,
268 tx_info->dmamap);
269
270 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
271 tx_ring->qid, mbuf);
272
273 m_freem(mbuf);
274
275 total_done += tx_info->tx_descs;
276
277 tx_ring->free_tx_ids[next_to_clean] = req_id;
278 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
279 tx_ring->ring_size);
280
281 if (unlikely(--commit == 0)) {
282 commit = TX_COMMIT;
283 /* update ring state every TX_COMMIT descriptor */
284 tx_ring->next_to_clean = next_to_clean;
285 ena_com_comp_ack(
286 &adapter->ena_dev->io_sq_queues[ena_qid],
287 total_done);
288 ena_com_update_dev_comp_head(io_cq);
289 total_done = 0;
290 }
291 } while (likely(--budget));
292
293 work_done = TX_BUDGET - budget;
294
295 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
296 tx_ring->qid, work_done);
297
298 /* If there is still something to commit update ring state */
299 if (likely(commit != TX_COMMIT)) {
300 tx_ring->next_to_clean = next_to_clean;
301 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
302 total_done);
303 ena_com_update_dev_comp_head(io_cq);
304 }
305
306 /*
307 * Need to make the rings circular update visible to
308 * ena_xmit_mbuf() before checking for tx_ring->running.
309 */
310 mb();
311
312 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
313 ENA_TX_RESUME_THRESH);
314 if (unlikely(!tx_ring->running && above_thresh)) {
315 ENA_RING_MTX_LOCK(tx_ring);
316 above_thresh =
317 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
318 ENA_TX_RESUME_THRESH);
319 if (!tx_ring->running && above_thresh) {
320 tx_ring->running = true;
321 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
322 taskqueue_enqueue(tx_ring->enqueue_tq,
323 &tx_ring->enqueue_task);
324 }
325 ENA_RING_MTX_UNLOCK(tx_ring);
326 }
327
328 return (work_done);
329 }
330
331 static void
332 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
333 struct mbuf *mbuf)
334 {
335 struct ena_adapter *adapter = rx_ring->adapter;
336
337 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
338 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
339
340 #ifdef RSS
341 /*
342 * Hardware and software RSS are in agreement only when both are
343 * configured to Toeplitz algorithm. This driver configures
344 * that algorithm only when software RSS is enabled and uses it.
345 */
346 if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
347 ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
348 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
349 return;
350 }
351 #endif
352
353 if (ena_rx_ctx->frag &&
354 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
355 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
356 return;
357 }
358
359 switch (ena_rx_ctx->l3_proto) {
360 case ENA_ETH_IO_L3_PROTO_IPV4:
361 switch (ena_rx_ctx->l4_proto) {
362 case ENA_ETH_IO_L4_PROTO_TCP:
363 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
364 break;
365 case ENA_ETH_IO_L4_PROTO_UDP:
366 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
367 break;
368 default:
369 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
370 }
371 break;
372 case ENA_ETH_IO_L3_PROTO_IPV6:
373 switch (ena_rx_ctx->l4_proto) {
374 case ENA_ETH_IO_L4_PROTO_TCP:
375 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
376 break;
377 case ENA_ETH_IO_L4_PROTO_UDP:
378 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
379 break;
380 default:
381 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
382 }
383 break;
384 case ENA_ETH_IO_L3_PROTO_UNKNOWN:
385 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
386 break;
387 default:
388 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
389 }
390 } else {
391 mbuf->m_pkthdr.flowid = rx_ring->qid;
392 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
393 }
394 }
395
396 /**
397 * ena_rx_mbuf - assemble mbuf from descriptors
398 * @rx_ring: ring for which we want to clean packets
399 * @ena_bufs: buffer info
400 * @ena_rx_ctx: metadata for this packet(s)
401 * @next_to_clean: ring pointer, will be updated only upon success
402 *
403 **/
404 static struct mbuf*
405 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
406 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
407 {
408 struct mbuf *mbuf;
409 struct ena_rx_buffer *rx_info;
410 struct ena_adapter *adapter;
411 unsigned int descs = ena_rx_ctx->descs;
412 uint16_t ntc, len, req_id, buf = 0;
413
414 ntc = *next_to_clean;
415 adapter = rx_ring->adapter;
416
417 len = ena_bufs[buf].len;
418 req_id = ena_bufs[buf].req_id;
419 rx_info = &rx_ring->rx_buffer_info[req_id];
420 if (unlikely(rx_info->mbuf == NULL)) {
421 device_printf(adapter->pdev, "NULL mbuf in rx_info");
422 return (NULL);
423 }
424
425 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
426 rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
427
428 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
429 BUS_DMASYNC_POSTREAD);
430 mbuf = rx_info->mbuf;
431 mbuf->m_flags |= M_PKTHDR;
432 mbuf->m_pkthdr.len = len;
433 mbuf->m_len = len;
434 // Only for the first segment the data starts at specific offset
435 mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
436 ena_trace(NULL, ENA_DBG | ENA_RXPTH,
437 "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
438 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
439
440 /* Fill mbuf with hash key and it's interpretation for optimization */
441 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
442
443 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
444 mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
445
446 /* DMA address is not needed anymore, unmap it */
447 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
448
449 rx_info->mbuf = NULL;
450 rx_ring->free_rx_ids[ntc] = req_id;
451 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
452
453 /*
454 * While we have more than 1 descriptors for one rcvd packet, append
455 * other mbufs to the main one
456 */
457 while (--descs) {
458 ++buf;
459 len = ena_bufs[buf].len;
460 req_id = ena_bufs[buf].req_id;
461 rx_info = &rx_ring->rx_buffer_info[req_id];
462
463 if (unlikely(rx_info->mbuf == NULL)) {
464 device_printf(adapter->pdev, "NULL mbuf in rx_info");
465 /*
466 * If one of the required mbufs was not allocated yet,
467 * we can break there.
468 * All earlier used descriptors will be reallocated
469 * later and not used mbufs can be reused.
470 * The next_to_clean pointer will not be updated in case
471 * of an error, so caller should advance it manually
472 * in error handling routine to keep it up to date
473 * with hw ring.
474 */
475 m_freem(mbuf);
476 return (NULL);
477 }
478
479 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
480 BUS_DMASYNC_POSTREAD);
481 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
482 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
483 ena_trace(NULL, ENA_WARNING, "Failed to append Rx mbuf %p\n",
484 mbuf);
485 }
486
487 ena_trace(NULL, ENA_DBG | ENA_RXPTH,
488 "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);
489
490 /* Free already appended mbuf, it won't be useful anymore */
491 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
492 m_freem(rx_info->mbuf);
493 rx_info->mbuf = NULL;
494
495 rx_ring->free_rx_ids[ntc] = req_id;
496 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
497 }
498
499 *next_to_clean = ntc;
500
501 return (mbuf);
502 }
503
504 /**
505 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
506 **/
507 static inline void
508 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
509 struct mbuf *mbuf)
510 {
511
512 /* if IP and error */
513 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
514 ena_rx_ctx->l3_csum_err)) {
515 /* ipv4 checksum error */
516 mbuf->m_pkthdr.csum_flags = 0;
517 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
518 ena_trace(NULL, ENA_DBG, "RX IPv4 header checksum error\n");
519 return;
520 }
521
522 /* if TCP/UDP */
523 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
524 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
525 if (ena_rx_ctx->l4_csum_err) {
526 /* TCP/UDP checksum error */
527 mbuf->m_pkthdr.csum_flags = 0;
528 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
529 ena_trace(NULL, ENA_DBG, "RX L4 checksum error\n");
530 } else {
531 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
532 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
533 }
534 }
535 }
536
537 /**
538 * ena_rx_cleanup - handle rx irq
539 * @arg: ring for which irq is being handled
540 **/
541 static int
542 ena_rx_cleanup(struct ena_ring *rx_ring)
543 {
544 struct ena_adapter *adapter;
545 struct mbuf *mbuf;
546 struct ena_com_rx_ctx ena_rx_ctx;
547 struct ena_com_io_cq* io_cq;
548 struct ena_com_io_sq* io_sq;
549 enum ena_regs_reset_reason_types reset_reason;
550 if_t ifp;
551 uint16_t ena_qid;
552 uint16_t next_to_clean;
553 uint32_t refill_required;
554 uint32_t refill_threshold;
555 uint32_t do_if_input = 0;
556 unsigned int qid;
557 int rc, i;
558 int budget = RX_BUDGET;
559 #ifdef DEV_NETMAP
560 int done;
561 #endif /* DEV_NETMAP */
562
563 adapter = rx_ring->que->adapter;
564 ifp = adapter->ifp;
565 qid = rx_ring->que->id;
566 ena_qid = ENA_IO_RXQ_IDX(qid);
567 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
568 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
569 next_to_clean = rx_ring->next_to_clean;
570
571 #ifdef DEV_NETMAP
572 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
573 return (0);
574 #endif /* DEV_NETMAP */
575
576 ena_trace(NULL, ENA_DBG, "rx: qid %d\n", qid);
577
578 do {
579 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
580 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
581 ena_rx_ctx.descs = 0;
582 ena_rx_ctx.pkt_offset = 0;
583
584 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
585 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
586 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
587 if (unlikely(rc != 0)) {
588 if (rc == ENA_COM_NO_SPACE) {
589 counter_u64_add(rx_ring->rx_stats.bad_desc_num,
590 1);
591 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
592 } else {
593 counter_u64_add(rx_ring->rx_stats.bad_req_id,
594 1);
595 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
596 }
597 ena_trigger_reset(adapter, reset_reason);
598 return (0);
599 }
600
601 if (unlikely(ena_rx_ctx.descs == 0))
602 break;
603
604 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
605 "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
606 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
607 ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
608
609 /* Receive mbuf from the ring */
610 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
611 &ena_rx_ctx, &next_to_clean);
612 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
613 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
614 /* Exit if we failed to retrieve a buffer */
615 if (unlikely(mbuf == NULL)) {
616 for (i = 0; i < ena_rx_ctx.descs; ++i) {
617 rx_ring->free_rx_ids[next_to_clean] =
618 rx_ring->ena_bufs[i].req_id;
619 next_to_clean =
620 ENA_RX_RING_IDX_NEXT(next_to_clean,
621 rx_ring->ring_size);
622
623 }
624 break;
625 }
626
627 if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
628 ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
629 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
630 }
631
632 counter_enter();
633 counter_u64_add_protected(rx_ring->rx_stats.bytes,
634 mbuf->m_pkthdr.len);
635 counter_u64_add_protected(adapter->hw_stats.rx_bytes,
636 mbuf->m_pkthdr.len);
637 counter_exit();
638 /*
639 * LRO is only for IP/TCP packets and TCP checksum of the packet
640 * should be computed by hardware.
641 */
642 do_if_input = 1;
643 if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
644 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
645 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
646 /*
647 * Send to the stack if:
648 * - LRO not enabled, or
649 * - no LRO resources, or
650 * - lro enqueue fails
651 */
652 if ((rx_ring->lro.lro_cnt != 0) &&
653 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
654 do_if_input = 0;
655 }
656 if (do_if_input != 0) {
657 ena_trace(NULL, ENA_DBG | ENA_RXPTH,
658 "calling if_input() with mbuf %p\n", mbuf);
659 (*ifp->if_input)(ifp, mbuf);
660 }
661
662 counter_enter();
663 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
664 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
665 counter_exit();
666 } while (--budget);
667
668 rx_ring->next_to_clean = next_to_clean;
669
670 refill_required = ena_com_free_q_entries(io_sq);
671 refill_threshold = min_t(int,
672 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
673 ENA_RX_REFILL_THRESH_PACKET);
674
675 if (refill_required > refill_threshold) {
676 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
677 ena_refill_rx_bufs(rx_ring, refill_required);
678 }
679
680 tcp_lro_flush_all(&rx_ring->lro);
681
682 return (RX_BUDGET - budget);
683 }
684
685 static void
686 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
687 bool disable_meta_caching)
688 {
689 struct ena_com_tx_meta *ena_meta;
690 struct ether_vlan_header *eh;
691 struct mbuf *mbuf_next;
692 u32 mss;
693 bool offload;
694 uint16_t etype;
695 int ehdrlen;
696 struct ip *ip;
697 int iphlen;
698 struct tcphdr *th;
699 int offset;
700
701 offload = false;
702 ena_meta = &ena_tx_ctx->ena_meta;
703 mss = mbuf->m_pkthdr.tso_segsz;
704
705 if (mss != 0)
706 offload = true;
707
708 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
709 offload = true;
710
711 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
712 offload = true;
713
714 if (!offload) {
715 if (disable_meta_caching) {
716 memset(ena_meta, 0, sizeof(*ena_meta));
717 ena_tx_ctx->meta_valid = 1;
718 } else {
719 ena_tx_ctx->meta_valid = 0;
720 }
721 return;
722 }
723
724 /* Determine where frame payload starts. */
725 eh = mtod(mbuf, struct ether_vlan_header *);
726 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
727 etype = ntohs(eh->evl_proto);
728 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
729 } else {
730 etype = ntohs(eh->evl_encap_proto);
731 ehdrlen = ETHER_HDR_LEN;
732 }
733
734 mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
735 ip = (struct ip *)(mtodo(mbuf_next, offset));
736 iphlen = ip->ip_hl << 2;
737
738 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
739 th = (struct tcphdr *)(mtodo(mbuf_next, offset));
740
741 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
742 ena_tx_ctx->l3_csum_enable = 1;
743 }
744 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
745 ena_tx_ctx->tso_enable = 1;
746 ena_meta->l4_hdr_len = (th->th_off);
747 }
748
749 switch (etype) {
750 case ETHERTYPE_IP:
751 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
752 if ((ip->ip_off & htons(IP_DF)) != 0)
753 ena_tx_ctx->df = 1;
754 break;
755 case ETHERTYPE_IPV6:
756 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
757
758 default:
759 break;
760 }
761
762 if (ip->ip_p == IPPROTO_TCP) {
763 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
764 if ((mbuf->m_pkthdr.csum_flags &
765 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
766 ena_tx_ctx->l4_csum_enable = 1;
767 else
768 ena_tx_ctx->l4_csum_enable = 0;
769 } else if (ip->ip_p == IPPROTO_UDP) {
770 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
771 if ((mbuf->m_pkthdr.csum_flags &
772 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
773 ena_tx_ctx->l4_csum_enable = 1;
774 else
775 ena_tx_ctx->l4_csum_enable = 0;
776 } else {
777 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
778 ena_tx_ctx->l4_csum_enable = 0;
779 }
780
781 ena_meta->mss = mss;
782 ena_meta->l3_hdr_len = iphlen;
783 ena_meta->l3_hdr_offset = ehdrlen;
784 ena_tx_ctx->meta_valid = 1;
785 }
786
787 static int
788 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
789 {
790 struct ena_adapter *adapter;
791 struct mbuf *collapsed_mbuf;
792 int num_frags;
793
794 adapter = tx_ring->adapter;
795 num_frags = ena_mbuf_count(*mbuf);
796
797 /* One segment must be reserved for configuration descriptor. */
798 if (num_frags < adapter->max_tx_sgl_size)
799 return (0);
800 counter_u64_add(tx_ring->tx_stats.collapse, 1);
801
802 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
803 adapter->max_tx_sgl_size - 1);
804 if (unlikely(collapsed_mbuf == NULL)) {
805 counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
806 return (ENOMEM);
807 }
808
809 /* If mbuf was collapsed succesfully, original mbuf is released. */
810 *mbuf = collapsed_mbuf;
811
812 return (0);
813 }
814
815 static int
816 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
817 struct mbuf *mbuf, void **push_hdr, u16 *header_len)
818 {
819 struct ena_adapter *adapter = tx_ring->adapter;
820 struct ena_com_buf *ena_buf;
821 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
822 size_t iseg = 0;
823 uint32_t mbuf_head_len;
824 uint16_t offset;
825 int rc, nsegs;
826
827 mbuf_head_len = mbuf->m_len;
828 tx_info->mbuf = mbuf;
829 ena_buf = tx_info->bufs;
830
831 /*
832 * For easier maintaining of the DMA map, map the whole mbuf even if
833 * the LLQ is used. The descriptors will be filled using the segments.
834 */
835 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf,
836 segs, &nsegs, BUS_DMA_NOWAIT);
837 if (unlikely((rc != 0) || (nsegs == 0))) {
838 ena_trace(NULL, ENA_WARNING,
839 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
840 goto dma_error;
841 }
842
843 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
844 /*
845 * When the device is LLQ mode, the driver will copy
846 * the header into the device memory space.
847 * the ena_com layer assumes the header is in a linear
848 * memory space.
849 * This assumption might be wrong since part of the header
850 * can be in the fragmented buffers.
851 * First check if header fits in the mbuf. If not, copy it to
852 * separate buffer that will be holding linearized data.
853 */
854 *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size);
855
856 /* If header is in linear space, just point into mbuf's data. */
857 if (likely(*header_len <= mbuf_head_len)) {
858 *push_hdr = mbuf->m_data;
859 /*
860 * Otherwise, copy whole portion of header from multiple mbufs
861 * to intermediate buffer.
862 */
863 } else {
864 m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf);
865 *push_hdr = tx_ring->push_buf_intermediate_buf;
866
867 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
868 }
869
870 ena_trace(NULL, ENA_DBG | ENA_TXPTH,
871 "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
872 mbuf, *push_hdr, *header_len);
873
874 /* If packet is fitted in LLQ header, no need for DMA segments. */
875 if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
876 return (0);
877 } else {
878 offset = tx_ring->tx_max_header_size;
879 /*
880 * As Header part is mapped to LLQ header, we can skip it and just
881 * map the residuum of the mbuf to DMA Segments.
882 */
883 while (offset > 0) {
884 if (offset >= segs[iseg].ds_len) {
885 offset -= segs[iseg].ds_len;
886 } else {
887 ena_buf->paddr = segs[iseg].ds_addr + offset;
888 ena_buf->len = segs[iseg].ds_len - offset;
889 ena_buf++;
890 tx_info->num_of_bufs++;
891 offset = 0;
892 }
893 iseg++;
894 }
895 }
896 } else {
897 *push_hdr = NULL;
898 /*
899 * header_len is just a hint for the device. Because FreeBSD is not
900 * giving us information about packet header length and it is not
901 * guaranteed that all packet headers will be in the 1st mbuf, setting
902 * header_len to 0 is making the device ignore this value and resolve
903 * header on it's own.
904 */
905 *header_len = 0;
906 }
907
908 /* Map rest of the mbuf */
909 while (iseg < nsegs) {
910 ena_buf->paddr = segs[iseg].ds_addr;
911 ena_buf->len = segs[iseg].ds_len;
912 ena_buf++;
913 iseg++;
914 tx_info->num_of_bufs++;
915 }
916
917 return (0);
918
919 dma_error:
920 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
921 tx_info->mbuf = NULL;
922 return (rc);
923 }
924
925 static int
926 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
927 {
928 struct ena_adapter *adapter;
929 struct ena_tx_buffer *tx_info;
930 struct ena_com_tx_ctx ena_tx_ctx;
931 struct ena_com_dev *ena_dev;
932 struct ena_com_io_sq* io_sq;
933 void *push_hdr;
934 uint16_t next_to_use;
935 uint16_t req_id;
936 uint16_t ena_qid;
937 uint16_t header_len;
938 int rc;
939 int nb_hw_desc;
940
941 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
942 adapter = tx_ring->que->adapter;
943 ena_dev = adapter->ena_dev;
944 io_sq = &ena_dev->io_sq_queues[ena_qid];
945
946 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
947 if (unlikely(rc != 0)) {
948 ena_trace(NULL, ENA_WARNING,
949 "Failed to collapse mbuf! err: %d\n", rc);
950 return (rc);
951 }
952
953 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
954
955 next_to_use = tx_ring->next_to_use;
956 req_id = tx_ring->free_tx_ids[next_to_use];
957 tx_info = &tx_ring->tx_buffer_info[req_id];
958 tx_info->num_of_bufs = 0;
959
960 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
961 if (unlikely(rc != 0)) {
962 ena_trace(NULL, ENA_WARNING, "Failed to map TX mbuf\n");
963 return (rc);
964 }
965 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
966 ena_tx_ctx.ena_bufs = tx_info->bufs;
967 ena_tx_ctx.push_header = push_hdr;
968 ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
969 ena_tx_ctx.req_id = req_id;
970 ena_tx_ctx.header_len = header_len;
971
972 /* Set flags and meta data */
973 ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
974
975 if (tx_ring->acum_pkts == DB_THRESHOLD ||
976 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
977 ena_trace(NULL, ENA_DBG | ENA_TXPTH,
978 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
979 tx_ring->que->id);
980 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
981 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
982 tx_ring->acum_pkts = 0;
983 }
984
985 /* Prepare the packet's descriptors and send them to device */
986 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
987 if (unlikely(rc != 0)) {
988 if (likely(rc == ENA_COM_NO_MEM)) {
989 ena_trace(NULL, ENA_DBG | ENA_TXPTH,
990 "tx ring[%d] if out of space\n", tx_ring->que->id);
991 } else {
992 device_printf(adapter->pdev,
993 "failed to prepare tx bufs\n");
994 }
995 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
996 goto dma_error;
997 }
998
999 counter_enter();
1000 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
1001 counter_u64_add_protected(tx_ring->tx_stats.bytes,
1002 (*mbuf)->m_pkthdr.len);
1003
1004 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
1005 counter_u64_add_protected(adapter->hw_stats.tx_bytes,
1006 (*mbuf)->m_pkthdr.len);
1007 counter_exit();
1008
1009 tx_info->tx_descs = nb_hw_desc;
1010 getbinuptime(&tx_info->timestamp);
1011 tx_info->print_once = true;
1012
1013 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
1014 tx_ring->ring_size);
1015
1016 /* stop the queue when no more space available, the packet can have up
1017 * to sgl_size + 2. one for the meta descriptor and one for header
1018 * (if the header is larger than tx_max_header_size).
1019 */
1020 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1021 adapter->max_tx_sgl_size + 2))) {
1022 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
1023 tx_ring->que->id);
1024
1025 tx_ring->running = false;
1026 counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
1027
1028 /* There is a rare condition where this function decides to
1029 * stop the queue but meanwhile tx_cleanup() updates
1030 * next_to_completion and terminates.
1031 * The queue will remain stopped forever.
1032 * To solve this issue this function performs mb(), checks
1033 * the wakeup condition and wakes up the queue if needed.
1034 */
1035 mb();
1036
1037 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1038 ENA_TX_RESUME_THRESH)) {
1039 tx_ring->running = true;
1040 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1041 }
1042 }
1043
1044 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1045 BUS_DMASYNC_PREWRITE);
1046
1047 return (0);
1048
1049 dma_error:
1050 tx_info->mbuf = NULL;
1051 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1052
1053 return (rc);
1054 }
1055
1056 static void
1057 ena_start_xmit(struct ena_ring *tx_ring)
1058 {
1059 struct mbuf *mbuf;
1060 struct ena_adapter *adapter = tx_ring->adapter;
1061 struct ena_com_io_sq* io_sq;
1062 int ena_qid;
1063 int ret = 0;
1064
1065 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1066 return;
1067
1068 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1069 return;
1070
1071 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1072 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1073
1074 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1075 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
1076 " header csum flags %#jx\n",
1077 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1078
1079 if (unlikely(!tx_ring->running)) {
1080 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1081 break;
1082 }
1083
1084 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1085 if (ret == ENA_COM_NO_MEM) {
1086 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1087 } else if (ret == ENA_COM_NO_SPACE) {
1088 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1089 } else {
1090 m_freem(mbuf);
1091 drbr_advance(adapter->ifp, tx_ring->br);
1092 }
1093
1094 break;
1095 }
1096
1097 drbr_advance(adapter->ifp, tx_ring->br);
1098
1099 if (unlikely((if_getdrvflags(adapter->ifp) &
1100 IFF_DRV_RUNNING) == 0))
1101 return;
1102
1103 tx_ring->acum_pkts++;
1104
1105 BPF_MTAP(adapter->ifp, mbuf);
1106 }
1107
1108 if (likely(tx_ring->acum_pkts != 0)) {
1109 /* Trigger the dma engine */
1110 ena_com_write_sq_doorbell(io_sq);
1111 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
1112 tx_ring->acum_pkts = 0;
1113 }
1114
1115 if (unlikely(!tx_ring->running))
1116 taskqueue_enqueue(tx_ring->que->cleanup_tq,
1117 &tx_ring->que->cleanup_task);
1118 }
Cache object: 698e7589b15f47f2f867acf236df3f0b
|