1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_rss.h"
34 #include "ena.h"
35 #include "ena_datapath.h"
36 #ifdef DEV_NETMAP
37 #include "ena_netmap.h"
38 #endif /* DEV_NETMAP */
39 #ifdef RSS
40 #include <net/rss_config.h>
41 #endif /* RSS */
42
43 #include <netinet6/ip6_var.h>
44
45 /*********************************************************************
46 * Static functions prototypes
47 *********************************************************************/
48
49 static int ena_tx_cleanup(struct ena_ring *);
50 static int ena_rx_cleanup(struct ena_ring *);
51 static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
52 struct ena_com_io_cq *io_cq, uint16_t *req_id);
53 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
54 struct mbuf *);
55 static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
56 struct ena_com_rx_ctx *, uint16_t *);
57 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
58 struct mbuf *);
59 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
60 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
61 struct mbuf **mbuf);
62 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
63 static void ena_start_xmit(struct ena_ring *);
64
65 /*********************************************************************
66 * Global functions
67 *********************************************************************/
68
69 void
70 ena_cleanup(void *arg, int pending)
71 {
72 struct ena_que *que = arg;
73 struct ena_adapter *adapter = que->adapter;
74 if_t ifp = adapter->ifp;
75 struct ena_ring *tx_ring;
76 struct ena_ring *rx_ring;
77 struct ena_com_io_cq *io_cq;
78 struct ena_eth_io_intr_reg intr_reg;
79 int qid, ena_qid;
80 int txc, rxc, i;
81
82 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
83 return;
84
85 ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
86
87 tx_ring = que->tx_ring;
88 rx_ring = que->rx_ring;
89 qid = que->id;
90 ena_qid = ENA_IO_TXQ_IDX(qid);
91 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
92
93 atomic_store_8(&tx_ring->first_interrupt, 1);
94 atomic_store_8(&rx_ring->first_interrupt, 1);
95
96 for (i = 0; i < ENA_CLEAN_BUDGET; ++i) {
97 rxc = ena_rx_cleanup(rx_ring);
98 txc = ena_tx_cleanup(tx_ring);
99
100 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
101 return;
102
103 if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
104 break;
105 }
106
107 /* Signal that work is done and unmask interrupt */
108 ena_com_update_intr_reg(&intr_reg, ENA_RX_IRQ_INTERVAL,
109 ENA_TX_IRQ_INTERVAL, true);
110 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
111 ena_com_unmask_intr(io_cq, &intr_reg);
112 }
113
114 void
115 ena_deferred_mq_start(void *arg, int pending)
116 {
117 struct ena_ring *tx_ring = (struct ena_ring *)arg;
118 if_t ifp = tx_ring->adapter->ifp;
119
120 while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running &&
121 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
122 ENA_RING_MTX_LOCK(tx_ring);
123 ena_start_xmit(tx_ring);
124 ENA_RING_MTX_UNLOCK(tx_ring);
125 }
126 }
127
128 int
129 ena_mq_start(if_t ifp, struct mbuf *m)
130 {
131 struct ena_adapter *adapter = if_getsoftc(ifp);
132 struct ena_ring *tx_ring;
133 int ret, is_drbr_empty;
134 uint32_t i;
135 #ifdef RSS
136 uint32_t bucket_id;
137 #endif
138
139 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
140 return (ENODEV);
141
142 /* Which queue to use */
143 /*
144 * If everything is setup correctly, it should be the
145 * same bucket that the current CPU we're on is.
146 * It should improve performance.
147 */
148 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
149 #ifdef RSS
150 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
151 &bucket_id) == 0)
152 i = bucket_id % adapter->num_io_queues;
153 else
154 #endif
155 i = m->m_pkthdr.flowid % adapter->num_io_queues;
156 } else {
157 i = curcpu % adapter->num_io_queues;
158 }
159 tx_ring = &adapter->tx_ring[i];
160
161 /* Check if drbr is empty before putting packet */
162 is_drbr_empty = drbr_empty(ifp, tx_ring->br);
163 ret = drbr_enqueue(ifp, tx_ring->br, m);
164 if (unlikely(ret != 0)) {
165 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
166 return (ret);
167 }
168
169 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
170 ena_start_xmit(tx_ring);
171 ENA_RING_MTX_UNLOCK(tx_ring);
172 } else {
173 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
174 }
175
176 return (0);
177 }
178
179 void
180 ena_qflush(if_t ifp)
181 {
182 struct ena_adapter *adapter = if_getsoftc(ifp);
183 struct ena_ring *tx_ring = adapter->tx_ring;
184 int i;
185
186 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
187 if (!drbr_empty(ifp, tx_ring->br)) {
188 ENA_RING_MTX_LOCK(tx_ring);
189 drbr_flush(ifp, tx_ring->br);
190 ENA_RING_MTX_UNLOCK(tx_ring);
191 }
192
193 if_qflush(ifp);
194 }
195
196 /*********************************************************************
197 * Static functions
198 *********************************************************************/
199
200 static inline int
201 ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
202 uint16_t *req_id)
203 {
204 struct ena_adapter *adapter = tx_ring->adapter;
205 int rc;
206
207 rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
208 if (rc == ENA_COM_TRY_AGAIN)
209 return (EAGAIN);
210
211 if (unlikely(rc != 0)) {
212 ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n",
213 *req_id, tx_ring->qid);
214 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
215 goto err;
216 }
217
218 if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL)
219 return (0);
220
221 ena_log(adapter->pdev, ERR,
222 "tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
223 *req_id, tx_ring->qid);
224 err:
225 ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
226
227 return (EFAULT);
228 }
229
230 /**
231 * ena_tx_cleanup - clear sent packets and corresponding descriptors
232 * @tx_ring: ring for which we want to clean packets
233 *
234 * Once packets are sent, we ask the device in a loop for no longer used
235 * descriptors. We find the related mbuf chain in a map (index in an array)
236 * and free it, then update ring state.
237 * This is performed in "endless" loop, updating ring pointers every
238 * TX_COMMIT. The first check of free descriptor is performed before the actual
239 * loop, then repeated at the loop end.
240 **/
241 static int
242 ena_tx_cleanup(struct ena_ring *tx_ring)
243 {
244 struct ena_adapter *adapter;
245 struct ena_com_io_cq *io_cq;
246 uint16_t next_to_clean;
247 uint16_t req_id;
248 uint16_t ena_qid;
249 unsigned int total_done = 0;
250 int rc;
251 int commit = ENA_TX_COMMIT;
252 int budget = ENA_TX_BUDGET;
253 int work_done;
254 bool above_thresh;
255
256 adapter = tx_ring->que->adapter;
257 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
258 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
259 next_to_clean = tx_ring->next_to_clean;
260
261 #ifdef DEV_NETMAP
262 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
263 return (0);
264 #endif /* DEV_NETMAP */
265
266 do {
267 struct ena_tx_buffer *tx_info;
268 struct mbuf *mbuf;
269
270 rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id);
271 if (unlikely(rc != 0))
272 break;
273
274 tx_info = &tx_ring->tx_buffer_info[req_id];
275
276 mbuf = tx_info->mbuf;
277
278 tx_info->mbuf = NULL;
279 bintime_clear(&tx_info->timestamp);
280
281 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
282 BUS_DMASYNC_POSTWRITE);
283 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
284
285 ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n",
286 tx_ring->qid, mbuf);
287
288 m_freem(mbuf);
289
290 total_done += tx_info->tx_descs;
291
292 tx_ring->free_tx_ids[next_to_clean] = req_id;
293 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
294 tx_ring->ring_size);
295
296 if (unlikely(--commit == 0)) {
297 commit = ENA_TX_COMMIT;
298 /* update ring state every ENA_TX_COMMIT descriptor */
299 tx_ring->next_to_clean = next_to_clean;
300 ena_com_comp_ack(
301 &adapter->ena_dev->io_sq_queues[ena_qid],
302 total_done);
303 ena_com_update_dev_comp_head(io_cq);
304 total_done = 0;
305 }
306 } while (likely(--budget));
307
308 work_done = ENA_TX_BUDGET - budget;
309
310 ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n",
311 tx_ring->qid, work_done);
312
313 /* If there is still something to commit update ring state */
314 if (likely(commit != ENA_TX_COMMIT)) {
315 tx_ring->next_to_clean = next_to_clean;
316 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
317 total_done);
318 ena_com_update_dev_comp_head(io_cq);
319 }
320
321 /*
322 * Need to make the rings circular update visible to
323 * ena_xmit_mbuf() before checking for tx_ring->running.
324 */
325 mb();
326
327 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
328 ENA_TX_RESUME_THRESH);
329 if (unlikely(!tx_ring->running && above_thresh)) {
330 ENA_RING_MTX_LOCK(tx_ring);
331 above_thresh = ena_com_sq_have_enough_space(
332 tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH);
333 if (!tx_ring->running && above_thresh) {
334 tx_ring->running = true;
335 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
336 taskqueue_enqueue(tx_ring->enqueue_tq,
337 &tx_ring->enqueue_task);
338 }
339 ENA_RING_MTX_UNLOCK(tx_ring);
340 }
341
342 tx_ring->tx_last_cleanup_ticks = ticks;
343
344 return (work_done);
345 }
346
347 static void
348 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
349 struct mbuf *mbuf)
350 {
351 struct ena_adapter *adapter = rx_ring->adapter;
352
353 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
354 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
355
356 #ifdef RSS
357 /*
358 * Hardware and software RSS are in agreement only when both are
359 * configured to Toeplitz algorithm. This driver configures
360 * that algorithm only when software RSS is enabled and uses it.
361 */
362 if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
363 ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
364 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
365 return;
366 }
367 #endif
368
369 if (ena_rx_ctx->frag &&
370 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
371 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
372 return;
373 }
374
375 switch (ena_rx_ctx->l3_proto) {
376 case ENA_ETH_IO_L3_PROTO_IPV4:
377 switch (ena_rx_ctx->l4_proto) {
378 case ENA_ETH_IO_L4_PROTO_TCP:
379 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
380 break;
381 case ENA_ETH_IO_L4_PROTO_UDP:
382 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
383 break;
384 default:
385 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
386 }
387 break;
388 case ENA_ETH_IO_L3_PROTO_IPV6:
389 switch (ena_rx_ctx->l4_proto) {
390 case ENA_ETH_IO_L4_PROTO_TCP:
391 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
392 break;
393 case ENA_ETH_IO_L4_PROTO_UDP:
394 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
395 break;
396 default:
397 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
398 }
399 break;
400 case ENA_ETH_IO_L3_PROTO_UNKNOWN:
401 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
402 break;
403 default:
404 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
405 }
406 } else {
407 mbuf->m_pkthdr.flowid = rx_ring->qid;
408 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
409 }
410 }
411
412 /**
413 * ena_rx_mbuf - assemble mbuf from descriptors
414 * @rx_ring: ring for which we want to clean packets
415 * @ena_bufs: buffer info
416 * @ena_rx_ctx: metadata for this packet(s)
417 * @next_to_clean: ring pointer, will be updated only upon success
418 *
419 **/
420 static struct mbuf *
421 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
422 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
423 {
424 struct mbuf *mbuf;
425 struct ena_rx_buffer *rx_info;
426 struct ena_adapter *adapter;
427 device_t pdev;
428 unsigned int descs = ena_rx_ctx->descs;
429 uint16_t ntc, len, req_id, buf = 0;
430
431 ntc = *next_to_clean;
432 adapter = rx_ring->adapter;
433 pdev = adapter->pdev;
434
435 len = ena_bufs[buf].len;
436 req_id = ena_bufs[buf].req_id;
437 rx_info = &rx_ring->rx_buffer_info[req_id];
438 if (unlikely(rx_info->mbuf == NULL)) {
439 ena_log(pdev, ERR, "NULL mbuf in rx_info");
440 return (NULL);
441 }
442
443 ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info,
444 rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
445
446 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
447 BUS_DMASYNC_POSTREAD);
448 mbuf = rx_info->mbuf;
449 mbuf->m_flags |= M_PKTHDR;
450 mbuf->m_pkthdr.len = len;
451 mbuf->m_len = len;
452 /* Only for the first segment the data starts at specific offset */
453 mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
454 ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
455 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
456
457 /* Fill mbuf with hash key and it's interpretation for optimization */
458 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
459
460 ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf,
461 mbuf->m_flags, mbuf->m_pkthdr.len);
462
463 /* DMA address is not needed anymore, unmap it */
464 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
465
466 rx_info->mbuf = NULL;
467 rx_ring->free_rx_ids[ntc] = req_id;
468 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
469
470 /*
471 * While we have more than 1 descriptors for one rcvd packet, append
472 * other mbufs to the main one
473 */
474 while (--descs) {
475 ++buf;
476 len = ena_bufs[buf].len;
477 req_id = ena_bufs[buf].req_id;
478 rx_info = &rx_ring->rx_buffer_info[req_id];
479
480 if (unlikely(rx_info->mbuf == NULL)) {
481 ena_log(pdev, ERR, "NULL mbuf in rx_info");
482 /*
483 * If one of the required mbufs was not allocated yet,
484 * we can break there.
485 * All earlier used descriptors will be reallocated
486 * later and not used mbufs can be reused.
487 * The next_to_clean pointer will not be updated in case
488 * of an error, so caller should advance it manually
489 * in error handling routine to keep it up to date
490 * with hw ring.
491 */
492 m_freem(mbuf);
493 return (NULL);
494 }
495
496 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
497 BUS_DMASYNC_POSTREAD);
498 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
499 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
500 ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n",
501 mbuf);
502 }
503
504 ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n",
505 mbuf->m_pkthdr.len);
506
507 /* Free already appended mbuf, it won't be useful anymore */
508 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
509 m_freem(rx_info->mbuf);
510 rx_info->mbuf = NULL;
511
512 rx_ring->free_rx_ids[ntc] = req_id;
513 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
514 }
515
516 *next_to_clean = ntc;
517
518 return (mbuf);
519 }
520
521 /**
522 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
523 **/
524 static inline void
525 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
526 struct mbuf *mbuf)
527 {
528 device_t pdev = rx_ring->adapter->pdev;
529
530 /* if IP and error */
531 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
532 ena_rx_ctx->l3_csum_err)) {
533 /* ipv4 checksum error */
534 mbuf->m_pkthdr.csum_flags = 0;
535 counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
536 ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n");
537 return;
538 }
539
540 /* if TCP/UDP */
541 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
542 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
543 if (ena_rx_ctx->l4_csum_err) {
544 /* TCP/UDP checksum error */
545 mbuf->m_pkthdr.csum_flags = 0;
546 counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
547 ena_log_io(pdev, DBG, "RX L4 checksum error\n");
548 } else {
549 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
550 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
551 counter_u64_add(rx_ring->rx_stats.csum_good, 1);
552 }
553 }
554 }
555
556 /**
557 * ena_rx_cleanup - handle rx irq
558 * @arg: ring for which irq is being handled
559 **/
560 static int
561 ena_rx_cleanup(struct ena_ring *rx_ring)
562 {
563 struct ena_adapter *adapter;
564 device_t pdev;
565 struct mbuf *mbuf;
566 struct ena_com_rx_ctx ena_rx_ctx;
567 struct ena_com_io_cq *io_cq;
568 struct ena_com_io_sq *io_sq;
569 enum ena_regs_reset_reason_types reset_reason;
570 if_t ifp;
571 uint16_t ena_qid;
572 uint16_t next_to_clean;
573 uint32_t refill_required;
574 uint32_t refill_threshold;
575 uint32_t do_if_input = 0;
576 unsigned int qid;
577 int rc, i;
578 int budget = ENA_RX_BUDGET;
579 #ifdef DEV_NETMAP
580 int done;
581 #endif /* DEV_NETMAP */
582
583 adapter = rx_ring->que->adapter;
584 pdev = adapter->pdev;
585 ifp = adapter->ifp;
586 qid = rx_ring->que->id;
587 ena_qid = ENA_IO_RXQ_IDX(qid);
588 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
589 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
590 next_to_clean = rx_ring->next_to_clean;
591
592 #ifdef DEV_NETMAP
593 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
594 return (0);
595 #endif /* DEV_NETMAP */
596
597 ena_log_io(pdev, DBG, "rx: qid %d\n", qid);
598
599 do {
600 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
601 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
602 ena_rx_ctx.descs = 0;
603 ena_rx_ctx.pkt_offset = 0;
604
605 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
606 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
607 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
608 if (unlikely(rc != 0)) {
609 if (rc == ENA_COM_NO_SPACE) {
610 counter_u64_add(rx_ring->rx_stats.bad_desc_num,
611 1);
612 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
613 } else {
614 counter_u64_add(rx_ring->rx_stats.bad_req_id,
615 1);
616 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
617 }
618 ena_trigger_reset(adapter, reset_reason);
619 return (0);
620 }
621
622 if (unlikely(ena_rx_ctx.descs == 0))
623 break;
624
625 ena_log_io(pdev, DBG,
626 "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
627 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
628 ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
629
630 /* Receive mbuf from the ring */
631 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx,
632 &next_to_clean);
633 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
634 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
635 /* Exit if we failed to retrieve a buffer */
636 if (unlikely(mbuf == NULL)) {
637 for (i = 0; i < ena_rx_ctx.descs; ++i) {
638 rx_ring->free_rx_ids[next_to_clean] =
639 rx_ring->ena_bufs[i].req_id;
640 next_to_clean = ENA_RX_RING_IDX_NEXT(
641 next_to_clean, rx_ring->ring_size);
642 }
643 break;
644 }
645
646 if (((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) ||
647 ((if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) != 0)) {
648 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
649 }
650
651 counter_enter();
652 counter_u64_add_protected(rx_ring->rx_stats.bytes,
653 mbuf->m_pkthdr.len);
654 counter_u64_add_protected(adapter->hw_stats.rx_bytes,
655 mbuf->m_pkthdr.len);
656 counter_exit();
657 /*
658 * LRO is only for IP/TCP packets and TCP checksum of the packet
659 * should be computed by hardware.
660 */
661 do_if_input = 1;
662 if (((if_getcapenable(ifp) & IFCAP_LRO) != 0) &&
663 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
664 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
665 /*
666 * Send to the stack if:
667 * - LRO not enabled, or
668 * - no LRO resources, or
669 * - lro enqueue fails
670 */
671 if ((rx_ring->lro.lro_cnt != 0) &&
672 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
673 do_if_input = 0;
674 }
675 if (do_if_input != 0) {
676 ena_log_io(pdev, DBG,
677 "calling if_input() with mbuf %p\n", mbuf);
678 if_input(ifp, mbuf);
679 }
680
681 counter_enter();
682 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
683 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
684 counter_exit();
685 } while (--budget);
686
687 rx_ring->next_to_clean = next_to_clean;
688
689 refill_required = ena_com_free_q_entries(io_sq);
690 refill_threshold = min_t(int,
691 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
692 ENA_RX_REFILL_THRESH_PACKET);
693
694 if (refill_required > refill_threshold) {
695 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
696 ena_refill_rx_bufs(rx_ring, refill_required);
697 }
698
699 tcp_lro_flush_all(&rx_ring->lro);
700
701 return (ENA_RX_BUDGET - budget);
702 }
703
704 static void
705 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
706 bool disable_meta_caching)
707 {
708 struct ena_com_tx_meta *ena_meta;
709 struct ether_vlan_header *eh;
710 struct mbuf *mbuf_next;
711 u32 mss;
712 bool offload;
713 uint16_t etype;
714 int ehdrlen;
715 struct ip *ip;
716 int ipproto;
717 int iphlen;
718 struct tcphdr *th;
719 int offset;
720
721 offload = false;
722 ena_meta = &ena_tx_ctx->ena_meta;
723 mss = mbuf->m_pkthdr.tso_segsz;
724
725 if (mss != 0)
726 offload = true;
727
728 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
729 offload = true;
730
731 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
732 offload = true;
733
734 if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
735 offload = true;
736
737 if (!offload) {
738 if (disable_meta_caching) {
739 memset(ena_meta, 0, sizeof(*ena_meta));
740 ena_tx_ctx->meta_valid = 1;
741 } else {
742 ena_tx_ctx->meta_valid = 0;
743 }
744 return;
745 }
746
747 /* Determine where frame payload starts. */
748 eh = mtod(mbuf, struct ether_vlan_header *);
749 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
750 etype = ntohs(eh->evl_proto);
751 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
752 } else {
753 etype = ntohs(eh->evl_encap_proto);
754 ehdrlen = ETHER_HDR_LEN;
755 }
756
757 mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
758
759 switch (etype) {
760 case ETHERTYPE_IP:
761 ip = (struct ip *)(mtodo(mbuf_next, offset));
762 iphlen = ip->ip_hl << 2;
763 ipproto = ip->ip_p;
764 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
765 if ((ip->ip_off & htons(IP_DF)) != 0)
766 ena_tx_ctx->df = 1;
767 break;
768 case ETHERTYPE_IPV6:
769 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
770 iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto);
771 iphlen -= ehdrlen;
772 ena_tx_ctx->df = 1;
773 break;
774 default:
775 iphlen = 0;
776 ipproto = 0;
777 break;
778 }
779
780 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
781 th = (struct tcphdr *)(mtodo(mbuf_next, offset));
782
783 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
784 ena_tx_ctx->l3_csum_enable = 1;
785 }
786 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
787 ena_tx_ctx->tso_enable = 1;
788 ena_meta->l4_hdr_len = (th->th_off);
789 }
790
791 if (ipproto == IPPROTO_TCP) {
792 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
793 if ((mbuf->m_pkthdr.csum_flags &
794 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
795 ena_tx_ctx->l4_csum_enable = 1;
796 else
797 ena_tx_ctx->l4_csum_enable = 0;
798 } else if (ipproto == IPPROTO_UDP) {
799 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
800 if ((mbuf->m_pkthdr.csum_flags &
801 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
802 ena_tx_ctx->l4_csum_enable = 1;
803 else
804 ena_tx_ctx->l4_csum_enable = 0;
805 } else {
806 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
807 ena_tx_ctx->l4_csum_enable = 0;
808 }
809
810 ena_meta->mss = mss;
811 ena_meta->l3_hdr_len = iphlen;
812 ena_meta->l3_hdr_offset = ehdrlen;
813 ena_tx_ctx->meta_valid = 1;
814 }
815
816 static int
817 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
818 {
819 struct ena_adapter *adapter;
820 struct mbuf *collapsed_mbuf;
821 int num_frags;
822
823 adapter = tx_ring->adapter;
824 num_frags = ena_mbuf_count(*mbuf);
825
826 /* One segment must be reserved for configuration descriptor. */
827 if (num_frags < adapter->max_tx_sgl_size)
828 return (0);
829
830 if ((num_frags == adapter->max_tx_sgl_size) &&
831 ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size))
832 return (0);
833
834 counter_u64_add(tx_ring->tx_stats.collapse, 1);
835
836 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
837 adapter->max_tx_sgl_size - 1);
838 if (unlikely(collapsed_mbuf == NULL)) {
839 counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
840 return (ENOMEM);
841 }
842
843 /* If mbuf was collapsed succesfully, original mbuf is released. */
844 *mbuf = collapsed_mbuf;
845
846 return (0);
847 }
848
849 static int
850 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
851 struct mbuf *mbuf, void **push_hdr, u16 *header_len)
852 {
853 struct ena_adapter *adapter = tx_ring->adapter;
854 struct ena_com_buf *ena_buf;
855 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
856 size_t iseg = 0;
857 uint32_t mbuf_head_len;
858 uint16_t offset;
859 int rc, nsegs;
860
861 mbuf_head_len = mbuf->m_len;
862 tx_info->mbuf = mbuf;
863 ena_buf = tx_info->bufs;
864
865 /*
866 * For easier maintaining of the DMA map, map the whole mbuf even if
867 * the LLQ is used. The descriptors will be filled using the segments.
868 */
869 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag,
870 tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
871 if (unlikely((rc != 0) || (nsegs == 0))) {
872 ena_log_io(adapter->pdev, WARN,
873 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
874 goto dma_error;
875 }
876
877 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
878 /*
879 * When the device is LLQ mode, the driver will copy
880 * the header into the device memory space.
881 * the ena_com layer assumes the header is in a linear
882 * memory space.
883 * This assumption might be wrong since part of the header
884 * can be in the fragmented buffers.
885 * First check if header fits in the mbuf. If not, copy it to
886 * separate buffer that will be holding linearized data.
887 */
888 *header_len = min_t(uint32_t, mbuf->m_pkthdr.len,
889 tx_ring->tx_max_header_size);
890
891 /* If header is in linear space, just point into mbuf's data. */
892 if (likely(*header_len <= mbuf_head_len)) {
893 *push_hdr = mbuf->m_data;
894 /*
895 * Otherwise, copy whole portion of header from multiple
896 * mbufs to intermediate buffer.
897 */
898 } else {
899 m_copydata(mbuf, 0, *header_len,
900 tx_ring->push_buf_intermediate_buf);
901 *push_hdr = tx_ring->push_buf_intermediate_buf;
902
903 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
904 }
905
906 ena_log_io(adapter->pdev, DBG,
907 "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
908 mbuf, *push_hdr, *header_len);
909
910 /* If packet is fitted in LLQ header, no need for DMA segments. */
911 if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
912 return (0);
913 } else {
914 offset = tx_ring->tx_max_header_size;
915 /*
916 * As Header part is mapped to LLQ header, we can skip
917 * it and just map the residuum of the mbuf to DMA
918 * Segments.
919 */
920 while (offset > 0) {
921 if (offset >= segs[iseg].ds_len) {
922 offset -= segs[iseg].ds_len;
923 } else {
924 ena_buf->paddr = segs[iseg].ds_addr +
925 offset;
926 ena_buf->len = segs[iseg].ds_len -
927 offset;
928 ena_buf++;
929 tx_info->num_of_bufs++;
930 offset = 0;
931 }
932 iseg++;
933 }
934 }
935 } else {
936 *push_hdr = NULL;
937 /*
938 * header_len is just a hint for the device. Because FreeBSD is
939 * not giving us information about packet header length and it
940 * is not guaranteed that all packet headers will be in the 1st
941 * mbuf, setting header_len to 0 is making the device ignore
942 * this value and resolve header on it's own.
943 */
944 *header_len = 0;
945 }
946
947 /* Map rest of the mbuf */
948 while (iseg < nsegs) {
949 ena_buf->paddr = segs[iseg].ds_addr;
950 ena_buf->len = segs[iseg].ds_len;
951 ena_buf++;
952 iseg++;
953 tx_info->num_of_bufs++;
954 }
955
956 return (0);
957
958 dma_error:
959 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
960 tx_info->mbuf = NULL;
961 return (rc);
962 }
963
964 static int
965 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
966 {
967 struct ena_adapter *adapter;
968 device_t pdev;
969 struct ena_tx_buffer *tx_info;
970 struct ena_com_tx_ctx ena_tx_ctx;
971 struct ena_com_dev *ena_dev;
972 struct ena_com_io_sq *io_sq;
973 void *push_hdr;
974 uint16_t next_to_use;
975 uint16_t req_id;
976 uint16_t ena_qid;
977 uint16_t header_len;
978 int rc;
979 int nb_hw_desc;
980
981 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
982 adapter = tx_ring->que->adapter;
983 pdev = adapter->pdev;
984 ena_dev = adapter->ena_dev;
985 io_sq = &ena_dev->io_sq_queues[ena_qid];
986
987 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
988 if (unlikely(rc != 0)) {
989 ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n",
990 rc);
991 return (rc);
992 }
993
994 ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
995
996 next_to_use = tx_ring->next_to_use;
997 req_id = tx_ring->free_tx_ids[next_to_use];
998 tx_info = &tx_ring->tx_buffer_info[req_id];
999 tx_info->num_of_bufs = 0;
1000
1001 ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev,
1002 "mbuf isn't NULL for req_id %d\n", req_id);
1003
1004 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
1005 if (unlikely(rc != 0)) {
1006 ena_log_io(pdev, WARN, "Failed to map TX mbuf\n");
1007 return (rc);
1008 }
1009 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
1010 ena_tx_ctx.ena_bufs = tx_info->bufs;
1011 ena_tx_ctx.push_header = push_hdr;
1012 ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
1013 ena_tx_ctx.req_id = req_id;
1014 ena_tx_ctx.header_len = header_len;
1015
1016 /* Set flags and meta data */
1017 ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
1018
1019 if (tx_ring->acum_pkts == ENA_DB_THRESHOLD ||
1020 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
1021 ena_log_io(pdev, DBG,
1022 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
1023 tx_ring->que->id);
1024 ena_ring_tx_doorbell(tx_ring);
1025 }
1026
1027 /* Prepare the packet's descriptors and send them to device */
1028 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
1029 if (unlikely(rc != 0)) {
1030 if (likely(rc == ENA_COM_NO_MEM)) {
1031 ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n",
1032 tx_ring->que->id);
1033 } else {
1034 ena_log(pdev, ERR, "failed to prepare tx bufs\n");
1035 ena_trigger_reset(adapter,
1036 ENA_REGS_RESET_DRIVER_INVALID_STATE);
1037 }
1038 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
1039 goto dma_error;
1040 }
1041
1042 counter_enter();
1043 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
1044 counter_u64_add_protected(tx_ring->tx_stats.bytes,
1045 (*mbuf)->m_pkthdr.len);
1046
1047 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
1048 counter_u64_add_protected(adapter->hw_stats.tx_bytes,
1049 (*mbuf)->m_pkthdr.len);
1050 counter_exit();
1051
1052 tx_info->tx_descs = nb_hw_desc;
1053 getbinuptime(&tx_info->timestamp);
1054 tx_info->print_once = true;
1055
1056 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
1057 tx_ring->ring_size);
1058
1059 /* stop the queue when no more space available, the packet can have up
1060 * to sgl_size + 2. one for the meta descriptor and one for header
1061 * (if the header is larger than tx_max_header_size).
1062 */
1063 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1064 adapter->max_tx_sgl_size + 2))) {
1065 ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id);
1066
1067 tx_ring->running = false;
1068 counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
1069
1070 /* There is a rare condition where this function decides to
1071 * stop the queue but meanwhile tx_cleanup() updates
1072 * next_to_completion and terminates.
1073 * The queue will remain stopped forever.
1074 * To solve this issue this function performs mb(), checks
1075 * the wakeup condition and wakes up the queue if needed.
1076 */
1077 mb();
1078
1079 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1080 ENA_TX_RESUME_THRESH)) {
1081 tx_ring->running = true;
1082 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1083 }
1084 }
1085
1086 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1087 BUS_DMASYNC_PREWRITE);
1088
1089 return (0);
1090
1091 dma_error:
1092 tx_info->mbuf = NULL;
1093 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1094
1095 return (rc);
1096 }
1097
1098 static void
1099 ena_start_xmit(struct ena_ring *tx_ring)
1100 {
1101 struct mbuf *mbuf;
1102 struct ena_adapter *adapter = tx_ring->adapter;
1103 int ret = 0;
1104
1105 ENA_RING_MTX_ASSERT(tx_ring);
1106
1107 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1108 return;
1109
1110 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1111 return;
1112
1113 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1114 ena_log_io(adapter->pdev, DBG,
1115 "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n",
1116 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1117
1118 if (unlikely(!tx_ring->running)) {
1119 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1120 break;
1121 }
1122
1123 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1124 if (ret == ENA_COM_NO_MEM) {
1125 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1126 } else if (ret == ENA_COM_NO_SPACE) {
1127 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1128 } else {
1129 m_freem(mbuf);
1130 drbr_advance(adapter->ifp, tx_ring->br);
1131 }
1132
1133 break;
1134 }
1135
1136 drbr_advance(adapter->ifp, tx_ring->br);
1137
1138 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1139 return;
1140
1141 tx_ring->acum_pkts++;
1142
1143 BPF_MTAP(adapter->ifp, mbuf);
1144 }
1145
1146 if (likely(tx_ring->acum_pkts != 0)) {
1147 /* Trigger the dma engine */
1148 ena_ring_tx_doorbell(tx_ring);
1149 }
1150
1151 if (unlikely(!tx_ring->running))
1152 taskqueue_enqueue(tx_ring->que->cleanup_tq,
1153 &tx_ring->que->cleanup_task);
1154 }
Cache object: c1cc16957ebe50749fbdbc8f3834038c
|