FreeBSD/Linux Kernel Cross Reference
sys/dev/ena/ena.c
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_rss.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/kthread.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/module.h>
44 #include <sys/rman.h>
45 #include <sys/smp.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <sys/time.h>
51 #include <sys/eventhandler.h>
52
53 #include <machine/bus.h>
54 #include <machine/resource.h>
55 #include <machine/in_cksum.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 #ifdef RSS
67 #include <net/rss_config.h>
68 #endif
69
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip6.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pcireg.h>
80
81 #include <vm/vm.h>
82 #include <vm/pmap.h>
83
84 #include "ena_datapath.h"
85 #include "ena.h"
86 #include "ena_sysctl.h"
87
88 #ifdef DEV_NETMAP
89 #include "ena_netmap.h"
90 #endif /* DEV_NETMAP */
91
92 /*********************************************************
93 * Function prototypes
94 *********************************************************/
95 static int ena_probe(device_t);
96 static void ena_intr_msix_mgmnt(void *);
97 static void ena_free_pci_resources(struct ena_adapter *);
98 static int ena_change_mtu(if_t, int);
99 static inline void ena_alloc_counters(counter_u64_t *, int);
100 static inline void ena_free_counters(counter_u64_t *, int);
101 static inline void ena_reset_counters(counter_u64_t *, int);
102 static void ena_init_io_rings_common(struct ena_adapter *,
103 struct ena_ring *, uint16_t);
104 static void ena_init_io_rings_basic(struct ena_adapter *);
105 static void ena_init_io_rings_advanced(struct ena_adapter *);
106 static void ena_init_io_rings(struct ena_adapter *);
107 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
108 static void ena_free_all_io_rings_resources(struct ena_adapter *);
109 static int ena_setup_tx_dma_tag(struct ena_adapter *);
110 static int ena_free_tx_dma_tag(struct ena_adapter *);
111 static int ena_setup_rx_dma_tag(struct ena_adapter *);
112 static int ena_free_rx_dma_tag(struct ena_adapter *);
113 static void ena_release_all_tx_dmamap(struct ena_ring *);
114 static int ena_setup_tx_resources(struct ena_adapter *, int);
115 static void ena_free_tx_resources(struct ena_adapter *, int);
116 static int ena_setup_all_tx_resources(struct ena_adapter *);
117 static void ena_free_all_tx_resources(struct ena_adapter *);
118 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
119 static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
120 static int ena_setup_all_rx_resources(struct ena_adapter *);
121 static void ena_free_all_rx_resources(struct ena_adapter *);
122 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
123 struct ena_rx_buffer *);
124 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
125 struct ena_rx_buffer *);
126 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
127 static void ena_refill_all_rx_bufs(struct ena_adapter *);
128 static void ena_free_all_rx_bufs(struct ena_adapter *);
129 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
130 static void ena_free_all_tx_bufs(struct ena_adapter *);
131 static void ena_destroy_all_tx_queues(struct ena_adapter *);
132 static void ena_destroy_all_rx_queues(struct ena_adapter *);
133 static void ena_destroy_all_io_queues(struct ena_adapter *);
134 static int ena_create_io_queues(struct ena_adapter *);
135 static int ena_handle_msix(void *);
136 static int ena_enable_msix(struct ena_adapter *);
137 static void ena_setup_mgmnt_intr(struct ena_adapter *);
138 static int ena_setup_io_intr(struct ena_adapter *);
139 static int ena_request_mgmnt_irq(struct ena_adapter *);
140 static int ena_request_io_irq(struct ena_adapter *);
141 static void ena_free_mgmnt_irq(struct ena_adapter *);
142 static void ena_free_io_irq(struct ena_adapter *);
143 static void ena_free_irqs(struct ena_adapter*);
144 static void ena_disable_msix(struct ena_adapter *);
145 static void ena_unmask_all_io_irqs(struct ena_adapter *);
146 static int ena_rss_configure(struct ena_adapter *);
147 static int ena_up_complete(struct ena_adapter *);
148 static uint64_t ena_get_counter(if_t, ift_counter);
149 static int ena_media_change(if_t);
150 static void ena_media_status(if_t, struct ifmediareq *);
151 static void ena_init(void *);
152 static int ena_ioctl(if_t, u_long, caddr_t);
153 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
154 static void ena_update_host_info(struct ena_admin_host_info *, if_t);
155 static void ena_update_hwassist(struct ena_adapter *);
156 static int ena_setup_ifnet(device_t, struct ena_adapter *,
157 struct ena_com_dev_get_features_ctx *);
158 static int ena_enable_wc(struct resource *);
159 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
160 struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
161 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
162 struct ena_com_dev_get_features_ctx *);
163 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
164 static int ena_rss_init_default(struct ena_adapter *);
165 static void ena_rss_init_default_deferred(void *);
166 static void ena_config_host_info(struct ena_com_dev *, device_t);
167 static int ena_attach(device_t);
168 static int ena_detach(device_t);
169 static int ena_device_init(struct ena_adapter *, device_t,
170 struct ena_com_dev_get_features_ctx *, int *);
171 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
172 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
173 static void unimplemented_aenq_handler(void *,
174 struct ena_admin_aenq_entry *);
175 static int ena_copy_eni_metrics(struct ena_adapter *);
176 static void ena_timer_service(void *);
177
178 static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
179
180 static ena_vendor_info_t ena_vendor_info_array[] = {
181 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
182 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0},
183 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
184 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0},
185 /* Last entry */
186 { 0, 0, 0 }
187 };
188
189 /*
190  * Contains pointers to event handlers, e.g. link state change.
191 */
192 static struct ena_aenq_handlers aenq_handlers;
193
194 void
195 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
196 {
197 if (error != 0)
198 return;
199 *(bus_addr_t *) arg = segs[0].ds_addr;
200 }
201
202 int
203 ena_dma_alloc(device_t dmadev, bus_size_t size,
204 ena_mem_handle_t *dma, int mapflags, bus_size_t alignment)
205 {
206 struct ena_adapter* adapter = device_get_softc(dmadev);
207 uint32_t maxsize;
208 uint64_t dma_space_addr;
209 int error;
210
211 maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
212
213 dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
214 if (unlikely(dma_space_addr == 0))
215 dma_space_addr = BUS_SPACE_MAXADDR;
216
217 error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
218 alignment, 0, /* alignment, bounds */
219 dma_space_addr, /* lowaddr of exclusion window */
220 BUS_SPACE_MAXADDR,/* highaddr of exclusion window */
221 NULL, NULL, /* filter, filterarg */
222 maxsize, /* maxsize */
223 1, /* nsegments */
224 maxsize, /* maxsegsize */
225 BUS_DMA_ALLOCNOW, /* flags */
226 NULL, /* lockfunc */
227 NULL, /* lockarg */
228 &dma->tag);
229 if (unlikely(error != 0)) {
230 ena_trace(NULL, ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
231 goto fail_tag;
232 }
233
234 error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
235 BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
236 if (unlikely(error != 0)) {
237 ena_trace(NULL, ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
238 (uintmax_t)size, error);
239 goto fail_map_create;
240 }
241
242 dma->paddr = 0;
243 error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
244 size, ena_dmamap_callback, &dma->paddr, mapflags);
245 if (unlikely((error != 0) || (dma->paddr == 0))) {
246 		ena_trace(NULL, ENA_ALERT, "bus_dmamap_load failed: %d\n", error);
247 goto fail_map_load;
248 }
249
250 bus_dmamap_sync(dma->tag, dma->map,
251 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
252
253 return (0);
254
255 fail_map_load:
256 bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
257 fail_map_create:
258 bus_dma_tag_destroy(dma->tag);
259 fail_tag:
260 dma->tag = NULL;
261 dma->vaddr = NULL;
262 dma->paddr = 0;
263
264 return (error);
265 }
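/*
 * Usage sketch (illustrative only, not part of the driver): a caller
 * allocating a small DMA-coherent region might do the following, where
 * `adapter' and `size' are hypothetical locals:
 *
 *	ena_mem_handle_t mem;
 *	int rc;
 *
 *	rc = ena_dma_alloc(adapter->pdev, size, &mem, BUS_DMA_NOWAIT, 1);
 *	if (rc != 0)
 *		return (rc);
 *	... use mem.vaddr / mem.paddr ...
 *	bus_dmamap_unload(mem.tag, mem.map);
 *	bus_dmamem_free(mem.tag, mem.vaddr, mem.map);
 *	bus_dma_tag_destroy(mem.tag);
 */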
266
267 /*
268  * This function should generate a unique key for the whole driver.
269  * If the key was already generated in a previous call (for example,
270  * for another adapter), then it should be returned instead.
271 */
272 void
273 ena_rss_key_fill(void *key, size_t size)
274 {
275 static bool key_generated;
276 static uint8_t default_key[ENA_HASH_KEY_SIZE];
277
278 KASSERT(size <= ENA_HASH_KEY_SIZE, ("Requested more bytes than ENA RSS key can hold"));
279
280 if (!key_generated) {
281 arc4random_buf(default_key, ENA_HASH_KEY_SIZE);
282 key_generated = true;
283 }
284
285 memcpy(key, default_key, size);
286 }
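/*
 * Example (hypothetical): two adapters attaching in sequence end up with
 * identical keys, because only the first call runs arc4random_buf():
 *
 *	uint8_t key0[ENA_HASH_KEY_SIZE], key1[ENA_HASH_KEY_SIZE];
 *
 *	ena_rss_key_fill(key0, sizeof(key0));	generates the key
 *	ena_rss_key_fill(key1, sizeof(key1));	returns the cached copy
 *
 * Afterwards memcmp(key0, key1, sizeof(key0)) == 0, which keeps RSS
 * hashing consistent across all ENA ports in the system.
 */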
287
288 static void
289 ena_free_pci_resources(struct ena_adapter *adapter)
290 {
291 device_t pdev = adapter->pdev;
292
293 if (adapter->memory != NULL) {
294 bus_release_resource(pdev, SYS_RES_MEMORY,
295 PCIR_BAR(ENA_MEM_BAR), adapter->memory);
296 }
297
298 if (adapter->registers != NULL) {
299 bus_release_resource(pdev, SYS_RES_MEMORY,
300 PCIR_BAR(ENA_REG_BAR), adapter->registers);
301 }
302
303 if (adapter->msix != NULL) {
304 bus_release_resource(pdev, SYS_RES_MEMORY,
305 adapter->msix_rid, adapter->msix);
306 }
307 }
308
309 static int
310 ena_probe(device_t dev)
311 {
312 ena_vendor_info_t *ent;
313 char adapter_name[60];
314 uint16_t pci_vendor_id = 0;
315 uint16_t pci_device_id = 0;
316
317 pci_vendor_id = pci_get_vendor(dev);
318 pci_device_id = pci_get_device(dev);
319
320 ent = ena_vendor_info_array;
321 while (ent->vendor_id != 0) {
322 if ((pci_vendor_id == ent->vendor_id) &&
323 (pci_device_id == ent->device_id)) {
324 ena_trace(NULL, ENA_DBG, "vendor=%x device=%x\n",
325 pci_vendor_id, pci_device_id);
326
327 sprintf(adapter_name, DEVICE_DESC);
328 device_set_desc_copy(dev, adapter_name);
329 return (BUS_PROBE_DEFAULT);
330 }
331
332 ent++;
334 }
335
336 return (ENXIO);
337 }
338
339 static int
340 ena_change_mtu(if_t ifp, int new_mtu)
341 {
342 struct ena_adapter *adapter = if_getsoftc(ifp);
343 int rc;
344
345 if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
346 device_printf(adapter->pdev, "Invalid MTU setting. "
347 "new_mtu: %d max mtu: %d min mtu: %d\n",
348 new_mtu, adapter->max_mtu, ENA_MIN_MTU);
349 return (EINVAL);
350 }
351
352 rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
353 if (likely(rc == 0)) {
354 ena_trace(NULL, ENA_DBG, "set MTU to %d\n", new_mtu);
355 if_setmtu(ifp, new_mtu);
356 } else {
357 device_printf(adapter->pdev, "Failed to set MTU to %d\n",
358 new_mtu);
359 }
360
361 return (rc);
362 }
363
364 static inline void
365 ena_alloc_counters(counter_u64_t *begin, int size)
366 {
367 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
368
369 for (; begin < end; ++begin)
370 *begin = counter_u64_alloc(M_WAITOK);
371 }
372
373 static inline void
374 ena_free_counters(counter_u64_t *begin, int size)
375 {
376 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
377
378 for (; begin < end; ++begin)
379 counter_u64_free(*begin);
380 }
381
382 static inline void
383 ena_reset_counters(counter_u64_t *begin, int size)
384 {
385 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
386
387 for (; begin < end; ++begin)
388 counter_u64_zero(*begin);
389 }
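/*
 * The three helpers above treat a statistics structure as a flat array of
 * counter_u64_t. A minimal sketch of the expected layout (using a
 * hypothetical struct; the real ones are defined in ena.h):
 *
 *	struct hypothetical_stats {
 *		counter_u64_t ok;
 *		counter_u64_t err;
 *	} stats;
 *
 *	ena_alloc_counters((counter_u64_t *)&stats, sizeof(stats));
 *
 * This works only because every member is a counter_u64_t, making the
 * struct layout-compatible with an array of counters.
 */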
390
391 static void
392 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
393 uint16_t qid)
394 {
395
396 ring->qid = qid;
397 ring->adapter = adapter;
398 ring->ena_dev = adapter->ena_dev;
399 ring->first_interrupt = false;
400 ring->no_interrupt_event_cnt = 0;
401 }
402
403 static void
404 ena_init_io_rings_basic(struct ena_adapter *adapter)
405 {
406 struct ena_com_dev *ena_dev;
407 struct ena_ring *txr, *rxr;
408 struct ena_que *que;
409 int i;
410
411 ena_dev = adapter->ena_dev;
412
413 for (i = 0; i < adapter->num_io_queues; i++) {
414 txr = &adapter->tx_ring[i];
415 rxr = &adapter->rx_ring[i];
416
417 /* TX/RX common ring state */
418 ena_init_io_rings_common(adapter, txr, i);
419 ena_init_io_rings_common(adapter, rxr, i);
420
421 /* TX specific ring state */
422 txr->tx_max_header_size = ena_dev->tx_max_header_size;
423 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
424
425 que = &adapter->que[i];
426 que->adapter = adapter;
427 que->id = i;
428 que->tx_ring = txr;
429 que->rx_ring = rxr;
430
431 txr->que = que;
432 rxr->que = que;
433
434 rxr->empty_rx_queue = 0;
435 rxr->rx_mbuf_sz = ena_mbuf_sz;
436 }
437 }
438
439 static void
440 ena_init_io_rings_advanced(struct ena_adapter *adapter)
441 {
442 struct ena_ring *txr, *rxr;
443 int i;
444
445 for (i = 0; i < adapter->num_io_queues; i++) {
446 txr = &adapter->tx_ring[i];
447 rxr = &adapter->rx_ring[i];
448
449 /* Allocate a buf ring */
450 txr->buf_ring_size = adapter->buf_ring_size;
451 txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
452 M_WAITOK, &txr->ring_mtx);
453
454 /* Allocate Tx statistics. */
455 ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
456 sizeof(txr->tx_stats));
457
458 /* Allocate Rx statistics. */
459 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
460 sizeof(rxr->rx_stats));
461
462 /* Initialize locks */
463 snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
464 device_get_nameunit(adapter->pdev), i);
465 snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
466 device_get_nameunit(adapter->pdev), i);
467
468 mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
469 }
470 }
471
472 static void
473 ena_init_io_rings(struct ena_adapter *adapter)
474 {
475 /*
476 	 * IO ring initialization can be divided into two steps:
477 * 1. Initialize variables and fields with initial values and copy
478 * them from adapter/ena_dev (basic)
479 * 2. Allocate mutex, counters and buf_ring (advanced)
480 */
481 ena_init_io_rings_basic(adapter);
482 ena_init_io_rings_advanced(adapter);
483 }
484
485 static void
486 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
487 {
488 struct ena_ring *txr = &adapter->tx_ring[qid];
489 struct ena_ring *rxr = &adapter->rx_ring[qid];
490
491 ena_free_counters((counter_u64_t *)&txr->tx_stats,
492 sizeof(txr->tx_stats));
493 ena_free_counters((counter_u64_t *)&rxr->rx_stats,
494 sizeof(rxr->rx_stats));
495
496 ENA_RING_MTX_LOCK(txr);
497 drbr_free(txr->br, M_DEVBUF);
498 ENA_RING_MTX_UNLOCK(txr);
499
500 mtx_destroy(&txr->ring_mtx);
501 }
502
503 static void
504 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
505 {
506 int i;
507
508 for (i = 0; i < adapter->num_io_queues; i++)
509 ena_free_io_ring_resources(adapter, i);
511 }
512
513 static int
514 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
515 {
516 int ret;
517
518 /* Create DMA tag for Tx buffers */
519 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
520 1, 0, /* alignment, bounds */
521 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
522 BUS_SPACE_MAXADDR, /* highaddr of excl window */
523 NULL, NULL, /* filter, filterarg */
524 ENA_TSO_MAXSIZE, /* maxsize */
525 adapter->max_tx_sgl_size - 1, /* nsegments */
526 ENA_TSO_MAXSIZE, /* maxsegsize */
527 0, /* flags */
528 NULL, /* lockfunc */
529 NULL, /* lockfuncarg */
530 &adapter->tx_buf_tag);
531
532 return (ret);
533 }
534
535 static int
536 ena_free_tx_dma_tag(struct ena_adapter *adapter)
537 {
538 int ret;
539
540 ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
541
542 if (likely(ret == 0))
543 adapter->tx_buf_tag = NULL;
544
545 return (ret);
546 }
547
548 static int
549 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
550 {
551 int ret;
552
553 /* Create DMA tag for Rx buffers*/
554 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */
555 1, 0, /* alignment, bounds */
556 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
557 BUS_SPACE_MAXADDR, /* highaddr of excl window */
558 NULL, NULL, /* filter, filterarg */
559 ena_mbuf_sz, /* maxsize */
560 adapter->max_rx_sgl_size, /* nsegments */
561 ena_mbuf_sz, /* maxsegsize */
562 0, /* flags */
563 NULL, /* lockfunc */
564 NULL, /* lockarg */
565 &adapter->rx_buf_tag);
566
567 return (ret);
568 }
569
570 static int
571 ena_free_rx_dma_tag(struct ena_adapter *adapter)
572 {
573 int ret;
574
575 ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
576
577 if (likely(ret == 0))
578 adapter->rx_buf_tag = NULL;
579
580 return (ret);
581 }
582
583 static void
584 ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
585 {
586 struct ena_adapter *adapter = tx_ring->adapter;
587 struct ena_tx_buffer *tx_info;
588 	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
589 int i;
590 #ifdef DEV_NETMAP
591 struct ena_netmap_tx_info *nm_info;
592 int j;
593 #endif /* DEV_NETMAP */
594
595 for (i = 0; i < tx_ring->ring_size; ++i) {
596 tx_info = &tx_ring->tx_buffer_info[i];
597 #ifdef DEV_NETMAP
598 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
599 nm_info = &tx_info->nm_info;
600 for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
601 if (nm_info->map_seg[j] != NULL) {
602 bus_dmamap_destroy(tx_tag,
603 nm_info->map_seg[j]);
604 nm_info->map_seg[j] = NULL;
605 }
606 }
607 }
608 #endif /* DEV_NETMAP */
609 if (tx_info->dmamap != NULL) {
610 bus_dmamap_destroy(tx_tag, tx_info->dmamap);
611 tx_info->dmamap = NULL;
612 }
613 }
614 }
615
616 /**
617 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
618 * @adapter: network interface device structure
619 * @qid: queue index
620 *
621  * Returns 0 on success, or an error code on failure.
622 **/
623 static int
624 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
625 {
626 struct ena_que *que = &adapter->que[qid];
627 struct ena_ring *tx_ring = que->tx_ring;
628 int size, i, err;
629 #ifdef DEV_NETMAP
630 bus_dmamap_t *map;
631 int j;
632
633 ena_netmap_reset_tx_ring(adapter, qid);
634 #endif /* DEV_NETMAP */
635
636 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
637
638 tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
639 if (unlikely(tx_ring->tx_buffer_info == NULL))
640 return (ENOMEM);
641
642 size = sizeof(uint16_t) * tx_ring->ring_size;
643 tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
644 if (unlikely(tx_ring->free_tx_ids == NULL))
645 goto err_buf_info_free;
646
647 size = tx_ring->tx_max_header_size;
648 tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
649 M_NOWAIT | M_ZERO);
650 if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
651 goto err_tx_ids_free;
652
653 /* Req id stack for TX OOO completions */
654 for (i = 0; i < tx_ring->ring_size; i++)
655 tx_ring->free_tx_ids[i] = i;
656
657 /* Reset TX statistics. */
658 ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
659 sizeof(tx_ring->tx_stats));
660
661 tx_ring->next_to_use = 0;
662 tx_ring->next_to_clean = 0;
663 tx_ring->acum_pkts = 0;
664
665 /* Make sure that drbr is empty */
666 ENA_RING_MTX_LOCK(tx_ring);
667 drbr_flush(adapter->ifp, tx_ring->br);
668 ENA_RING_MTX_UNLOCK(tx_ring);
669
670 /* ... and create the buffer DMA maps */
671 for (i = 0; i < tx_ring->ring_size; i++) {
672 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
673 &tx_ring->tx_buffer_info[i].dmamap);
674 if (unlikely(err != 0)) {
675 ena_trace(NULL, ENA_ALERT,
676 "Unable to create Tx DMA map for buffer %d\n",
677 i);
678 goto err_map_release;
679 }
680
681 #ifdef DEV_NETMAP
682 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
683 map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
684 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
685 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
686 &map[j]);
687 if (unlikely(err != 0)) {
688 ena_trace(NULL, ENA_ALERT, "Unable to create "
689 "Tx DMA for buffer %d %d\n", i, j);
690 goto err_map_release;
691 }
692 }
693 }
694 #endif /* DEV_NETMAP */
695 }
696
697 /* Allocate taskqueues */
698 TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
699 tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
700 taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
701 if (unlikely(tx_ring->enqueue_tq == NULL)) {
702 ena_trace(NULL, ENA_ALERT,
703 "Unable to create taskqueue for enqueue task\n");
704 i = tx_ring->ring_size;
705 goto err_map_release;
706 }
707
708 tx_ring->running = true;
709
710 taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
711 "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
712
713 return (0);
714
715 err_map_release:
716 	ena_release_all_tx_dmamap(tx_ring);
	/* No label below frees the push buffer, so release it here as well. */
	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
	tx_ring->push_buf_intermediate_buf = NULL;
717 err_tx_ids_free:
718 free(tx_ring->free_tx_ids, M_DEVBUF);
719 tx_ring->free_tx_ids = NULL;
720 err_buf_info_free:
721 free(tx_ring->tx_buffer_info, M_DEVBUF);
722 tx_ring->tx_buffer_info = NULL;
723
724 return (ENOMEM);
725 }
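/*
 * Note the fall-through cleanup above: a goto to any of the error labels
 * runs that label's cleanup and then falls through the labels below it,
 * so each label only has to free the resource allocated right before the
 * failing step.
 */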
726
727 /**
728 * ena_free_tx_resources - Free Tx Resources per Queue
729 * @adapter: network interface device structure
730 * @qid: queue index
731 *
732 * Free all transmit software resources
733 **/
734 static void
735 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
736 {
737 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
738 #ifdef DEV_NETMAP
739 struct ena_netmap_tx_info *nm_info;
740 int j;
741 #endif /* DEV_NETMAP */
742
743 while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
744 NULL))
745 taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
746
747 taskqueue_free(tx_ring->enqueue_tq);
748
749 ENA_RING_MTX_LOCK(tx_ring);
750 /* Flush buffer ring, */
751 drbr_flush(adapter->ifp, tx_ring->br);
752
753 /* Free buffer DMA maps, */
754 for (int i = 0; i < tx_ring->ring_size; i++) {
755 bus_dmamap_sync(adapter->tx_buf_tag,
756 tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
757 bus_dmamap_unload(adapter->tx_buf_tag,
758 tx_ring->tx_buffer_info[i].dmamap);
759 bus_dmamap_destroy(adapter->tx_buf_tag,
760 tx_ring->tx_buffer_info[i].dmamap);
761
762 #ifdef DEV_NETMAP
763 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
764 nm_info = &tx_ring->tx_buffer_info[i].nm_info;
765 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
766 if (nm_info->socket_buf_idx[j] != 0) {
767 bus_dmamap_sync(adapter->tx_buf_tag,
768 nm_info->map_seg[j],
769 BUS_DMASYNC_POSTWRITE);
770 ena_netmap_unload(adapter,
771 nm_info->map_seg[j]);
772 }
773 bus_dmamap_destroy(adapter->tx_buf_tag,
774 nm_info->map_seg[j]);
775 nm_info->socket_buf_idx[j] = 0;
776 }
777 }
778 #endif /* DEV_NETMAP */
779
780 m_freem(tx_ring->tx_buffer_info[i].mbuf);
781 tx_ring->tx_buffer_info[i].mbuf = NULL;
782 }
783 ENA_RING_MTX_UNLOCK(tx_ring);
784
785 /* And free allocated memory. */
786 free(tx_ring->tx_buffer_info, M_DEVBUF);
787 tx_ring->tx_buffer_info = NULL;
788
789 free(tx_ring->free_tx_ids, M_DEVBUF);
790 tx_ring->free_tx_ids = NULL;
791
792 free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
793 tx_ring->push_buf_intermediate_buf = NULL;
794 }
795
796 /**
797 * ena_setup_all_tx_resources - allocate all queues Tx resources
798 * @adapter: network interface device structure
799 *
800  * Returns 0 on success, or an error code on failure.
801 **/
802 static int
803 ena_setup_all_tx_resources(struct ena_adapter *adapter)
804 {
805 int i, rc;
806
807 for (i = 0; i < adapter->num_io_queues; i++) {
808 rc = ena_setup_tx_resources(adapter, i);
809 if (rc != 0) {
810 device_printf(adapter->pdev,
811 "Allocation for Tx Queue %u failed\n", i);
812 goto err_setup_tx;
813 }
814 }
815
816 return (0);
817
818 err_setup_tx:
819 /* Rewind the index freeing the rings as we go */
820 while (i--)
821 ena_free_tx_resources(adapter, i);
822 return (rc);
823 }
824
825 /**
826 * ena_free_all_tx_resources - Free Tx Resources for All Queues
827 * @adapter: network interface device structure
828 *
829 * Free all transmit software resources
830 **/
831 static void
832 ena_free_all_tx_resources(struct ena_adapter *adapter)
833 {
834 int i;
835
836 for (i = 0; i < adapter->num_io_queues; i++)
837 ena_free_tx_resources(adapter, i);
838 }
839
840 /**
841 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
842 * @adapter: network interface device structure
843 * @qid: queue index
844 *
845  * Returns 0 on success, or an error code on failure.
846 **/
847 static int
848 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
849 {
850 struct ena_que *que = &adapter->que[qid];
851 struct ena_ring *rx_ring = que->rx_ring;
852 int size, err, i;
853
854 size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
855
856 #ifdef DEV_NETMAP
857 ena_netmap_reset_rx_ring(adapter, qid);
858 rx_ring->initialized = false;
859 #endif /* DEV_NETMAP */
860
861 /*
862 	 * Allocate an extra element so that in the Rx path
863 	 * we can always prefetch rx_info + 1.
864 */
865 size += sizeof(struct ena_rx_buffer);
866
867 rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
868
869 size = sizeof(uint16_t) * rx_ring->ring_size;
870 rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
871
872 for (i = 0; i < rx_ring->ring_size; i++)
873 rx_ring->free_rx_ids[i] = i;
874
875 /* Reset RX statistics. */
876 ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
877 sizeof(rx_ring->rx_stats));
878
879 rx_ring->next_to_clean = 0;
880 rx_ring->next_to_use = 0;
881
882 /* ... and create the buffer DMA maps */
883 for (i = 0; i < rx_ring->ring_size; i++) {
884 err = bus_dmamap_create(adapter->rx_buf_tag, 0,
885 &(rx_ring->rx_buffer_info[i].map));
886 if (err != 0) {
887 ena_trace(NULL, ENA_ALERT,
888 "Unable to create Rx DMA map for buffer %d\n", i);
889 goto err_buf_info_unmap;
890 }
891 }
892
893 /* Create LRO for the ring */
894 if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
895 int err = tcp_lro_init(&rx_ring->lro);
896 if (err != 0) {
897 device_printf(adapter->pdev,
898 "LRO[%d] Initialization failed!\n", qid);
899 } else {
900 ena_trace(NULL, ENA_INFO,
901 "RX Soft LRO[%d] Initialized\n", qid);
902 rx_ring->lro.ifp = adapter->ifp;
903 }
904 }
905
906 return (0);
907
908 err_buf_info_unmap:
909 while (i--) {
910 bus_dmamap_destroy(adapter->rx_buf_tag,
911 rx_ring->rx_buffer_info[i].map);
912 }
913
914 free(rx_ring->free_rx_ids, M_DEVBUF);
915 rx_ring->free_rx_ids = NULL;
916 free(rx_ring->rx_buffer_info, M_DEVBUF);
917 rx_ring->rx_buffer_info = NULL;
918 return (ENOMEM);
919 }
920
921 /**
922 * ena_free_rx_resources - Free Rx Resources
923 * @adapter: network interface device structure
924 * @qid: queue index
925 *
926 * Free all receive software resources
927 **/
928 static void
929 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
930 {
931 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
932
933 /* Free buffer DMA maps, */
934 for (int i = 0; i < rx_ring->ring_size; i++) {
935 bus_dmamap_sync(adapter->rx_buf_tag,
936 rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
937 m_freem(rx_ring->rx_buffer_info[i].mbuf);
938 rx_ring->rx_buffer_info[i].mbuf = NULL;
939 bus_dmamap_unload(adapter->rx_buf_tag,
940 rx_ring->rx_buffer_info[i].map);
941 bus_dmamap_destroy(adapter->rx_buf_tag,
942 rx_ring->rx_buffer_info[i].map);
943 }
944
945 /* free LRO resources, */
946 tcp_lro_free(&rx_ring->lro);
947
948 /* free allocated memory */
949 free(rx_ring->rx_buffer_info, M_DEVBUF);
950 rx_ring->rx_buffer_info = NULL;
951
952 free(rx_ring->free_rx_ids, M_DEVBUF);
953 rx_ring->free_rx_ids = NULL;
954 }
955
956 /**
957 * ena_setup_all_rx_resources - allocate all queues Rx resources
958 * @adapter: network interface device structure
959 *
960  * Returns 0 on success, or an error code on failure.
961 **/
962 static int
963 ena_setup_all_rx_resources(struct ena_adapter *adapter)
964 {
965 int i, rc = 0;
966
967 for (i = 0; i < adapter->num_io_queues; i++) {
968 rc = ena_setup_rx_resources(adapter, i);
969 if (rc != 0) {
970 device_printf(adapter->pdev,
971 "Allocation for Rx Queue %u failed\n", i);
972 goto err_setup_rx;
973 }
974 }
975 return (0);
976
977 err_setup_rx:
978 /* rewind the index freeing the rings as we go */
979 while (i--)
980 ena_free_rx_resources(adapter, i);
981 return (rc);
982 }
983
984 /**
985 * ena_free_all_rx_resources - Free Rx resources for all queues
986 * @adapter: network interface device structure
987 *
988 * Free all receive software resources
989 **/
990 static void
991 ena_free_all_rx_resources(struct ena_adapter *adapter)
992 {
993 int i;
994
995 for (i = 0; i < adapter->num_io_queues; i++)
996 ena_free_rx_resources(adapter, i);
997 }
998
999 static inline int
1000 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
1001 struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
1002 {
1003 struct ena_com_buf *ena_buf;
1004 bus_dma_segment_t segs[1];
1005 int nsegs, error;
1006 int mlen;
1007
1008 	/* If the previously allocated frag has not been used yet, keep it */
1009 if (unlikely(rx_info->mbuf != NULL))
1010 return (0);
1011
1012 /* Get mbuf using UMA allocator */
1013 rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1014 rx_ring->rx_mbuf_sz);
1015
1016 if (unlikely(rx_info->mbuf == NULL)) {
1017 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1018 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1019 if (unlikely(rx_info->mbuf == NULL)) {
1020 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1021 return (ENOMEM);
1022 }
1023 mlen = MCLBYTES;
1024 } else {
1025 mlen = rx_ring->rx_mbuf_sz;
1026 }
1027 	/* Set mbuf length */
1028 rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1029
1030 /* Map packets for DMA */
1031 ena_trace(NULL, ENA_DBG | ENA_RSC | ENA_RXPTH,
1032 "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1033 	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
1034 error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1035 rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1036 if (unlikely((error != 0) || (nsegs != 1))) {
1037 ena_trace(NULL, ENA_WARNING, "failed to map mbuf, error: %d, "
1038 "nsegs: %d\n", error, nsegs);
1039 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1040 goto exit;
1042 }
1043
1044 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1045
1046 ena_buf = &rx_info->ena_buf;
1047 ena_buf->paddr = segs[0].ds_addr;
1048 ena_buf->len = mlen;
1049
1050 ena_trace(NULL, ENA_DBG | ENA_RSC | ENA_RXPTH,
1051 "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1052 	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1053
1054 return (0);
1055
1056 exit:
1057 m_freem(rx_info->mbuf);
1058 rx_info->mbuf = NULL;
1059 return (EFAULT);
1060 }
1061
1062 static void
1063 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1064 struct ena_rx_buffer *rx_info)
1065 {
1066
1067 if (rx_info->mbuf == NULL) {
1068 ena_trace(NULL, ENA_WARNING, "Trying to free unallocated buffer\n");
1069 return;
1070 }
1071
1072 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1073 BUS_DMASYNC_POSTREAD);
1074 bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1075 m_freem(rx_info->mbuf);
1076 rx_info->mbuf = NULL;
1077 }
1078
1079 /**
1080 * ena_refill_rx_bufs - Refills ring with descriptors
1081 * @rx_ring: the ring which we want to feed with free descriptors
1082 * @num: number of descriptors to refill
1083 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1084 **/
1085 int
1086 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1087 {
1088 struct ena_adapter *adapter = rx_ring->adapter;
1089 uint16_t next_to_use, req_id;
1090 uint32_t i;
1091 int rc;
1092
1093 ena_trace(NULL, ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
1094 rx_ring->qid);
1095
1096 next_to_use = rx_ring->next_to_use;
1097
1098 for (i = 0; i < num; i++) {
1099 struct ena_rx_buffer *rx_info;
1100
1101 ena_trace(NULL, ENA_DBG | ENA_RXPTH | ENA_RSC,
1102 "RX buffer - next to use: %d\n", next_to_use);
1103
1104 req_id = rx_ring->free_rx_ids[next_to_use];
1105 rx_info = &rx_ring->rx_buffer_info[req_id];
1106 #ifdef DEV_NETMAP
1107 if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1108 rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, rx_info);
1109 else
1110 #endif /* DEV_NETMAP */
1111 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1112 if (unlikely(rc != 0)) {
1113 ena_trace(NULL, ENA_WARNING,
1114 "failed to alloc buffer for rx queue %d\n",
1115 rx_ring->qid);
1116 break;
1117 }
1118 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1119 &rx_info->ena_buf, req_id);
1120 if (unlikely(rc != 0)) {
1121 ena_trace(NULL, ENA_WARNING,
1122 "failed to add buffer for rx queue %d\n",
1123 rx_ring->qid);
1124 break;
1125 }
1126 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1127 rx_ring->ring_size);
1128 }
1129
1130 if (unlikely(i < num)) {
1131 counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1132 ena_trace(NULL, ENA_WARNING,
1133 		    "refilled rx qid %d with only %d mbufs (out of %d)\n",
1134 rx_ring->qid, i, num);
1135 }
1136
1137 if (likely(i != 0))
1138 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1139
1140 rx_ring->next_to_use = next_to_use;
1141 return (i);
1142 }
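/*
 * The return value is the number of descriptors actually refilled; it may
 * be smaller than `num' if an mbuf allocation or the submission to the SQ
 * fails. Callers such as ena_refill_all_rx_bufs() compare it against the
 * requested count to detect a partial refill.
 */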
1143
1144 int
1145 ena_update_buf_ring_size(struct ena_adapter *adapter,
1146 uint32_t new_buf_ring_size)
1147 {
1148 uint32_t old_buf_ring_size;
1149 int rc = 0;
1150 bool dev_was_up;
1151
1152 ENA_LOCK_LOCK(adapter);
1153
1154 old_buf_ring_size = adapter->buf_ring_size;
1155 adapter->buf_ring_size = new_buf_ring_size;
1156
1157 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1158 ena_down(adapter);
1159
1160 /* Reconfigure buf ring for all Tx rings. */
1161 ena_free_all_io_rings_resources(adapter);
1162 ena_init_io_rings_advanced(adapter);
1163 if (dev_was_up) {
1164 /*
1165 		 * If ena_up() fails, it's not because of the recent buf_ring
1166 		 * size change. We just want to revert to the old drbr value
1167 		 * and trigger a reset, because something else must have
1168 		 * gone wrong.
1169 */
1170 rc = ena_up(adapter);
1171 if (unlikely(rc != 0)) {
1172 device_printf(adapter->pdev,
1173 			    "Failed to configure device after setting new drbr size: %u. Reverting to old value: %u and triggering a reset\n",
1174 new_buf_ring_size, old_buf_ring_size);
1175
1176 /* Revert old size and trigger the reset */
1177 adapter->buf_ring_size = old_buf_ring_size;
1178 ena_free_all_io_rings_resources(adapter);
1179 ena_init_io_rings_advanced(adapter);
1180
1181 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1182 adapter);
1183 ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1185 }
1186 }
1187
1188 ENA_LOCK_UNLOCK(adapter);
1189
1190 return (rc);
1191 }
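/*
 * ena_update_buf_ring_size() above, as well as ena_update_queue_size()
 * and ena_update_io_queue_nb() below, all follow the same reconfiguration
 * pattern: bring the device down, apply the new setting and bring it back
 * up. If ena_up() fails afterwards, the old setting is restored and, as a
 * last resort, a device reset is triggered so that the watchdog can
 * recover the interface.
 */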
1192
1193 int
1194 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1195 uint32_t new_rx_size)
1196 {
1197 uint32_t old_tx_size, old_rx_size;
1198 int rc = 0;
1199 bool dev_was_up;
1200
1201 ENA_LOCK_LOCK(adapter);
1202
1203 old_tx_size = adapter->requested_tx_ring_size;
1204 old_rx_size = adapter->requested_rx_ring_size;
1205 adapter->requested_tx_ring_size = new_tx_size;
1206 adapter->requested_rx_ring_size = new_rx_size;
1207
1208 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1209 ena_down(adapter);
1210
1211 /* Configure queues with new size. */
1212 ena_init_io_rings_basic(adapter);
1213 if (dev_was_up) {
1214 rc = ena_up(adapter);
1215 if (unlikely(rc != 0)) {
1216 device_printf(adapter->pdev,
1217 "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1218 new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1219
1220 /* Revert old size. */
1221 adapter->requested_tx_ring_size = old_tx_size;
1222 adapter->requested_rx_ring_size = old_rx_size;
1223 ena_init_io_rings_basic(adapter);
1224
1225 /* And try again. */
1226 rc = ena_up(adapter);
1227 if (unlikely(rc != 0)) {
1228 device_printf(adapter->pdev,
1229 "Failed to revert old queue sizes. Triggering device reset.\n");
1230 /*
1231 				 * If we've failed again, something else must
1232 				 * have gone wrong. After the reset, the device
1233 				 * should try to go up.
1234 */
1235 ENA_FLAG_SET_ATOMIC(
1236 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1237 ena_trigger_reset(adapter,
1238 ENA_REGS_RESET_OS_TRIGGER);
1239 }
1240 }
1241 }
1242
1243 ENA_LOCK_UNLOCK(adapter);
1244
1245 return (rc);
1246 }
1247
1248 static void
1249 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
1250 {
1251 ena_free_all_io_rings_resources(adapter);
1252 /* Force indirection table to be reinitialized */
1253 ena_com_rss_destroy(adapter->ena_dev);
1254
1255 adapter->num_io_queues = num;
1256 ena_init_io_rings(adapter);
1257 }
1258
1259 /* Caller should sanitize new_num */
1260 int
1261 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1262 {
1263 uint32_t old_num;
1264 int rc = 0;
1265 bool dev_was_up;
1266
1267 ENA_LOCK_LOCK(adapter);
1268
1269 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1270 old_num = adapter->num_io_queues;
1271 ena_down(adapter);
1272
1273 ena_update_io_rings(adapter, new_num);
1274
1275 if (dev_was_up) {
1276 rc = ena_up(adapter);
1277 if (unlikely(rc != 0)) {
1278 device_printf(adapter->pdev,
1279 "Failed to configure device with %u IO queues. "
1280 "Reverting to previous value: %u\n",
1281 new_num, old_num);
1282
1283 ena_update_io_rings(adapter, old_num);
1284
1285 rc = ena_up(adapter);
1286 if (unlikely(rc != 0)) {
1287 device_printf(adapter->pdev,
1288 "Failed to revert to previous setup IO "
1289 "queues. Triggering device reset.\n");
1290 ENA_FLAG_SET_ATOMIC(
1291 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1292 ena_trigger_reset(adapter,
1293 ENA_REGS_RESET_OS_TRIGGER);
1294 }
1295 }
1296 }
1297
1298 ENA_LOCK_UNLOCK(adapter);
1299
1300 return (rc);
1301 }
1302
1303 static void
1304 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1305 {
1306 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1307 unsigned int i;
1308
1309 for (i = 0; i < rx_ring->ring_size; i++) {
1310 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1311
1312 if (rx_info->mbuf != NULL)
1313 ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1314 #ifdef DEV_NETMAP
1315 if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1316 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1317 if (rx_info->netmap_buf_idx != 0)
1318 ena_netmap_free_rx_slot(adapter, rx_ring,
1319 rx_info);
1320 }
1321 #endif /* DEV_NETMAP */
1322 }
1323 }
1324
1325 /**
1326 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1327 * @adapter: network interface device structure
1328 *
1329 */
1330 static void
1331 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1332 {
1333 struct ena_ring *rx_ring;
1334 int i, rc, bufs_num;
1335
1336 for (i = 0; i < adapter->num_io_queues; i++) {
1337 rx_ring = &adapter->rx_ring[i];
1338 bufs_num = rx_ring->ring_size - 1;
1339 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1340 if (unlikely(rc != bufs_num))
1341 			ena_trace(NULL, ENA_WARNING, "refilling queue %d failed. "
1342 			    "Allocated %d buffers out of %d requested\n", i, rc, bufs_num);
1343 #ifdef DEV_NETMAP
1344 rx_ring->initialized = true;
1345 #endif /* DEV_NETMAP */
1346 }
1347 }
1348
1349 static void
1350 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1351 {
1352 int i;
1353
1354 for (i = 0; i < adapter->num_io_queues; i++)
1355 ena_free_rx_bufs(adapter, i);
1356 }
1357
1358 /**
1359 * ena_free_tx_bufs - Free Tx Buffers per Queue
1360 * @adapter: network interface device structure
1361 * @qid: queue index
1362 **/
1363 static void
1364 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1365 {
1366 bool print_once = true;
1367 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1368
1369 ENA_RING_MTX_LOCK(tx_ring);
1370 for (int i = 0; i < tx_ring->ring_size; i++) {
1371 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1372
1373 if (tx_info->mbuf == NULL)
1374 continue;
1375
1376 if (print_once) {
1377 device_printf(adapter->pdev,
1378 "free uncompleted tx mbuf qid %d idx 0x%x\n",
1379 qid, i);
1380 print_once = false;
1381 } else {
1382 ena_trace(NULL, ENA_DBG,
1383 "free uncompleted tx mbuf qid %d idx 0x%x\n",
1384 qid, i);
1385 }
1386
1387 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1388 BUS_DMASYNC_POSTWRITE);
1389 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1390
1391 m_free(tx_info->mbuf);
1392 tx_info->mbuf = NULL;
1393 }
1394 ENA_RING_MTX_UNLOCK(tx_ring);
1395 }
1396
1397 static void
1398 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1399 {
1400
1401 for (int i = 0; i < adapter->num_io_queues; i++)
1402 ena_free_tx_bufs(adapter, i);
1403 }
1404
1405 static void
1406 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1407 {
1408 uint16_t ena_qid;
1409 int i;
1410
1411 for (i = 0; i < adapter->num_io_queues; i++) {
1412 ena_qid = ENA_IO_TXQ_IDX(i);
1413 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1414 }
1415 }
1416
1417 static void
1418 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1419 {
1420 uint16_t ena_qid;
1421 int i;
1422
1423 for (i = 0; i < adapter->num_io_queues; i++) {
1424 ena_qid = ENA_IO_RXQ_IDX(i);
1425 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1426 }
1427 }
1428
1429 static void
1430 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1431 {
1432 struct ena_que *queue;
1433 int i;
1434
1435 for (i = 0; i < adapter->num_io_queues; i++) {
1436 queue = &adapter->que[i];
1437 while (taskqueue_cancel(queue->cleanup_tq,
1438 &queue->cleanup_task, NULL))
1439 taskqueue_drain(queue->cleanup_tq,
1440 &queue->cleanup_task);
1441 taskqueue_free(queue->cleanup_tq);
1442 }
1443
1444 ena_destroy_all_tx_queues(adapter);
1445 ena_destroy_all_rx_queues(adapter);
1446 }
1447
1448 static int
1449 ena_create_io_queues(struct ena_adapter *adapter)
1450 {
1451 struct ena_com_dev *ena_dev = adapter->ena_dev;
1452 struct ena_com_create_io_ctx ctx;
1453 struct ena_ring *ring;
1454 struct ena_que *queue;
1455 uint16_t ena_qid;
1456 uint32_t msix_vector;
1457 int rc, i;
1458
1459 /* Create TX queues */
1460 for (i = 0; i < adapter->num_io_queues; i++) {
1461 msix_vector = ENA_IO_IRQ_IDX(i);
1462 ena_qid = ENA_IO_TXQ_IDX(i);
1463 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1464 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1465 ctx.queue_size = adapter->requested_tx_ring_size;
1466 ctx.msix_vector = msix_vector;
1467 ctx.qid = ena_qid;
1468 rc = ena_com_create_io_queue(ena_dev, &ctx);
1469 if (rc != 0) {
1470 device_printf(adapter->pdev,
1471 "Failed to create io TX queue #%d rc: %d\n", i, rc);
1472 goto err_tx;
1473 }
1474 ring = &adapter->tx_ring[i];
1475 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1476 &ring->ena_com_io_sq,
1477 &ring->ena_com_io_cq);
1478 if (rc != 0) {
1479 device_printf(adapter->pdev,
1480 "Failed to get TX queue handlers. TX queue num"
1481 " %d rc: %d\n", i, rc);
1482 ena_com_destroy_io_queue(ena_dev, ena_qid);
1483 goto err_tx;
1484 }
1485 }
1486
1487 /* Create RX queues */
1488 for (i = 0; i < adapter->num_io_queues; i++) {
1489 msix_vector = ENA_IO_IRQ_IDX(i);
1490 ena_qid = ENA_IO_RXQ_IDX(i);
1491 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1492 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1493 ctx.queue_size = adapter->requested_rx_ring_size;
1494 ctx.msix_vector = msix_vector;
1495 ctx.qid = ena_qid;
1496 rc = ena_com_create_io_queue(ena_dev, &ctx);
1497 if (unlikely(rc != 0)) {
1498 device_printf(adapter->pdev,
1499 "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1500 goto err_rx;
1501 }
1502
1503 ring = &adapter->rx_ring[i];
1504 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1505 &ring->ena_com_io_sq,
1506 &ring->ena_com_io_cq);
1507 if (unlikely(rc != 0)) {
1508 device_printf(adapter->pdev,
1509 "Failed to get RX queue handlers. RX queue num"
1510 " %d rc: %d\n", i, rc);
1511 ena_com_destroy_io_queue(ena_dev, ena_qid);
1512 goto err_rx;
1513 }
1514 }
1515
1516 for (i = 0; i < adapter->num_io_queues; i++) {
1517 queue = &adapter->que[i];
1518
1519 NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1520 queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1521 M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1522
1523 taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
1524 "%s queue %d cleanup",
1525 device_get_nameunit(adapter->pdev), i);
1526 }
1527
1528 return (0);
1529
1530 err_rx:
1531 while (i--)
1532 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1533 i = adapter->num_io_queues;
1534 err_tx:
1535 while (i--)
1536 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1537
1538 return (ENXIO);
1539 }
1540
1541 /*********************************************************************
1542 *
1543 * MSIX & Interrupt Service routine
1544 *
1545 **********************************************************************/
1546
1547 /**
1548  * ena_intr_msix_mgmnt - MSI-X Interrupt Handler for admin/async queue
1549  * @arg: the ENA adapter
1550 **/
1551 static void
1552 ena_intr_msix_mgmnt(void *arg)
1553 {
1554 struct ena_adapter *adapter = (struct ena_adapter *)arg;
1555
1556 ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1557 if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1558 ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1559 }
1560
1561 /**
1562  * ena_handle_msix - MSI-X Interrupt Handler for Tx/Rx
1563 * @arg: queue
1564 **/
1565 static int
1566 ena_handle_msix(void *arg)
1567 {
1568 struct ena_que *queue = arg;
1569 struct ena_adapter *adapter = queue->adapter;
1570 if_t ifp = adapter->ifp;
1571
1572 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1573 return (FILTER_STRAY);
1574
1575 taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1576
1577 return (FILTER_HANDLED);
1578 }
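/*
 * The filter above runs in interrupt context, so it does no datapath work
 * itself: it only checks that the interface is running and defers the
 * actual Tx/Rx processing to the queue's cleanup task, which executes in
 * the taskqueue thread created in ena_create_io_queues().
 */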
1579
1580 static int
1581 ena_enable_msix(struct ena_adapter *adapter)
1582 {
1583 device_t dev = adapter->pdev;
1584 int msix_vecs, msix_req;
1585 int i, rc = 0;
1586
1587 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1588 device_printf(dev, "Error, MSI-X is already enabled\n");
1589 return (EINVAL);
1590 }
1591
1592 	/* Reserve the maximum number of MSI-X vectors we might need */
1593 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1594
1595 adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1596 M_DEVBUF, M_WAITOK | M_ZERO);
1597
1598 ena_trace(NULL, ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1599
1600 for (i = 0; i < msix_vecs; i++) {
1601 adapter->msix_entries[i].entry = i;
1602 /* Vectors must start from 1 */
1603 adapter->msix_entries[i].vector = i + 1;
1604 }
1605
1606 msix_req = msix_vecs;
1607 rc = pci_alloc_msix(dev, &msix_vecs);
1608 if (unlikely(rc != 0)) {
1609 device_printf(dev,
1610 		    "Failed to enable MSI-X, vectors: %d, rc: %d\n", msix_vecs, rc);
1611
1612 rc = ENOSPC;
1613 goto err_msix_free;
1614 }
1615
1616 if (msix_vecs != msix_req) {
1617 if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1618 device_printf(dev,
1619 			    "Not enough MSI-X vectors allocated: %d\n",
1620 msix_vecs);
1621 pci_release_msi(dev);
1622 rc = ENOSPC;
1623 goto err_msix_free;
1624 }
1625 		device_printf(dev, "Enabled only %d MSI-X vectors (out of %d), "
1626 		    "reducing the number of queues\n", msix_vecs, msix_req);
1627 }
1628
1629 adapter->msix_vecs = msix_vecs;
1630 ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1631
1632 return (0);
1633
1634 err_msix_free:
1635 free(adapter->msix_entries, M_DEVBUF);
1636 adapter->msix_entries = NULL;
1637
1638 return (rc);
1639 }
1640
1641 static void
1642 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1643 {
1644
1645 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1646 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1647 device_get_nameunit(adapter->pdev));
1648 /*
1649 	 * The handler is NULL on purpose; it will be set
1650 	 * when the management interrupt is acquired.
1651 */
1652 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1653 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1654 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1655 adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1656 }
1657
1658 static int
1659 ena_setup_io_intr(struct ena_adapter *adapter)
1660 {
1661 static int last_bind_cpu = -1;
1662 int irq_idx;
1663
1664 if (adapter->msix_entries == NULL)
1665 return (EINVAL);
1666
1667 for (int i = 0; i < adapter->num_io_queues; i++) {
1668 irq_idx = ENA_IO_IRQ_IDX(i);
1669
1670 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1671 "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1672 adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1673 adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1674 adapter->irq_tbl[irq_idx].vector =
1675 adapter->msix_entries[irq_idx].vector;
1676 ena_trace(NULL, ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1677 adapter->msix_entries[irq_idx].vector);
1678
1679 /*
1680 * We want to bind rings to the corresponding cpu
1681 * using something similar to the RSS round-robin technique.
1682 */
1683 if (unlikely(last_bind_cpu < 0))
1684 last_bind_cpu = CPU_FIRST();
1685 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1686 last_bind_cpu;
1687 last_bind_cpu = CPU_NEXT(last_bind_cpu);
1688 }
1689
1690 return (0);
1691 }
1692
1693 static int
1694 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1695 {
1696 struct ena_irq *irq;
1697 unsigned long flags;
1698 int rc, rcc;
1699
1700 flags = RF_ACTIVE | RF_SHAREABLE;
1701
1702 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1703 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1704 &irq->vector, flags);
1705
1706 if (unlikely(irq->res == NULL)) {
1707 device_printf(adapter->pdev, "could not allocate "
1708 "irq vector: %d\n", irq->vector);
1709 return (ENXIO);
1710 }
1711
1712 rc = bus_setup_intr(adapter->pdev, irq->res,
1713 INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
1714 irq->data, &irq->cookie);
1715 if (unlikely(rc != 0)) {
1716 device_printf(adapter->pdev, "failed to register "
1717 "interrupt handler for irq %ju: %d\n",
1718 rman_get_start(irq->res), rc);
1719 goto err_res_free;
1720 }
1721 irq->requested = true;
1722
1723 return (rc);
1724
1725 err_res_free:
1726 ena_trace(NULL, ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
1727 irq->vector);
1728 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1729 irq->vector, irq->res);
1730 if (unlikely(rcc != 0))
1731 device_printf(adapter->pdev, "dev has no parent while "
1732 "releasing res for irq: %d\n", irq->vector);
1733 irq->res = NULL;
1734
1735 return (rc);
1736 }
1737
1738 static int
1739 ena_request_io_irq(struct ena_adapter *adapter)
1740 {
1741 struct ena_irq *irq;
1742 unsigned long flags = 0;
1743 int rc = 0, i, rcc;
1744
1745 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1746 device_printf(adapter->pdev,
1747 "failed to request I/O IRQ: MSI-X is not enabled\n");
1748 return (EINVAL);
1749 } else {
1750 flags = RF_ACTIVE | RF_SHAREABLE;
1751 }
1752
1753 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1754 irq = &adapter->irq_tbl[i];
1755
1756 if (unlikely(irq->requested))
1757 continue;
1758
1759 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1760 &irq->vector, flags);
1761 if (unlikely(irq->res == NULL)) {
1762 rc = ENOMEM;
1763 device_printf(adapter->pdev, "could not allocate "
1764 "irq vector: %d\n", irq->vector);
1765 goto err;
1766 }
1767
1768 rc = bus_setup_intr(adapter->pdev, irq->res,
1769 INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
1770 irq->data, &irq->cookie);
1771 if (unlikely(rc != 0)) {
1772 device_printf(adapter->pdev, "failed to register "
1773 "interrupt handler for irq %ju: %d\n",
1774 rman_get_start(irq->res), rc);
1775 goto err;
1776 }
1777 irq->requested = true;
1778
1779 ena_trace(NULL, ENA_INFO, "queue %d - cpu %d\n",
1780 i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1781 }
1782
1783 return (rc);
1784
1785 err:
1786
1787 for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1788 irq = &adapter->irq_tbl[i];
1789 rcc = 0;
1790
1791 		/* Once we have entered the err: section and irq->requested is
1792 		   true, we free both the interrupt handler and its resources */
1793 if (irq->requested)
1794 rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1795 if (unlikely(rcc != 0))
1796 device_printf(adapter->pdev, "could not release"
1797 " irq: %d, error: %d\n", irq->vector, rcc);
1798
1799 		/* If we entered the err: section without irq->requested set,
1800 		   we know it was bus_alloc_resource_any() that needs cleanup,
1801 		   provided res is not NULL. In case res is NULL, no work is
1802 		   needed in this iteration */
1803 rcc = 0;
1804 if (irq->res != NULL) {
1805 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1806 irq->vector, irq->res);
1807 }
1808 if (unlikely(rcc != 0))
1809 device_printf(adapter->pdev, "dev has no parent while "
1810 "releasing res for irq: %d\n", irq->vector);
1811 irq->requested = false;
1812 irq->res = NULL;
1813 }
1814
1815 return (rc);
1816 }
1817
1818 static void
1819 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1820 {
1821 struct ena_irq *irq;
1822 int rc;
1823
1824 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1825 if (irq->requested) {
1826 ena_trace(NULL, ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
1827 irq->vector);
1828 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1829 if (unlikely(rc != 0))
1830 device_printf(adapter->pdev, "failed to tear "
1831 "down irq: %d\n", irq->vector);
1832 irq->requested = 0;
1833 }
1834
1835 if (irq->res != NULL) {
1836 ena_trace(NULL, ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
1837 irq->vector);
1838 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1839 irq->vector, irq->res);
1840 irq->res = NULL;
1841 if (unlikely(rc != 0))
1842 device_printf(adapter->pdev, "dev has no parent while "
1843 "releasing res for irq: %d\n", irq->vector);
1844 }
1845 }
1846
1847 static void
1848 ena_free_io_irq(struct ena_adapter *adapter)
1849 {
1850 struct ena_irq *irq;
1851 int rc;
1852
1853 for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1854 irq = &adapter->irq_tbl[i];
1855 if (irq->requested) {
1856 ena_trace(NULL, ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
1857 irq->vector);
1858 rc = bus_teardown_intr(adapter->pdev, irq->res,
1859 irq->cookie);
1860 if (unlikely(rc != 0)) {
1861 device_printf(adapter->pdev, "failed to tear "
1862 "down irq: %d\n", irq->vector);
1863 }
1864 irq->requested = 0;
1865 }
1866
1867 if (irq->res != NULL) {
1868 ena_trace(NULL, ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
1869 irq->vector);
1870 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1871 irq->vector, irq->res);
1872 irq->res = NULL;
1873 if (unlikely(rc != 0)) {
1874 device_printf(adapter->pdev, "dev has no parent"
1875 " while releasing res for irq: %d\n",
1876 irq->vector);
1877 }
1878 }
1879 }
1880 }
1881
1882 static void
1883 ena_free_irqs(struct ena_adapter* adapter)
1884 {
1885
1886 ena_free_io_irq(adapter);
1887 ena_free_mgmnt_irq(adapter);
1888 ena_disable_msix(adapter);
1889 }
1890
1891 static void
1892 ena_disable_msix(struct ena_adapter *adapter)
1893 {
1894
1895 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1896 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1897 pci_release_msi(adapter->pdev);
1898 }
1899
1900 adapter->msix_vecs = 0;
1901 if (adapter->msix_entries != NULL)
1902 free(adapter->msix_entries, M_DEVBUF);
1903 adapter->msix_entries = NULL;
1904 }
1905
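/*
 * Each Tx/Rx queue pair shares a single MSI-X vector, so unmasking the
 * interrupt through the Tx CQ of each pair (ENA_IO_TXQ_IDX(i)) below
 * should be enough to re-enable interrupts for both directions; the
 * intr_reg is built with zero Rx/Tx delay intervals and unmask set to
 * true.
 */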
1906 static void
1907 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
1908 {
1909 struct ena_com_io_cq* io_cq;
1910 struct ena_eth_io_intr_reg intr_reg;
1911 uint16_t ena_qid;
1912 int i;
1913
1914 /* Unmask interrupts for all queues */
1915 for (i = 0; i < adapter->num_io_queues; i++) {
1916 ena_qid = ENA_IO_TXQ_IDX(i);
1917 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1918 ena_com_update_intr_reg(&intr_reg, 0, 0, true);
1919 ena_com_unmask_intr(io_cq, &intr_reg);
1920 }
1921 }
1922
1923 /* Configure the Rx forwarding */
1924 static int
1925 ena_rss_configure(struct ena_adapter *adapter)
1926 {
1927 struct ena_com_dev *ena_dev = adapter->ena_dev;
1928 int rc;
1929
1930 /* In case the RSS table was destroyed */
1931 if (!ena_dev->rss.tbl_log_size) {
1932 rc = ena_rss_init_default(adapter);
1933 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
1934 device_printf(adapter->pdev,
1935 "WARNING: RSS was not properly re-initialized,"
1936 " it will affect bandwidth\n");
1937 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
1938 return (rc);
1939 }
1940 }
1941
1942 /* Set indirect table */
1943 rc = ena_com_indirect_table_set(ena_dev);
1944 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1945 return (rc);
1946
1947 /* Configure hash function (if supported) */
1948 rc = ena_com_set_hash_function(ena_dev);
1949 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1950 return (rc);
1951
1952 /* Configure hash inputs (if supported) */
1953 rc = ena_com_set_hash_ctrl(ena_dev);
1954 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1955 return (rc);
1956
1957 return (0);
1958 }
1959
1960 static int
1961 ena_up_complete(struct ena_adapter *adapter)
1962 {
1963 int rc;
1964
1965 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1966 rc = ena_rss_configure(adapter);
1967 if (rc != 0) {
1968 device_printf(adapter->pdev,
1969 "Failed to configure RSS\n");
1970 return (rc);
1971 }
1972 }
1973
1974 rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
1975 if (unlikely(rc != 0))
1976 return (rc);
1977
1978 ena_refill_all_rx_bufs(adapter);
1979 ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
1980 sizeof(adapter->hw_stats));
1981
1982 return (0);
1983 }
1984
1985 static void
1986 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size,
1987 int new_rx_size)
1988 {
1989 int i;
1990
1991 for (i = 0; i < adapter->num_io_queues; i++) {
1992 adapter->tx_ring[i].ring_size = new_tx_size;
1993 adapter->rx_ring[i].ring_size = new_rx_size;
1994 }
1995 }
1996
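/*
 * Queue creation with size backoff: when ring allocation fails with
 * ENOMEM, the larger of the two ring sizes is halved (or both, when
 * equal) and the allocation is retried, until it either succeeds or a
 * ring would drop below ENA_MIN_RING_SIZE. For example, with requested
 * sizes of Tx=1024/Rx=1024 the retry sequence would be 1024/1024 ->
 * 512/512 -> 256/256 -> ... (illustrative values only).
 */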
1997 static int
1998 create_queues_with_size_backoff(struct ena_adapter *adapter)
1999 {
2000 int rc;
2001 uint32_t cur_rx_ring_size, cur_tx_ring_size;
2002 uint32_t new_rx_ring_size, new_tx_ring_size;
2003
2004 /*
2005 * Current queue sizes might be set to smaller than the requested
2006 * ones due to past queue allocation failures.
2007 */
2008 set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2009 adapter->requested_rx_ring_size);
2010
2011 while (1) {
2012 /* Allocate transmit descriptors */
2013 rc = ena_setup_all_tx_resources(adapter);
2014 if (unlikely(rc != 0)) {
2015 ena_trace(NULL, ENA_ALERT, "err_setup_tx\n");
2016 goto err_setup_tx;
2017 }
2018
2019 /* Allocate receive descriptors */
2020 rc = ena_setup_all_rx_resources(adapter);
2021 if (unlikely(rc != 0)) {
2022 ena_trace(NULL, ENA_ALERT, "err_setup_rx\n");
2023 goto err_setup_rx;
2024 }
2025
2026 /* Create IO queues for Rx & Tx */
2027 rc = ena_create_io_queues(adapter);
2028 if (unlikely(rc != 0)) {
2029 ena_trace(NULL, ENA_ALERT,
2030 "create IO queues failed\n");
2031 goto err_io_que;
2032 }
2033
2034 return (0);
2035
2036 err_io_que:
2037 ena_free_all_rx_resources(adapter);
2038 err_setup_rx:
2039 ena_free_all_tx_resources(adapter);
2040 err_setup_tx:
2041 /*
2042 * Lower the ring size if ENOMEM. Otherwise, return the
2043 * error straightaway.
2044 */
2045 if (unlikely(rc != ENOMEM)) {
2046 ena_trace(NULL, ENA_ALERT,
2047 "Queue creation failed with error code: %d\n", rc);
2048 return (rc);
2049 }
2050
2051 cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2052 cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2053
2054 device_printf(adapter->pdev,
2055 "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2056 cur_tx_ring_size, cur_rx_ring_size);
2057
2058 new_tx_ring_size = cur_tx_ring_size;
2059 new_rx_ring_size = cur_rx_ring_size;
2060
2061 /*
2062 * Decrease the size of a larger queue, or decrease both if they are
2063 * the same size.
2064 */
2065 if (cur_rx_ring_size <= cur_tx_ring_size)
2066 new_tx_ring_size = cur_tx_ring_size / 2;
2067 if (cur_rx_ring_size >= cur_tx_ring_size)
2068 new_rx_ring_size = cur_rx_ring_size / 2;
2069
2070 if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2071 new_rx_ring_size < ENA_MIN_RING_SIZE) {
2072 device_printf(adapter->pdev,
2073 "Queue creation failed with the smallest possible queue size"
2074 "of %d for both queues. Not retrying with smaller queues\n",
2075 ENA_MIN_RING_SIZE);
2076 return (rc);
2077 }
2078
2079 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2080 }
2081 }
2082
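/*
 * Bring-up order: set up and request the IO interrupts first, then
 * create the rings and IO queues (with size backoff), then finish with
 * RSS/MTU configuration and Rx buffer refill, and finally start the
 * timer service and unmask the IO interrupts. The error labels below
 * unwind in reverse order.
 */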
2083 int
2084 ena_up(struct ena_adapter *adapter)
2085 {
2086 int rc = 0;
2087
2088 if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2089 device_printf(adapter->pdev, "device is not attached!\n");
2090 return (ENXIO);
2091 }
2092
2093 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2094 return (0);
2095
2096 device_printf(adapter->pdev, "device is going UP\n");
2097
2098 /* setup interrupts for IO queues */
2099 rc = ena_setup_io_intr(adapter);
2100 if (unlikely(rc != 0)) {
2101 ena_trace(NULL, ENA_ALERT, "error setting up IO interrupt\n");
2102 goto error;
2103 }
2104 rc = ena_request_io_irq(adapter);
2105 if (unlikely(rc != 0)) {
2106 ena_trace(NULL, ENA_ALERT, "err_req_irq\n");
2107 goto error;
2108 }
2109
2110 device_printf(adapter->pdev,
2111 "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, "
2112 "LLQ is %s\n",
2113 adapter->num_io_queues,
2114 adapter->requested_rx_ring_size,
2115 adapter->requested_tx_ring_size,
2116 (adapter->ena_dev->tx_mem_queue_type ==
2117 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
2118
2119 rc = create_queues_with_size_backoff(adapter);
2120 if (unlikely(rc != 0)) {
2121 ena_trace(NULL, ENA_ALERT,
2122 "error creating queues with size backoff\n");
2123 goto err_create_queues_with_backoff;
2124 }
2125
2126 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2127 if_link_state_change(adapter->ifp, LINK_STATE_UP);
2128
2129 rc = ena_up_complete(adapter);
2130 if (unlikely(rc != 0))
2131 goto err_up_complete;
2132
2133 counter_u64_add(adapter->dev_stats.interface_up, 1);
2134
2135 ena_update_hwassist(adapter);
2136
2137 if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2138 IFF_DRV_OACTIVE);
2139
2140 /* Activate the timer service only if the device is running.
2141 * If this flag is not set, the driver is being reset and the
2142 * timer service will be activated afterwards.
2143 */
2144 if (ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) {
2145 callout_reset_sbt(&adapter->timer_service, SBT_1S,
2146 SBT_1S, ena_timer_service, (void *)adapter, 0);
2147 }
2148
2149 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2150
2151 ena_unmask_all_io_irqs(adapter);
2152
2153 return (0);
2154
2155 err_up_complete:
2156 ena_destroy_all_io_queues(adapter);
2157 ena_free_all_rx_resources(adapter);
2158 ena_free_all_tx_resources(adapter);
2159 err_create_queues_with_backoff:
2160 ena_free_io_irq(adapter);
2161 error:
2162 return (rc);
2163 }
2164
2165 static uint64_t
2166 ena_get_counter(if_t ifp, ift_counter cnt)
2167 {
2168 struct ena_adapter *adapter;
2169 struct ena_hw_stats *stats;
2170
2171 adapter = if_getsoftc(ifp);
2172 stats = &adapter->hw_stats;
2173
2174 switch (cnt) {
2175 case IFCOUNTER_IPACKETS:
2176 return (counter_u64_fetch(stats->rx_packets));
2177 case IFCOUNTER_OPACKETS:
2178 return (counter_u64_fetch(stats->tx_packets));
2179 case IFCOUNTER_IBYTES:
2180 return (counter_u64_fetch(stats->rx_bytes));
2181 case IFCOUNTER_OBYTES:
2182 return (counter_u64_fetch(stats->tx_bytes));
2183 case IFCOUNTER_IQDROPS:
2184 return (counter_u64_fetch(stats->rx_drops));
2185 case IFCOUNTER_OQDROPS:
2186 return (counter_u64_fetch(stats->tx_drops));
2187 default:
2188 return (if_get_counter_default(ifp, cnt));
2189 }
2190 }
2191
2192 static int
2193 ena_media_change(if_t ifp)
2194 {
2195 /* Media Change is not supported by firmware */
2196 return (0);
2197 }
2198
2199 static void
2200 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2201 {
2202 struct ena_adapter *adapter = if_getsoftc(ifp);
2203 ena_trace(NULL, ENA_DBG, "enter\n");
2204
2205 ENA_LOCK_LOCK(adapter);
2206
2207 ifmr->ifm_status = IFM_AVALID;
2208 ifmr->ifm_active = IFM_ETHER;
2209
2210 if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2211 ENA_LOCK_UNLOCK(adapter);
2212 ena_trace(NULL, ENA_INFO, "Link is down\n");
2213 return;
2214 }
2215
2216 ifmr->ifm_status |= IFM_ACTIVE;
2217 ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2218
2219 ENA_LOCK_UNLOCK(adapter);
2220 }
2221
2222 static void
2223 ena_init(void *arg)
2224 {
2225 struct ena_adapter *adapter = (struct ena_adapter *)arg;
2226
2227 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2228 ENA_LOCK_LOCK(adapter);
2229 ena_up(adapter);
2230 ENA_LOCK_UNLOCK(adapter);
2231 }
2232 }
2233
2234 static int
2235 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2236 {
2237 struct ena_adapter *adapter;
2238 struct ifreq *ifr;
2239 int rc;
2240
2241 adapter = ifp->if_softc;
2242 ifr = (struct ifreq *)data;
2243
2244 /*
2245 * Acquire the lock to prevent the up and down routines from running in parallel.
2246 */
2247 rc = 0;
2248 switch (command) {
2249 case SIOCSIFMTU:
2250 if (ifp->if_mtu == ifr->ifr_mtu)
2251 break;
2252 ENA_LOCK_LOCK(adapter);
2253 ena_down(adapter);
2254
2255 ena_change_mtu(ifp, ifr->ifr_mtu);
2256
2257 rc = ena_up(adapter);
2258 ENA_LOCK_UNLOCK(adapter);
2259 break;
2260
2261 case SIOCSIFFLAGS:
2262 if ((ifp->if_flags & IFF_UP) != 0) {
2263 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2264 if ((ifp->if_flags & (IFF_PROMISC |
2265 IFF_ALLMULTI)) != 0) {
2266 device_printf(adapter->pdev,
2267 "ioctl promisc/allmulti\n");
2268 }
2269 } else {
2270 ENA_LOCK_LOCK(adapter);
2271 rc = ena_up(adapter);
2272 ENA_LOCK_UNLOCK(adapter);
2273 }
2274 } else {
2275 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2276 ENA_LOCK_LOCK(adapter);
2277 ena_down(adapter);
2278 ENA_LOCK_UNLOCK(adapter);
2279 }
2280 }
2281 break;
2282
2283 case SIOCADDMULTI:
2284 case SIOCDELMULTI:
2285 break;
2286
2287 case SIOCSIFMEDIA:
2288 case SIOCGIFMEDIA:
2289 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2290 break;
2291
2292 case SIOCSIFCAP:
2293 {
2294 int reinit = 0;
2295
2296 if (ifr->ifr_reqcap != ifp->if_capenable) {
2297 ifp->if_capenable = ifr->ifr_reqcap;
2298 reinit = 1;
2299 }
2300
2301 if ((reinit != 0) &&
2302 ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2303 ENA_LOCK_LOCK(adapter);
2304 ena_down(adapter);
2305 rc = ena_up(adapter);
2306 ENA_LOCK_UNLOCK(adapter);
2307 }
2308 }
2309
2310 break;
2311 default:
2312 rc = ether_ioctl(ifp, command, data);
2313 break;
2314 }
2315
2316 return (rc);
2317 }
2318
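/*
 * Translate the device's reported Tx/Rx offload bits into ifnet
 * capability flags (IFCAP_*). LRO and jumbo MTU are advertised
 * unconditionally, as they do not depend on the reported offload
 * masks.
 */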
2319 static int
2320 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2321 {
2322 int caps = 0;
2323
2324 if ((feat->offload.tx &
2325 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2326 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2327 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2328 caps |= IFCAP_TXCSUM;
2329
2330 if ((feat->offload.tx &
2331 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2332 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2333 caps |= IFCAP_TXCSUM_IPV6;
2334
2335 if ((feat->offload.tx &
2336 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2337 caps |= IFCAP_TSO4;
2338
2339 if ((feat->offload.tx &
2340 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2341 caps |= IFCAP_TSO6;
2342
2343 if ((feat->offload.rx_supported &
2344 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2345 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2346 caps |= IFCAP_RXCSUM;
2347
2348 if ((feat->offload.rx_supported &
2349 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2350 caps |= IFCAP_RXCSUM_IPV6;
2351
2352 caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2353
2354 return (caps);
2355 }
2356
2357 static void
2358 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2359 {
2360
2361 host_info->supported_network_features[0] =
2362 (uint32_t)if_getcapabilities(ifp);
2363 }
2364
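/*
 * Map the currently enabled ifnet capabilities (IFCAP_*) onto mbuf
 * checksum/TSO request flags (CSUM_*), gated by the Tx offloads the
 * device reported at init time. This runs on every ena_up(), which
 * also covers capability changes made through SIOCSIFCAP.
 */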
2365 static void
2366 ena_update_hwassist(struct ena_adapter *adapter)
2367 {
2368 if_t ifp = adapter->ifp;
2369 uint32_t feat = adapter->tx_offload_cap;
2370 int cap = if_getcapenable(ifp);
2371 int flags = 0;
2372
2373 if_clearhwassist(ifp);
2374
2375 if ((cap & IFCAP_TXCSUM) != 0) {
2376 if ((feat &
2377 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2378 flags |= CSUM_IP;
2379 if ((feat &
2380 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2381 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2382 flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2383 }
2384
2385 if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2386 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2387
2388 if ((cap & IFCAP_TSO4) != 0)
2389 flags |= CSUM_IP_TSO;
2390
2391 if ((cap & IFCAP_TSO6) != 0)
2392 flags |= CSUM_IP6_TSO;
2393
2394 if_sethwassistbits(ifp, flags, 0);
2395 }
2396
2397 static int
2398 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2399 struct ena_com_dev_get_features_ctx *feat)
2400 {
2401 if_t ifp;
2402 int caps = 0;
2403
2404 ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2405 if (unlikely(ifp == NULL)) {
2406 ena_trace(NULL, ENA_ALERT, "can not allocate ifnet structure\n");
2407 return (ENXIO);
2408 }
2409 if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2410 if_setdev(ifp, pdev);
2411 if_setsoftc(ifp, adapter);
2412
2413 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
2414 IFF_KNOWSEPOCH);
2415 if_setinitfn(ifp, ena_init);
2416 if_settransmitfn(ifp, ena_mq_start);
2417 if_setqflushfn(ifp, ena_qflush);
2418 if_setioctlfn(ifp, ena_ioctl);
2419 if_setgetcounterfn(ifp, ena_get_counter);
2420
2421 if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2422 if_setsendqready(ifp);
2423 if_setmtu(ifp, ETHERMTU);
2424 if_setbaudrate(ifp, 0);
2425 /* Zeroize capabilities... */
2426 if_setcapabilities(ifp, 0);
2427 if_setcapenable(ifp, 0);
2428 /* check hardware support */
2429 caps = ena_get_dev_offloads(feat);
2430 /* ... and set them */
2431 if_setcapabilitiesbit(ifp, caps, 0);
2432
2433 /* TSO parameters */
2434 ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2435 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2436 ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2437 ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2438
2439 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2440 if_setcapenable(ifp, if_getcapabilities(ifp));
2441
2442 /*
2443 * Specify the media types supported by this adapter and register
2444 * callbacks to update media and link information
2445 */
2446 ifmedia_init(&adapter->media, IFM_IMASK,
2447 ena_media_change, ena_media_status);
2448 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2449 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2450
2451 ether_ifattach(ifp, adapter->mac_addr);
2452
2453 return (0);
2454 }
2455
2456 void
2457 ena_down(struct ena_adapter *adapter)
2458 {
2459 int rc;
2460
2461 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2462 return;
2463
2464 device_printf(adapter->pdev, "device is going DOWN\n");
2465
2466 callout_drain(&adapter->timer_service);
2467
2468 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2469 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2470 IFF_DRV_RUNNING);
2471
2472 ena_free_io_irq(adapter);
2473
2474 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2475 rc = ena_com_dev_reset(adapter->ena_dev,
2476 adapter->reset_reason);
2477 if (unlikely(rc != 0))
2478 device_printf(adapter->pdev,
2479 "Device reset failed\n");
2480 }
2481
2482 ena_destroy_all_io_queues(adapter);
2483
2484 ena_free_all_tx_bufs(adapter);
2485 ena_free_all_rx_bufs(adapter);
2486 ena_free_all_tx_resources(adapter);
2487 ena_free_all_rx_resources(adapter);
2488
2489 counter_u64_add(adapter->dev_stats.interface_down, 1);
2490 }
2491
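/*
 * The effective maximum number of IO queue pairs is the minimum of the
 * CPU count, the driver limit, the device's Rx and Tx SQ/CQ limits (or
 * the LLQ limit when LLQ placement is used) and the MSI-X vectors left
 * after reserving one for management:
 *
 *	max = min(mp_ncpus, ENA_MAX_NUM_IO_QUEUES, io_rx_num,
 *	    io_tx_sq_num, io_tx_cq_num, pci_msix_count(pdev) - 1)
 */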
2492 static uint32_t
2493 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2494 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2495 {
2496 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2497
2498 /* Regular queues capabilities */
2499 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2500 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2501 &get_feat_ctx->max_queue_ext.max_queue_ext;
2502 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2503 max_queue_ext->max_rx_cq_num);
2504
2505 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2506 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2507 } else {
2508 struct ena_admin_queue_feature_desc *max_queues =
2509 &get_feat_ctx->max_queues;
2510 io_tx_sq_num = max_queues->max_sq_num;
2511 io_tx_cq_num = max_queues->max_cq_num;
2512 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2513 }
2514
2515 /* In case of LLQ, use the LLQ fields for the Tx SQ/CQ */
2516 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2517 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2518
2519 max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2520 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2521 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2522 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2523 /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2524 max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2525 pci_msix_count(pdev) - 1);
2526
2527 return (max_num_io_queues);
2528 }
2529
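/*
 * LLQ pushes Tx descriptors and packet headers directly into device
 * memory over PCIe, so the mapping benefits from write combining: the
 * CPU can merge the sequential writes into larger bursts instead of
 * issuing individual uncacheable transactions.
 */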
2530 static int
2531 ena_enable_wc(struct resource *res)
2532 {
2533 #if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2534 vm_offset_t va;
2535 vm_size_t len;
2536 int rc;
2537
2538 va = (vm_offset_t)rman_get_virtual(res);
2539 len = rman_get_size(res);
2540 /* Enable write combining */
2541 rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2542 if (unlikely(rc != 0)) {
2543 ena_trace(NULL, ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
2544 return (rc);
2545 }
2546
2547 return (0);
2548 #endif
2549 return (EOPNOTSUPP);
2550 }
2551
2552 static int
2553 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2554 struct ena_admin_feature_llq_desc *llq,
2555 struct ena_llq_configurations *llq_default_configurations)
2556 {
2557 struct ena_adapter *adapter = device_get_softc(pdev);
2558 int rc, rid;
2559 uint32_t llq_feature_mask;
2560
2561 llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2562 if (!(ena_dev->supported_features & llq_feature_mask)) {
2563 device_printf(pdev,
2564 "LLQ is not supported. Fallback to host mode policy.\n");
2565 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2566 return (0);
2567 }
2568
2569 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2570 if (unlikely(rc != 0)) {
2571 device_printf(pdev, "Failed to configure the device mode. "
2572 "Fallback to host mode policy.\n");
2573 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2574 return (0);
2575 }
2576
2577 /* Nothing to config, exit */
2578 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2579 return (0);
2580
2581 /* Try to allocate resources for LLQ bar */
2582 rid = PCIR_BAR(ENA_MEM_BAR);
2583 adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
2584 &rid, RF_ACTIVE);
2585 if (unlikely(adapter->memory == NULL)) {
2586 device_printf(pdev, "unable to allocate LLQ bar resource. "
2587 "Fallback to host mode policy.\n");
2588 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2589 return (0);
2590 }
2591
2592 /* Enable write combining for better LLQ performance */
2593 rc = ena_enable_wc(adapter->memory);
2594 if (unlikely(rc != 0)) {
2595 device_printf(pdev, "failed to enable write combining.\n");
2596 return (rc);
2597 }
2598
2599 /*
2600 * Save virtual address of the device's memory region
2601 * for the ena_com layer.
2602 */
2603 ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2604
2605 return (0);
2606 }
2607
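/*
 * Default LLQ layout, matching the values set below: inline packet
 * headers, 128-byte ring entries holding multiple descriptors each,
 * and 2 descriptors placed before the header.
 */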
2608 static inline void
2609 set_default_llq_configurations(struct ena_llq_configurations *llq_config)
2610 {
2611 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2612 llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2613 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2614 llq_config->llq_num_decs_before_header =
2615 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2616 llq_config->llq_ring_entry_size_value = 128;
2617 }
2618
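/*
 * Ring sizing: start from ENA_DEFAULT_RING_SIZE, clamp into
 * [ENA_MIN_RING_SIZE, device maximum] and round down to a power of two
 * with 1 << (flsl(x) - 1). For example, flsl(1000) == 10, so a
 * 1000-entry device limit would round down to 512 (illustrative value
 * only).
 */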
2619 static int
2620 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2621 {
2622 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2623 struct ena_com_dev *ena_dev = ctx->ena_dev;
2624 uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2625 uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2626 uint32_t max_tx_queue_size;
2627 uint32_t max_rx_queue_size;
2628
2629 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2630 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2631 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2632 max_rx_queue_size = min_t(uint32_t,
2633 max_queue_ext->max_rx_cq_depth,
2634 max_queue_ext->max_rx_sq_depth);
2635 max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2636
2637 if (ena_dev->tx_mem_queue_type ==
2638 ENA_ADMIN_PLACEMENT_POLICY_DEV)
2639 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2640 llq->max_llq_depth);
2641 else
2642 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2643 max_queue_ext->max_tx_sq_depth);
2644
2645 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2646 max_queue_ext->max_per_packet_tx_descs);
2647 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2648 max_queue_ext->max_per_packet_rx_descs);
2649 } else {
2650 struct ena_admin_queue_feature_desc *max_queues =
2651 &ctx->get_feat_ctx->max_queues;
2652 max_rx_queue_size = min_t(uint32_t,
2653 max_queues->max_cq_depth,
2654 max_queues->max_sq_depth);
2655 max_tx_queue_size = max_queues->max_cq_depth;
2656
2657 if (ena_dev->tx_mem_queue_type ==
2658 ENA_ADMIN_PLACEMENT_POLICY_DEV)
2659 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2660 llq->max_llq_depth);
2661 else
2662 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2663 max_queues->max_sq_depth);
2664
2665 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2666 max_queues->max_packet_tx_descs);
2667 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2668 max_queues->max_packet_rx_descs);
2669 }
2670
2671 /* round down to the nearest power of 2 */
2672 max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2673 max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
2674
2675 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2676 max_tx_queue_size);
2677 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2678 max_rx_queue_size);
2679
2680 tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2681 rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2682
2683 ctx->max_tx_queue_size = max_tx_queue_size;
2684 ctx->max_rx_queue_size = max_rx_queue_size;
2685 ctx->tx_queue_size = tx_queue_size;
2686 ctx->rx_queue_size = rx_queue_size;
2687
2688 return (0);
2689 }
2690
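/*
 * Default RSS setup: the indirection table is filled round-robin
 * across the IO queues (entry i maps to queue i % num_io_queues) and
 * the hash function falls back to CRC32, unless the kernel RSS option
 * is enabled and provides a Toeplitz key.
 */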
2691 static int
2692 ena_rss_init_default(struct ena_adapter *adapter)
2693 {
2694 struct ena_com_dev *ena_dev = adapter->ena_dev;
2695 device_t dev = adapter->pdev;
2696 int qid, rc, i;
2697
2698 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2699 if (unlikely(rc != 0)) {
2700 device_printf(dev, "Cannot init indirect table\n");
2701 return (rc);
2702 }
2703
2704 for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
2705 qid = i % adapter->num_io_queues;
2706 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
2707 ENA_IO_RXQ_IDX(qid));
2708 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2709 device_printf(dev, "Cannot fill indirect table\n");
2710 goto err_rss_destroy;
2711 }
2712 }
2713
2714 #ifdef RSS
2715 uint8_t rss_algo = rss_gethashalgo();
2716 if (rss_algo == RSS_HASH_TOEPLITZ) {
2717 uint8_t hash_key[RSS_KEYSIZE];
2718
2719 rss_getkey(hash_key);
2720 rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ,
2721 hash_key, RSS_KEYSIZE, 0xFFFFFFFF);
2722 } else
2723 #endif
2724 rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
2725 ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
2726 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2727 device_printf(dev, "Cannot fill hash function\n");
2728 goto err_rss_destroy;
2729 }
2730
2731 rc = ena_com_set_default_hash_ctrl(ena_dev);
2732 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2733 device_printf(dev, "Cannot fill hash control\n");
2734 goto err_rss_destroy;
2735 }
2736
2737 return (0);
2738
2739 err_rss_destroy:
2740 ena_com_rss_destroy(ena_dev);
2741 return (rc);
2742 }
2743
2744 static void
2745 ena_rss_init_default_deferred(void *arg)
2746 {
2747 struct ena_adapter *adapter;
2748 devclass_t dc;
2749 int max;
2750 int rc;
2751
2752 dc = devclass_find("ena");
2753 if (unlikely(dc == NULL)) {
2754 ena_trace(NULL, ENA_ALERT, "No devclass ena\n");
2755 return;
2756 }
2757
2758 max = devclass_get_maxunit(dc);
2759 while (max-- >= 0) {
2760 adapter = devclass_get_softc(dc, max);
2761 if (adapter != NULL) {
2762 rc = ena_rss_init_default(adapter);
2763 ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2764 if (unlikely(rc != 0)) {
2765 device_printf(adapter->pdev,
2766 "WARNING: RSS was not properly initialized,"
2767 " it will affect bandwidth\n");
2768 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2769 }
2770 }
2771 }
2772 }
2773 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
2774
2775 static void
2776 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2777 {
2778 struct ena_admin_host_info *host_info;
2779 uintptr_t rid;
2780 int rc;
2781
2782 /* Allocate only the host info */
2783 rc = ena_com_allocate_host_info(ena_dev);
2784 if (unlikely(rc != 0)) {
2785 ena_trace(NULL, ENA_ALERT, "Cannot allocate host info\n");
2786 return;
2787 }
2788
2789 host_info = ena_dev->host_attr.host_info;
2790
2791 if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2792 host_info->bdf = rid;
2793 host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2794 host_info->kernel_ver = osreldate;
2795
2796 sprintf(host_info->kernel_ver_str, "%d", osreldate);
2797 host_info->os_dist = 0;
2798 strncpy(host_info->os_dist_str, osrelease,
2799 sizeof(host_info->os_dist_str) - 1);
2800
2801 host_info->driver_version =
2802 (DRV_MODULE_VER_MAJOR) |
2803 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2804 (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2805 host_info->num_cpus = mp_ncpus;
2806 host_info->driver_supported_features =
2807 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK;
2808
2809 rc = ena_com_set_host_attributes(ena_dev);
2810 if (unlikely(rc != 0)) {
2811 if (rc == EOPNOTSUPP)
2812 ena_trace(NULL, ENA_WARNING, "Cannot set host attributes\n");
2813 else
2814 ena_trace(NULL, ENA_ALERT, "Cannot set host attributes\n");
2815
2816 goto err;
2817 }
2818
2819 return;
2820
2821 err:
2822 ena_com_delete_host_info(ena_dev);
2823 }
2824
2825 static int
2826 ena_device_init(struct ena_adapter *adapter, device_t pdev,
2827 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2828 {
2829 struct ena_com_dev* ena_dev = adapter->ena_dev;
2830 bool readless_supported;
2831 uint32_t aenq_groups;
2832 int dma_width;
2833 int rc;
2834
2835 rc = ena_com_mmio_reg_read_request_init(ena_dev);
2836 if (unlikely(rc != 0)) {
2837 device_printf(pdev, "failed to init mmio read less\n");
2838 return (rc);
2839 }
2840
2841 /*
2842 * The PCIe configuration space revision ID indicates whether the MMIO
2843 * register read is disabled.
2844 */
2845 readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2846 ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2847
2848 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2849 if (unlikely(rc != 0)) {
2850 device_printf(pdev, "Can not reset device\n");
2851 goto err_mmio_read_less;
2852 }
2853
2854 rc = ena_com_validate_version(ena_dev);
2855 if (unlikely(rc != 0)) {
2856 device_printf(pdev, "device version is too low\n");
2857 goto err_mmio_read_less;
2858 }
2859
2860 dma_width = ena_com_get_dma_width(ena_dev);
2861 if (unlikely(dma_width < 0)) {
2862 device_printf(pdev, "Invalid dma width value %d", dma_width);
2863 rc = dma_width;
2864 goto err_mmio_read_less;
2865 }
2866 adapter->dma_width = dma_width;
2867
2868 /* ENA admin level init */
2869 rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2870 if (unlikely(rc != 0)) {
2871 device_printf(pdev,
2872 "Can not initialize ena admin queue with device\n");
2873 goto err_mmio_read_less;
2874 }
2875
2876 /*
2877 * To enable the MSI-X interrupts, the driver needs to know the number
2878 * of queues, so it uses polling mode to retrieve this
2879 * information.
2880 */
2881 ena_com_set_admin_polling_mode(ena_dev, true);
2882
2883 ena_config_host_info(ena_dev, pdev);
2884
2885 /* Get Device Attributes */
2886 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2887 if (unlikely(rc != 0)) {
2888 device_printf(pdev,
2889 "Cannot get attribute for ena device rc: %d\n", rc);
2890 goto err_admin_init;
2891 }
2892
2893 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2894 BIT(ENA_ADMIN_FATAL_ERROR) |
2895 BIT(ENA_ADMIN_WARNING) |
2896 BIT(ENA_ADMIN_NOTIFICATION) |
2897 BIT(ENA_ADMIN_KEEP_ALIVE);
2898
2899 aenq_groups &= get_feat_ctx->aenq.supported_groups;
2900 rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2901 if (unlikely(rc != 0)) {
2902 device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
2903 goto err_admin_init;
2904 }
2905
2906 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2907
2908 return (0);
2909
2910 err_admin_init:
2911 ena_com_delete_host_info(ena_dev);
2912 ena_com_admin_destroy(ena_dev);
2913 err_mmio_read_less:
2914 ena_com_mmio_reg_read_request_destroy(ena_dev);
2915
2916 return (rc);
2917 }
2918
2919 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2920 {
2921 struct ena_com_dev *ena_dev = adapter->ena_dev;
2922 int rc;
2923
2924 rc = ena_enable_msix(adapter);
2925 if (unlikely(rc != 0)) {
2926 device_printf(adapter->pdev, "Error with MSI-X enablement\n");
2927 return (rc);
2928 }
2929
2930 ena_setup_mgmnt_intr(adapter);
2931
2932 rc = ena_request_mgmnt_irq(adapter);
2933 if (unlikely(rc != 0)) {
2934 device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
2935 goto err_disable_msix;
2936 }
2937
2938 ena_com_set_admin_polling_mode(ena_dev, false);
2939
2940 ena_com_admin_aenq_enable(ena_dev);
2941
2942 return (0);
2943
2944 err_disable_msix:
2945 ena_disable_msix(adapter);
2946
2947 return (rc);
2948 }
2949
2950 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
2951 static void ena_keep_alive_wd(void *adapter_data,
2952 struct ena_admin_aenq_entry *aenq_e)
2953 {
2954 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2955 struct ena_admin_aenq_keep_alive_desc *desc;
2956 sbintime_t stime;
2957 uint64_t rx_drops;
2958 uint64_t tx_drops;
2959
2960 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2961
2962 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2963 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2964 counter_u64_zero(adapter->hw_stats.rx_drops);
2965 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2966 counter_u64_zero(adapter->hw_stats.tx_drops);
2967 counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2968
2969 stime = getsbinuptime();
2970 atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
2971 }
2972
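/*
 * Keep-alive watchdog: the device is expected to emit KEEP_ALIVE AENQ
 * events periodically, and ena_keep_alive_wd() above stamps
 * keep_alive_timestamp on each one. The check below compares the time
 * elapsed since the last stamp against keep_alive_timeout and triggers
 * a device reset when it is exceeded, unless the watchdog is inactive
 * or the timeout is set to ENA_HW_HINTS_NO_TIMEOUT.
 */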
2973 /* Check for keep alive expiration */
2974 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2975 {
2976 sbintime_t timestamp, time;
2977
2978 if (adapter->wd_active == 0)
2979 return;
2980
2981 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2982 return;
2983
2984 timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
2985 time = getsbinuptime() - timestamp;
2986 if (unlikely(time > adapter->keep_alive_timeout)) {
2987 device_printf(adapter->pdev,
2988 "Keep alive watchdog timeout.\n");
2989 counter_u64_add(adapter->dev_stats.wd_expired, 1);
2990 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
2991 }
2992 }
2993
2994 /* Check if admin queue is enabled */
2995 static void check_for_admin_com_state(struct ena_adapter *adapter)
2996 {
2997 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
2998 false)) {
2999 device_printf(adapter->pdev,
3000 "ENA admin queue is not in running state!\n");
3001 counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3002 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
3003 }
3004 }
3005
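/*
 * Detect a queue whose CQ has entries pending but which has never
 * raised an interrupt: if that state persists for
 * ENA_MAX_NO_INTERRUPT_ITERATIONS consecutive checks, assume broken
 * MSI-X delivery and trigger a device reset.
 */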
3006 static int
3007 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3008 struct ena_ring *rx_ring)
3009 {
3010 if (likely(rx_ring->first_interrupt))
3011 return (0);
3012
3013 if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3014 return (0);
3015
3016 rx_ring->no_interrupt_event_cnt++;
3017
3018 if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3019 device_printf(adapter->pdev, "Potential MSIX issue on Rx side, "
3020 "Queue = %d. Resetting the device\n", rx_ring->qid);
3021 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3022 return (EIO);
3023 }
3024
3025 return (0);
3026 }
3027
3028 static int
3029 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3030 struct ena_ring *tx_ring)
3031 {
3032 struct bintime curtime, time;
3033 struct ena_tx_buffer *tx_buf;
3034 sbintime_t time_offset;
3035 uint32_t missed_tx = 0;
3036 int i, rc = 0;
3037
3038 getbinuptime(&curtime);
3039
3040 for (i = 0; i < tx_ring->ring_size; i++) {
3041 tx_buf = &tx_ring->tx_buffer_info[i];
3042
3043 if (bintime_isset(&tx_buf->timestamp) == 0)
3044 continue;
3045
3046 time = curtime;
3047 bintime_sub(&time, &tx_buf->timestamp);
3048 time_offset = bttosbt(time);
3049
3050 if (unlikely(!tx_ring->first_interrupt &&
3051 time_offset > 2 * adapter->missing_tx_timeout)) {
3052 /*
3053 * If the interrupt is still not received after the grace
3054 * period, schedule a reset.
3055 */
3056 device_printf(adapter->pdev,
3057 "Potential MSIX issue on Tx side Queue = %d. "
3058 "Reset the device\n", tx_ring->qid);
3059 ena_trigger_reset(adapter,
3060 ENA_REGS_RESET_MISS_INTERRUPT);
3061 return (EIO);
3062 }
3063
3064 /* Check again if packet is still waiting */
3065 if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3066
3067 if (!tx_buf->print_once)
3068 ena_trace(NULL, ENA_WARNING, "Found a Tx that wasn't "
3069 "completed on time, qid %d, index %d.\n",
3070 tx_ring->qid, i);
3071
3072 tx_buf->print_once = true;
3073 missed_tx++;
3074 }
3075 }
3076
3077 if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3078 device_printf(adapter->pdev,
3079 "The number of lost tx completion is above the threshold "
3080 "(%d > %d). Reset the device\n",
3081 missed_tx, adapter->missing_tx_threshold);
3082 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3083 rc = EIO;
3084 }
3085
3086 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3087
3088 return (rc);
3089 }
3090
3091 /*
3092 * Check for Tx completions which did not arrive on time.
3093 * The timeout is defined by "missing_tx_timeout".
3094 * A reset will be performed if the number of incomplete
3095 * transactions exceeds "missing_tx_threshold".
3096 */
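/*
 * To bound the work done per timer tick, at most missing_tx_max_queues
 * queue pairs are inspected per invocation; next_monitored_tx_qid
 * remembers where to resume, so all queues are covered round-robin
 * across successive ticks.
 */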
3097 static void
3098 check_for_missing_completions(struct ena_adapter *adapter)
3099 {
3100 struct ena_ring *tx_ring;
3101 struct ena_ring *rx_ring;
3102 int i, budget, rc;
3103
3104 /* Make sure the device isn't being turned off by another process */
3105 rmb();
3106
3107 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3108 return;
3109
3110 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3111 return;
3112
3113 if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3114 return;
3115
3116 budget = adapter->missing_tx_max_queues;
3117
3118 for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3119 tx_ring = &adapter->tx_ring[i];
3120 rx_ring = &adapter->rx_ring[i];
3121
3122 rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3123 if (unlikely(rc != 0))
3124 return;
3125
3126 rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3127 if (unlikely(rc != 0))
3128 return;
3129
3130 budget--;
3131 if (budget == 0) {
3132 i++;
3133 break;
3134 }
3135 }
3136
3137 adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3138 }
3139
3140 /* Trigger Rx cleanup after 2 consecutive detections */
3141 #define EMPTY_RX_REFILL 2
3142 /* For the rare case where the device runs out of Rx descriptors and the
3143 * msix handler failed to refill new Rx descriptors (due to a lack of memory
3144 * for example).
3145 * This case will lead to a deadlock:
3146 * The device won't send interrupts since all the new Rx packets will be
3147 * dropped, and the MSI-X handler won't allocate new Rx descriptors, so the
3148 * device won't be able to receive new packets.
3149 *
3150 * When such a situation is detected, execute the Rx cleanup task in another thread.
3151 */
3152 static void
3153 check_for_empty_rx_ring(struct ena_adapter *adapter)
3154 {
3155 struct ena_ring *rx_ring;
3156 int i, refill_required;
3157
3158 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3159 return;
3160
3161 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3162 return;
3163
3164 for (i = 0; i < adapter->num_io_queues; i++) {
3165 rx_ring = &adapter->rx_ring[i];
3166
3167 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3168 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3169 rx_ring->empty_rx_queue++;
3170
3171 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3172 counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3173 1);
3174
3175 device_printf(adapter->pdev,
3176 "trigger refill for ring %d\n", i);
3177
3178 taskqueue_enqueue(rx_ring->que->cleanup_tq,
3179 &rx_ring->que->cleanup_task);
3180 rx_ring->empty_rx_queue = 0;
3181 }
3182 } else {
3183 rx_ring->empty_rx_queue = 0;
3184 }
3185 }
3186 }
3187
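/*
 * Apply device-provided HW hints. The timeout hints are presumably
 * expressed in milliseconds: the admin and MMIO values are converted
 * to microseconds (* 1000) and the Tx-completion and watchdog values
 * to sbintime units (* SBT_1MS).
 */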
3188 static void ena_update_hints(struct ena_adapter *adapter,
3189 struct ena_admin_ena_hw_hints *hints)
3190 {
3191 struct ena_com_dev *ena_dev = adapter->ena_dev;
3192
3193 if (hints->admin_completion_tx_timeout)
3194 ena_dev->admin_queue.completion_timeout =
3195 hints->admin_completion_tx_timeout * 1000;
3196
3197 if (hints->mmio_read_timeout)
3198 /* convert to usec */
3199 ena_dev->mmio_read.reg_read_to =
3200 hints->mmio_read_timeout * 1000;
3201
3202 if (hints->missed_tx_completion_count_threshold_to_reset)
3203 adapter->missing_tx_threshold =
3204 hints->missed_tx_completion_count_threshold_to_reset;
3205
3206 if (hints->missing_tx_completion_timeout) {
3207 if (hints->missing_tx_completion_timeout ==
3208 ENA_HW_HINTS_NO_TIMEOUT)
3209 adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3210 else
3211 adapter->missing_tx_timeout =
3212 SBT_1MS * hints->missing_tx_completion_timeout;
3213 }
3214
3215 if (hints->driver_watchdog_timeout) {
3216 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3217 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3218 else
3219 adapter->keep_alive_timeout =
3220 SBT_1MS * hints->driver_watchdog_timeout;
3221 }
3222 }
3223
3224 /**
3225 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3226 * @adapter: ENA device adapter
3227 *
3228 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics
3229 * and other error codes on failure.
3230 *
3231 * This function can possibly cause a race with other calls to the admin queue.
3232 * Because of that, the caller should either lock this function or make sure
3233 * that there is no race in the current context.
3234 */
3235 static int
3236 ena_copy_eni_metrics(struct ena_adapter *adapter)
3237 {
3238 static bool print_once = true;
3239 int rc;
3240
3241 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3242
3243 if (rc != 0) {
3244 if (rc == ENA_COM_UNSUPPORTED) {
3245 if (print_once) {
3246 device_printf(adapter->pdev,
3247 "Retrieving ENI metrics is not supported.\n");
3248 print_once = false;
3249 } else {
3250 ena_trace(NULL, ENA_DBG,
3251 "Retrieving ENI metrics is not supported.\n");
3252 }
3253 } else {
3254 device_printf(adapter->pdev,
3255 "Failed to get ENI metrics: %d\n", rc);
3256 }
3257 }
3258
3259 return (rc);
3260 }
3261
3262 static void
3263 ena_timer_service(void *data)
3264 {
3265 struct ena_adapter *adapter = (struct ena_adapter *)data;
3266 struct ena_admin_host_info *host_info =
3267 adapter->ena_dev->host_attr.host_info;
3268
3269 check_for_missing_keep_alive(adapter);
3270
3271 check_for_admin_com_state(adapter);
3272
3273 check_for_missing_completions(adapter);
3274
3275 check_for_empty_rx_ring(adapter);
3276
3277 /*
3278 * User-controlled update of the ENI metrics.
3279 * If the interval was set to 0, the stats shouldn't be updated at
3280 * all.
3281 * Otherwise, wait 'eni_metrics_sample_interval' seconds before
3282 * updating the stats.
3283 * As the timer service is executed every second, it's enough to
3284 * increment the appropriate counter on each execution.
3285 */
3286 if ((adapter->eni_metrics_sample_interval != 0) &&
3287 (++adapter->eni_metrics_sample_interval_cnt >=
3288 adapter->eni_metrics_sample_interval)) {
3289 /*
3290 * There is no race with other admin queue calls, as:
3291 * - Timer service runs after interface is up, so all
3292 * configuration calls to the admin queue are finished.
3293 * - After interface is up, the driver doesn't use (at least
3294 * for now) other functions writing to the admin queue.
3295 *
3296 * This may change in the future, in which case a lock will be
3297 * needed. ENA_LOCK_*() cannot be used for that purpose, as the
3298 * ena_timer_service callout is protected by it; a deadlock could
3299 * occur if callout_drain() held the lock while
3300 * ena_copy_eni_metrics() was being executed. It's advised to use a
3301 * separate lock dedicated solely to the admin queue in that
3302 * situation.
3303 */
3304 (void)ena_copy_eni_metrics(adapter);
3305 adapter->eni_metrics_sample_interval_cnt = 0;
3306 }
3307
3308
3309 if (host_info != NULL)
3310 ena_update_host_info(host_info, adapter->ifp);
3311
3312 if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3313 device_printf(adapter->pdev, "Trigger reset is on\n");
3314 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3315 return;
3316 }
3317
3318 /*
3319 * Schedule another timeout one second from now.
3320 */
3321 callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
3322 }
3323
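/*
 * ena_destroy_device() and ena_restore_device() form the two halves of
 * the reset path driven by ena_reset_task(): destroy tears down the IO
 * path, IRQs, MSI-X and the admin queue (resetting the device unless
 * ena_down() already did), while restore re-runs the init sequence and
 * brings the interface back up if it was up before the reset.
 */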
3324 void
3325 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3326 {
3327 if_t ifp = adapter->ifp;
3328 struct ena_com_dev *ena_dev = adapter->ena_dev;
3329 bool dev_up;
3330
3331 if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3332 return;
3333
3334 if_link_state_change(ifp, LINK_STATE_DOWN);
3335
3336 callout_drain(&adapter->timer_service);
3337
3338 dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3339 if (dev_up)
3340 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3341
3342 if (!graceful)
3343 ena_com_set_admin_running_state(ena_dev, false);
3344
3345 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3346 ena_down(adapter);
3347
3348 /*
3349 * Stop the device from sending AENQ events (if the device was up and
3350 * the trigger reset was on, ena_down has already performed the device reset)
3351 */
3352 if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3353 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3354
3355 ena_free_mgmnt_irq(adapter);
3356
3357 ena_disable_msix(adapter);
3358
3359 /*
3360 * IO rings resources should be freed because `ena_restore_device()`
3361 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3362 * vectors. The amount of MSIX vectors after destroy-restore may be
3363 * different than before. Therefore, IO rings resources should be
3364 * established from scratch each time.
3365 */
3366 ena_free_all_io_rings_resources(adapter);
3367
3368 ena_com_abort_admin_commands(ena_dev);
3369
3370 ena_com_wait_for_abort_completion(ena_dev);
3371
3372 ena_com_admin_destroy(ena_dev);
3373
3374 ena_com_mmio_reg_read_request_destroy(ena_dev);
3375
3376 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3377
3378 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3379 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3380 }
3381
3382 static int
3383 ena_device_validate_params(struct ena_adapter *adapter,
3384 struct ena_com_dev_get_features_ctx *get_feat_ctx)
3385 {
3386
3387 if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3388 ETHER_ADDR_LEN) != 0) {
3389 device_printf(adapter->pdev,
3390 "Error, mac address are different\n");
3391 return (EINVAL);
3392 }
3393
3394 if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3395 device_printf(adapter->pdev,
3396 "Error, device max mtu is smaller than ifp MTU\n");
3397 return (EINVAL);
3398 }
3399
3400 return (0);
3401 }
3402
3403 int
3404 ena_restore_device(struct ena_adapter *adapter)
3405 {
3406 struct ena_com_dev_get_features_ctx get_feat_ctx;
3407 struct ena_com_dev *ena_dev = adapter->ena_dev;
3408 if_t ifp = adapter->ifp;
3409 device_t dev = adapter->pdev;
3410 int wd_active;
3411 int rc;
3412
3413 ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3414
3415 rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3416 if (rc != 0) {
3417 device_printf(dev, "Cannot initialize device\n");
3418 goto err;
3419 }
3420 /*
3421 * Only enable WD if it was enabled before reset, so it won't override
3422 * the value set by the user via the sysctl.
3423 */
3424 if (adapter->wd_active != 0)
3425 adapter->wd_active = wd_active;
3426
3427 rc = ena_device_validate_params(adapter, &get_feat_ctx);
3428 if (rc != 0) {
3429 device_printf(dev, "Validation of device parameters failed\n");
3430 goto err_device_destroy;
3431 }
3432
3433 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3434 /* Make sure we don't have a race with AENQ Links state handler */
3435 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3436 if_link_state_change(ifp, LINK_STATE_UP);
3437
3438 rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3439 if (rc != 0) {
3440 device_printf(dev, "Enable MSI-X failed\n");
3441 goto err_device_destroy;
3442 }
3443
3444 /*
3445 * Effective value of used MSIX vectors should be the same as before
3446 * `ena_destroy_device()`, if possible, or closest to it if fewer vectors
3447 * are available.
3448 */
3449 if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3450 adapter->num_io_queues =
3451 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3452
3453 /* Re-initialize rings basic information */
3454 ena_init_io_rings(adapter);
3455
3456 /* If the interface was up before the reset, bring it up */
3457 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3458 rc = ena_up(adapter);
3459 if (rc != 0) {
3460 device_printf(dev, "Failed to create I/O queues\n");
3461 goto err_disable_msix;
3462 }
3463 }
3464
3465 /* Indicate that device is running again and ready to work */
3466 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3467
3468 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3469 /*
3470 * As the AENQ handlers weren't executed during reset because
3471 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
3472 * timestamp must be updated again. That will prevent the next
3473 * reset caused by a missing keep alive.
3474 */
3475 adapter->keep_alive_timestamp = getsbinuptime();
3476 callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
3477 ena_timer_service, (void *)adapter, 0);
3478 }
3479 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3480
3481 device_printf(dev,
3482 "Device reset completed successfully, Driver info: %s\n", ena_version);
3483
3484 return (rc);
3485
3486 err_disable_msix:
3487 ena_free_mgmnt_irq(adapter);
3488 ena_disable_msix(adapter);
3489 err_device_destroy:
3490 ena_com_abort_admin_commands(ena_dev);
3491 ena_com_wait_for_abort_completion(ena_dev);
3492 ena_com_admin_destroy(ena_dev);
3493 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3494 ena_com_mmio_reg_read_request_destroy(ena_dev);
3495 err:
3496 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3497 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3498 device_printf(dev, "Reset attempt failed. Cannot reset the device\n");
3499
3500 return (rc);
3501 }
3502
3503 static void
3504 ena_reset_task(void *arg, int pending)
3505 {
3506 struct ena_adapter *adapter = (struct ena_adapter *)arg;
3507
3508 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3509 device_printf(adapter->pdev,
3510 "device reset scheduled but trigger_reset is off\n");
3511 return;
3512 }
3513
3514 ENA_LOCK_LOCK(adapter);
3515 ena_destroy_device(adapter, false);
3516 ena_restore_device(adapter);
3517 ENA_LOCK_UNLOCK(adapter);
3518 }
3519
3520 /**
3521 * ena_attach - Device Initialization Routine
3522 * @pdev: device information struct
3523 *
3524 * Returns 0 on success, or an error code otherwise.
3525 *
3526 * ena_attach initializes an adapter identified by a device structure.
3527 * The OS initialization, configuring of the adapter private structure,
3528 * and a hardware reset occur.
3529 **/
3530 static int
3531 ena_attach(device_t pdev)
3532 {
3533 struct ena_com_dev_get_features_ctx get_feat_ctx;
3534 struct ena_llq_configurations llq_config;
3535 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3536 static int version_printed;
3537 struct ena_adapter *adapter;
3538 struct ena_com_dev *ena_dev = NULL;
3539 uint32_t max_num_io_queues;
3540 int msix_rid;
3541 int rid, rc;
3542
3543 adapter = device_get_softc(pdev);
3544 adapter->pdev = pdev;
3545
3546 ENA_LOCK_INIT(adapter);
3547
3548 /*
3549 * Set up the timer service - the driver is responsible for avoiding
3550 * concurrency, as the callout won't be using any locking inside.
3551 */
3552 callout_init(&adapter->timer_service, true);
3553 adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3554 adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3555 adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3556 adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3557
3558 if (version_printed++ == 0)
3559 device_printf(pdev, "%s\n", ena_version);
3560
3561 /* Allocate memory for ena_dev structure */
3562 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3563 M_WAITOK | M_ZERO);
3564
3565 adapter->ena_dev = ena_dev;
3566 ena_dev->dmadev = pdev;
3567
3568 rid = PCIR_BAR(ENA_REG_BAR);
3569 adapter->memory = NULL;
3570 adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3571 &rid, RF_ACTIVE);
3572 if (unlikely(adapter->registers == NULL)) {
3573 device_printf(pdev,
3574 "unable to allocate bus resource: registers!\n");
3575 rc = ENOMEM;
3576 goto err_dev_free;
3577 }
3578
3579 /* MSIx vector table may reside on BAR0 with registers or on BAR1. */
3580 msix_rid = pci_msix_table_bar(pdev);
3581 if (msix_rid != rid) {
3582 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3583 &msix_rid, RF_ACTIVE);
3584 if (unlikely(adapter->msix == NULL)) {
3585 device_printf(pdev,
3586 "unable to allocate bus resource: msix!\n");
3587 rc = ENOMEM;
3588 goto err_pci_free;
3589 }
3590 adapter->msix_rid = msix_rid;
3591 }
3592
3593 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3594 M_WAITOK | M_ZERO);
3595
3596 /* Store register resources */
3597 ((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
3598 rman_get_bustag(adapter->registers);
3599 ((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
3600 rman_get_bushandle(adapter->registers);
3601
3602 if (unlikely(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0)) {
3603 device_printf(pdev, "failed to pmap registers bar\n");
3604 rc = ENXIO;
3605 goto err_bus_free;
3606 }
3607
3608 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3609
3610 /* Initially clear all the flags */
3611 ENA_FLAG_ZERO(adapter);
3612
3613 /* Device initialization */
3614 rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3615 if (unlikely(rc != 0)) {
3616 device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
3617 rc = ENXIO;
3618 goto err_bus_free;
3619 }
3620
3621 set_default_llq_configurations(&llq_config);
3622
3623 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
3624 &llq_config);
3625 if (unlikely(rc != 0)) {
3626 device_printf(pdev, "failed to set placement policy\n");
3627 goto err_com_free;
3628 }
3629
3630 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3631 adapter->disable_meta_caching =
3632 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
3633 BIT(ENA_ADMIN_DISABLE_META_CACHING));
3634
3635 adapter->keep_alive_timestamp = getsbinuptime();
3636
3637 adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3638
3639 memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3640 ETHER_ADDR_LEN);
3641
3642 calc_queue_ctx.pdev = pdev;
3643 calc_queue_ctx.ena_dev = ena_dev;
3644 calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3645
3646 /* Calculate initial and maximum IO queue number and size */
3647 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
3648 &get_feat_ctx);
3649 rc = ena_calc_io_queue_size(&calc_queue_ctx);
3650 if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
3651 rc = EFAULT;
3652 goto err_com_free;
3653 }
3654
3655 adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3656 adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3657 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3658 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3659 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3660 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3661
3662 adapter->max_num_io_queues = max_num_io_queues;
3663
3664 adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
3665
3666 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3667
3668 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3669
3670 /* set up dma tags for rx and tx buffers */
3671 rc = ena_setup_tx_dma_tag(adapter);
3672 if (unlikely(rc != 0)) {
3673 device_printf(pdev, "Failed to create TX DMA tag\n");
3674 goto err_com_free;
3675 }
3676
3677 rc = ena_setup_rx_dma_tag(adapter);
3678 if (unlikely(rc != 0)) {
3679 device_printf(pdev, "Failed to create RX DMA tag\n");
3680 goto err_tx_tag_free;
3681 }
3682
3683 /*
3684 * The number of requested MSIX vectors is equal to
3685 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
3686 * number of admin queue interrupts. The former is initially determined
3687 * by HW capabilities (see `ena_calc_max_io_queue_num()`) but may not be
3688 * achieved if there are not enough system resources. By default, the
3689 * number of effectively used IO queues is the same but later on it can
3690 * be limited by the user using sysctl interface.
3691 */
3692 rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3693 if (unlikely(rc != 0)) {
3694 device_printf(pdev,
3695 "Failed to enable and set the admin interrupts\n");
3696 goto err_io_free;
3697 }
3698 /* By default, all of the allocated MSIX vectors are actively used */
3699 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3700
3701 /* initialize rings basic information */
3702 ena_init_io_rings(adapter);
3703
3704 /* setup network interface */
3705 rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3706 if (unlikely(rc != 0)) {
3707 device_printf(pdev, "Error with network interface setup\n");
3708 goto err_msix_free;
3709 }
3710
3711 /* Initialize reset task queue */
3712 TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3713 adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3714 M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3715 taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
3716 "%s rstq", device_get_nameunit(adapter->pdev));
3717
3718 /* Initialize statistics */
3719 ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3720 sizeof(struct ena_stats_dev));
3721 ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3722 sizeof(struct ena_hw_stats));
3723 ena_sysctl_add_nodes(adapter);
3724
3725 #ifdef DEV_NETMAP
3726 rc = ena_netmap_attach(adapter);
3727 if (rc != 0) {
3728 device_printf(pdev, "netmap attach failed: %d\n", rc);
3729 goto err_detach;
3730 }
3731 #endif /* DEV_NETMAP */
3732
3733 /* Tell the stack that the interface is not active */
3734 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3735 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
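	/*
	 * if_setdrvflagbits() above sets IFF_DRV_OACTIVE and clears
	 * IFF_DRV_RUNNING, so the stack will not attempt to transmit
	 * until ena_up() marks the interface as running.
	 */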
3736
3737 return (0);
3738
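/*
 * Error unwind: each label below releases the resources acquired before
 * the corresponding failure point, in reverse order of acquisition.
 */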
3739 #ifdef DEV_NETMAP
3740 err_detach:
3741 ether_ifdetach(adapter->ifp);
3742 #endif /* DEV_NETMAP */
3743 err_msix_free:
3744 ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
3745 ena_free_mgmnt_irq(adapter);
3746 ena_disable_msix(adapter);
3747 err_io_free:
3748 ena_free_all_io_rings_resources(adapter);
3749 ena_free_rx_dma_tag(adapter);
3750 err_tx_tag_free:
3751 ena_free_tx_dma_tag(adapter);
3752 err_com_free:
3753 ena_com_admin_destroy(ena_dev);
3754 ena_com_delete_host_info(ena_dev);
3755 ena_com_mmio_reg_read_request_destroy(ena_dev);
3756 err_bus_free:
3757 free(ena_dev->bus, M_DEVBUF);
3758 err_pci_free:
3759 ena_free_pci_resources(adapter);
3760 err_dev_free:
3761 free(ena_dev, M_DEVBUF);
3762
3763 return (rc);
3764 }
3765
3766 /**
3767 * ena_detach - Device Removal Routine
3768 * @pdev: device information struct
3769 *
3770 * ena_detach is called by the device subsystem to alert the driver
3771 * that it should release a PCI device.
3772 **/
3773 static int
3774 ena_detach(device_t pdev)
3775 {
3776 struct ena_adapter *adapter = device_get_softc(pdev);
3777 struct ena_com_dev *ena_dev = adapter->ena_dev;
3778 int rc;
3779
3780 	/* Make sure VLANs are not using the driver */
3781 	if (adapter->ifp->if_vlantrunk != NULL) {
3782 		device_printf(adapter->pdev, "VLAN is in use, detach first\n");
3783 return (EBUSY);
3784 }
3785
3786 ether_ifdetach(adapter->ifp);
3787
3788 /* Stop timer service */
3789 ENA_LOCK_LOCK(adapter);
3790 callout_drain(&adapter->timer_service);
3791 ENA_LOCK_UNLOCK(adapter);
3792
3793 /* Release reset task */
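	/*
	 * taskqueue_cancel(9) returns nonzero (EBUSY) while the task is
	 * still running, so drain in a loop until the task is guaranteed
	 * idle before the queue is freed.
	 */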
3794 while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3795 taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3796 taskqueue_free(adapter->reset_tq);
3797
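	/*
	 * Editorial note: the second argument to ena_destroy_device() is
	 * assumed here to request a graceful teardown, i.e. the device is
	 * reset cleanly rather than abandoned mid-operation.
	 */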
3798 ENA_LOCK_LOCK(adapter);
3799 ena_down(adapter);
3800 ena_destroy_device(adapter, true);
3801 ENA_LOCK_UNLOCK(adapter);
3802
3803 #ifdef DEV_NETMAP
3804 netmap_detach(adapter->ifp);
3805 #endif /* DEV_NETMAP */
3806
3807 ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3808 sizeof(struct ena_hw_stats));
3809 ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3810 sizeof(struct ena_stats_dev));
3811
3812 rc = ena_free_rx_dma_tag(adapter);
3813 if (unlikely(rc != 0))
3814 device_printf(adapter->pdev,
3815 "Unmapped RX DMA tag associations\n");
3816
3817 rc = ena_free_tx_dma_tag(adapter);
3818 if (unlikely(rc != 0))
3819 device_printf(adapter->pdev,
3820 "Unmapped TX DMA tag associations\n");
3821
3822 ena_free_irqs(adapter);
3823
3824 ena_free_pci_resources(adapter);
3825
3826 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
3827 ena_com_rss_destroy(ena_dev);
3828
3829 ena_com_delete_host_info(ena_dev);
3830
3831 ENA_LOCK_DESTROY(adapter);
3832
3833 if_free(adapter->ifp);
3834
3835 if (ena_dev->bus != NULL)
3836 free(ena_dev->bus, M_DEVBUF);
3837
3838 if (ena_dev != NULL)
3839 free(ena_dev, M_DEVBUF);
3840
3841 return (bus_generic_detach(pdev));
3842 }
3843
3844 /******************************************************************************
3845 ******************************** AENQ Handlers *******************************
3846 *****************************************************************************/
3847 /**
3848 * ena_update_on_link_change:
3849 * Notify the network interface about the change in link status
3850 **/
3851 static void
3852 ena_update_on_link_change(void *adapter_data,
3853 struct ena_admin_aenq_entry *aenq_e)
3854 {
3855 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3856 struct ena_admin_aenq_link_change_desc *aenq_desc;
3857 int status;
3858 if_t ifp;
3859
3860 aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3861 ifp = adapter->ifp;
3862 status = aenq_desc->flags &
3863 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3864
3865 if (status != 0) {
3866 device_printf(adapter->pdev, "link is UP\n");
3867 ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3868 if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
3869 if_link_state_change(ifp, LINK_STATE_UP);
3870 } else {
3871 device_printf(adapter->pdev, "link is DOWN\n");
3872 if_link_state_change(ifp, LINK_STATE_DOWN);
3873 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3874 }
3875 }
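/*
 * Note the asymmetry above: link-up is not propagated to the stack while
 * a reset is in flight (ENA_FLAG_ONGOING_RESET), whereas link-down is
 * always propagated so the stack stops using the interface immediately.
 */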
3876
3877 static void
3878 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
3879 {
3880 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3881 struct ena_admin_ena_hw_hints *hints;
3882
3883 ENA_WARN(NULL, aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3884 "Invalid group(%x) expected %x\n", aenq_e->aenq_common_desc.group,
3885 ENA_ADMIN_NOTIFICATION);
3886
3887 switch (aenq_e->aenq_common_desc.syndrome) {
3888 case ENA_ADMIN_UPDATE_HINTS:
3889 hints =
3890 (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
3891 ena_update_hints(adapter, hints);
3892 break;
3893 default:
3894 device_printf(adapter->pdev,
3895 "Invalid aenq notification link state %d\n",
3896 aenq_e->aenq_common_desc.syndrome);
3897 }
3898 }
3899
3900 /**
3901  * This handler is called for an unknown event group or an unimplemented handler
3902 **/
3903 static void
3904 unimplemented_aenq_handler(void *adapter_data,
3905 struct ena_admin_aenq_entry *aenq_e)
3906 {
3907 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3908
3909 device_printf(adapter->pdev,
3910 "Unknown event was received or event with unimplemented handler\n");
3911 }
3912
3913 static struct ena_aenq_handlers aenq_handlers = {
3914 .handlers = {
3915 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3916 [ENA_ADMIN_NOTIFICATION] = ena_notification,
3917 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3918 },
3919 .unimplemented_handler = unimplemented_aenq_handler
3920 };
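/*
 * The ena_com layer dispatches each AENQ event by using its group as an
 * index into the handlers[] table above; any group without an entry
 * falls back to unimplemented_aenq_handler().
 */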
3921
3922 /*********************************************************************
3923 * FreeBSD Device Interface Entry Points
3924 *********************************************************************/
3925
3926 static device_method_t ena_methods[] = {
3927 /* Device interface */
3928 DEVMETHOD(device_probe, ena_probe),
3929 DEVMETHOD(device_attach, ena_attach),
3930 DEVMETHOD(device_detach, ena_detach),
3931 DEVMETHOD_END
3932 };
3933
3934 static driver_t ena_driver = {
3935 "ena", ena_methods, sizeof(struct ena_adapter),
3936 };
3937
3938 devclass_t ena_devclass;
3939 DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
3940 MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
3941 nitems(ena_vendor_info_array) - 1);
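/*
 * The "U16:vendor;U16:device" descriptor lets devmatch(8) parse
 * ena_vendor_info_array so the module can be auto-loaded when a matching
 * PCI ID is found.
 */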
3942 MODULE_DEPEND(ena, pci, 1, 1, 1);
3943 MODULE_DEPEND(ena, ether, 1, 1, 1);
3944 #ifdef DEV_NETMAP
3945 MODULE_DEPEND(ena, netmap, 1, 1, 1);
3946 #endif /* DEV_NETMAP */
3947
3948 /*********************************************************************/