1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 __FBSDID("$FreeBSD$");
33
34 #include "opt_rss.h"
35
36 #include "ena_sysctl.h"
37 #include "ena_rss.h"
38
/* Registration helpers, one per group of sysctl nodes. */
static void	ena_sysctl_add_wd(struct ena_adapter *adapter);
static void	ena_sysctl_add_stats(struct ena_adapter *adapter);
static void	ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void	ena_sysctl_add_tuneables(struct ena_adapter *adapter);

/* Handlers backing the writable per-device sysctls. */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);

/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void	ena_sysctl_add_rss(struct ena_adapter *adapter);
static int	ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
55
/* Limit the maximum ENI metrics sample interval to one hour (in seconds). */
57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)
59
60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
61 "ENA driver parameters");
62
63 /*
64 * Logging level for changing verbosity of the output
65 */
66 int ena_log_level = ENA_INFO;
67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
68 &ena_log_level, 0, "Logging level indicating verbosity of the logs");
69
70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
71 DRV_MODULE_VERSION, "ENA driver version");
72
73 /*
74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
76 * of time and lead to the OS instability as it needs to look for the contiguous
77 * pages.
 * However, page size mbufs have slightly lower throughput than 9k mbufs, so
 * if network performance is the priority, 9k mbufs can be used.
80 */
81 int ena_enable_9k_mbufs = 0;
82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
84
85 /*
86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
87 * false. This option may be important for platforms, which often handle packet
88 * headers on Tx with total header size greater than 96B, as it may
89 * reduce the latency.
90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
91 * packet drops.
92 */
93 bool ena_force_large_llq_header = false;
94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
95 &ena_force_large_llq_header, 0,
96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
97
98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
99
/*
 * Register every per-device sysctl group for the adapter: watchdog knobs,
 * statistics, ENI metrics, tuneables and - only when the kernel is built
 * without the RSS option - the RSS controls.
 */
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
{
	ena_sysctl_add_wd(adapter);
	ena_sysctl_add_stats(adapter);
	ena_sysctl_add_eni_metrics(adapter);
	ena_sysctl_add_tuneables(adapter);
#if !defined(RSS)
	ena_sysctl_add_rss(adapter);
#endif
}
111
112 static void
113 ena_sysctl_add_wd(struct ena_adapter *adapter)
114 {
115 device_t dev;
116
117 struct sysctl_ctx_list *ctx;
118 struct sysctl_oid *tree;
119 struct sysctl_oid_list *child;
120
121 dev = adapter->pdev;
122
123 ctx = device_get_sysctl_ctx(dev);
124 tree = device_get_sysctl_tree(dev);
125 child = SYSCTL_CHILDREN(tree);
126
127 /* Sysctl calls for Watchdog service */
128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
129 CTLFLAG_RWTUN, &adapter->wd_active, 0,
130 "Watchdog is active");
131
132 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
133 CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
134 "Timeout for Keep Alive messages");
135
136 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
137 CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
138 "Timeout for TX completion");
139
140 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
141 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
142 "Number of TX queues to check per run");
143
144 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
145 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
146 "Max number of timeouted packets");
147 }
148
149 static void
150 ena_sysctl_add_stats(struct ena_adapter *adapter)
151 {
152 device_t dev;
153
154 struct ena_ring *tx_ring;
155 struct ena_ring *rx_ring;
156
157 struct ena_hw_stats *hw_stats;
158 struct ena_stats_dev *dev_stats;
159 struct ena_stats_tx *tx_stats;
160 struct ena_stats_rx *rx_stats;
161 struct ena_com_stats_admin *admin_stats;
162
163 struct sysctl_ctx_list *ctx;
164 struct sysctl_oid *tree;
165 struct sysctl_oid_list *child;
166
167 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
168 struct sysctl_oid *admin_node;
169 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
170 struct sysctl_oid_list *admin_list;
171
172 #define QUEUE_NAME_LEN 32
173 char namebuf[QUEUE_NAME_LEN];
174 int i;
175
176 dev = adapter->pdev;
177
178 ctx = device_get_sysctl_ctx(dev);
179 tree = device_get_sysctl_tree(dev);
180 child = SYSCTL_CHILDREN(tree);
181
182 tx_ring = adapter->tx_ring;
183 rx_ring = adapter->rx_ring;
184
185 hw_stats = &adapter->hw_stats;
186 dev_stats = &adapter->dev_stats;
187 admin_stats = &adapter->ena_dev->admin_queue.stats;
188
189 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
190 CTLFLAG_RD, &dev_stats->wd_expired,
191 "Watchdog expiry count");
192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
193 CTLFLAG_RD, &dev_stats->interface_up,
194 "Network interface up count");
195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
196 CTLFLAG_RD, &dev_stats->interface_down,
197 "Network interface down count");
198 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
199 CTLFLAG_RD, &dev_stats->admin_q_pause,
200 "Admin queue pauses");
201
202 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
203 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
204
205 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
206 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
207 queue_list = SYSCTL_CHILDREN(queue_node);
208
209 adapter->que[i].oid = queue_node;
210
211 /* TX specific stats */
212 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
213 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
214 tx_list = SYSCTL_CHILDREN(tx_node);
215
216 tx_stats = &tx_ring->tx_stats;
217
218 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
219 "count", CTLFLAG_RD,
220 &tx_stats->cnt, "Packets sent");
221 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
222 "bytes", CTLFLAG_RD,
223 &tx_stats->bytes, "Bytes sent");
224 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
225 "prepare_ctx_err", CTLFLAG_RD,
226 &tx_stats->prepare_ctx_err,
227 "TX buffer preparation failures");
228 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
229 "dma_mapping_err", CTLFLAG_RD,
230 &tx_stats->dma_mapping_err, "DMA mapping failures");
231 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
232 "doorbells", CTLFLAG_RD,
233 &tx_stats->doorbells, "Queue doorbells");
234 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
235 "missing_tx_comp", CTLFLAG_RD,
236 &tx_stats->missing_tx_comp, "TX completions missed");
237 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
238 "bad_req_id", CTLFLAG_RD,
239 &tx_stats->bad_req_id, "Bad request id count");
240 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
241 "mbuf_collapses", CTLFLAG_RD,
242 &tx_stats->collapse,
243 "Mbuf collapse count");
244 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
245 "mbuf_collapse_err", CTLFLAG_RD,
246 &tx_stats->collapse_err,
247 "Mbuf collapse failures");
248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
249 "queue_wakeups", CTLFLAG_RD,
250 &tx_stats->queue_wakeup, "Queue wakeups");
251 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
252 "queue_stops", CTLFLAG_RD,
253 &tx_stats->queue_stop, "Queue stops");
254 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
255 "llq_buffer_copy", CTLFLAG_RD,
256 &tx_stats->llq_buffer_copy,
257 "Header copies for llq transaction");
258 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
259 "unmask_interrupt_num", CTLFLAG_RD,
260 &tx_stats->unmask_interrupt_num,
261 "Unmasked interrupt count");
262
263 /* RX specific stats */
264 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
265 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
266 rx_list = SYSCTL_CHILDREN(rx_node);
267
268 rx_stats = &rx_ring->rx_stats;
269
270 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
271 "count", CTLFLAG_RD,
272 &rx_stats->cnt, "Packets received");
273 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
274 "bytes", CTLFLAG_RD,
275 &rx_stats->bytes, "Bytes received");
276 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
277 "refil_partial", CTLFLAG_RD,
278 &rx_stats->refil_partial, "Partial refilled mbufs");
279 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
280 "csum_bad", CTLFLAG_RD,
281 &rx_stats->csum_bad, "Bad RX checksum");
282 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
283 "mbuf_alloc_fail", CTLFLAG_RD,
284 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
285 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
286 "mjum_alloc_fail", CTLFLAG_RD,
287 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
288 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
289 "dma_mapping_err", CTLFLAG_RD,
290 &rx_stats->dma_mapping_err, "DMA mapping errors");
291 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
292 "bad_desc_num", CTLFLAG_RD,
293 &rx_stats->bad_desc_num, "Bad descriptor count");
294 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
295 "bad_req_id", CTLFLAG_RD,
296 &rx_stats->bad_req_id, "Bad request id count");
297 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
298 "empty_rx_ring", CTLFLAG_RD,
299 &rx_stats->empty_rx_ring, "RX descriptors depletion count");
300 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
301 "csum_good", CTLFLAG_RD,
302 &rx_stats->csum_good, "Valid RX checksum calculations");
303 }
304
305 /* Stats read from device */
306 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
307 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
308 hw_list = SYSCTL_CHILDREN(hw_node);
309
310 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
311 &hw_stats->rx_packets, "Packets received");
312 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
313 &hw_stats->tx_packets, "Packets transmitted");
314 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
315 &hw_stats->rx_bytes, "Bytes received");
316 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
317 &hw_stats->tx_bytes, "Bytes transmitted");
318 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
319 &hw_stats->rx_drops, "Receive packet drops");
320 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
321 &hw_stats->tx_drops, "Transmit packet drops");
322
323 /* ENA Admin queue stats */
324 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
325 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
326 admin_list = SYSCTL_CHILDREN(admin_node);
327
328 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
329 &admin_stats->aborted_cmd, 0, "Aborted commands");
330 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
331 &admin_stats->submitted_cmd, 0, "Submitted commands");
332 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
333 &admin_stats->completed_cmd, 0, "Completed commands");
334 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
335 &admin_stats->out_of_space, 0, "Queue out of space");
336 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
337 &admin_stats->no_completion, 0, "Commands not completed");
338 }
339
340 static void
341 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
342 {
343 device_t dev;
344 struct ena_admin_eni_stats *eni_metrics;
345
346 struct sysctl_ctx_list *ctx;
347 struct sysctl_oid *tree;
348 struct sysctl_oid_list *child;
349
350 struct sysctl_oid *eni_node;
351 struct sysctl_oid_list *eni_list;
352
353 dev = adapter->pdev;
354
355 ctx = device_get_sysctl_ctx(dev);
356 tree = device_get_sysctl_tree(dev);
357 child = SYSCTL_CHILDREN(tree);
358
359 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
360 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
361 eni_list = SYSCTL_CHILDREN(eni_node);
362
363 eni_metrics = &adapter->eni_metrics;
364
365 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
366 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
367 "Inbound BW allowance exceeded");
368 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
369 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
370 "Outbound BW allowance exceeded");
371 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
372 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
373 "PPS allowance exceeded");
374 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
375 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
376 "Connection tracking allowance exceeded");
377 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
378 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
379 "Linklocal packet rate allowance exceeded");
380
381 /*
382 * Tuneable, which determines how often ENI metrics will be read.
383 * 0 means it's turned off. Maximum allowed value is limited by:
384 * ENI_METRICS_MAX_SAMPLE_INTERVAL.
385 */
386 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
387 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
388 ena_sysctl_eni_metrics_interval, "SU",
389 "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
390 }
391
392 static void
393 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
394 {
395 device_t dev;
396
397 struct sysctl_ctx_list *ctx;
398 struct sysctl_oid *tree;
399 struct sysctl_oid_list *child;
400
401 dev = adapter->pdev;
402
403 ctx = device_get_sysctl_ctx(dev);
404 tree = device_get_sysctl_tree(dev);
405 child = SYSCTL_CHILDREN(tree);
406
407 /* Tuneable number of buffers in the buf-ring (drbr) */
408 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
409 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
410 ena_sysctl_buf_ring_size, "I",
411 "Size of the Tx buffer ring (drbr).");
412
413 /* Tuneable number of the Rx ring size */
414 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
415 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
416 ena_sysctl_rx_queue_size, "I",
417 "Size of the Rx ring. The size should be a power of 2.");
418
419 /* Tuneable number of IO queues */
420 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
421 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
422 ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
423 }
424
425 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
426 #ifndef RSS
427 static void
428 ena_sysctl_add_rss(struct ena_adapter *adapter)
429 {
430 device_t dev;
431
432 struct sysctl_ctx_list *ctx;
433 struct sysctl_oid *tree;
434 struct sysctl_oid_list *child;
435
436 dev = adapter->pdev;
437
438 ctx = device_get_sysctl_ctx(dev);
439 tree = device_get_sysctl_tree(dev);
440 child = SYSCTL_CHILDREN(tree);
441
442 /* RSS options */
443 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
444 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
445 child = SYSCTL_CHILDREN(tree);
446
447 /* RSS hash key */
448 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
449 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
450 ena_sysctl_rss_key, "A", "RSS key.");
451
452 /* Tuneable RSS indirection table */
453 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
454 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
455 ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
456
457 /* RSS indirection table size */
458 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
459 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
460 "RSS indirection table size.");
461 }
462 #endif /* RSS */
463
464
465 /*
466 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
467 *
468 * Whether the nodes are registered or unregistered depends on a delta between
469 * the `old` and `new` parameters, representing the number of queues.
470 *
471 * This function is used to hide sysctl attributes for queue nodes which aren't
472 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
473 *
474 * NOTE:
475 * All unregistered nodes must be registered again at detach, i.e. by a call to
476 * this function.
477 */
478 void
479 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
480 {
481 device_t dev;
482 struct sysctl_oid *oid;
483 int min, max, i;
484
485 dev = adapter->pdev;
486 min = MIN(old, new);
487 max = MIN(MAX(old, new), adapter->max_num_io_queues);
488
489 for (i = min; i < max; ++i) {
490 oid = adapter->que[i].oid;
491
492 sysctl_wlock();
493 if (old > new)
494 sysctl_unregister_oid(oid);
495 else
496 sysctl_register_oid(oid);
497 sysctl_wunlock();
498 }
499 }
500
501 static int
502 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
503 {
504 struct ena_adapter *adapter = arg1;
505 uint32_t val;
506 int error;
507
508 ENA_LOCK_LOCK();
509 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
510 error = EINVAL;
511 goto unlock;
512 }
513
514 val = 0;
515 error = sysctl_wire_old_buffer(req, sizeof(val));
516 if (error == 0) {
517 val = adapter->buf_ring_size;
518 error = sysctl_handle_32(oidp, &val, 0, req);
519 }
520 if (error != 0 || req->newptr == NULL)
521 goto unlock;
522
523 if (!powerof2(val) || val == 0) {
524 ena_log(adapter->pdev, ERR,
525 "Requested new Tx buffer ring size (%u) is not a power of 2\n",
526 val);
527 error = EINVAL;
528 goto unlock;
529 }
530
531 if (val != adapter->buf_ring_size) {
532 ena_log(adapter->pdev, INFO,
533 "Requested new Tx buffer ring size: %d. Old size: %d\n",
534 val, adapter->buf_ring_size);
535
536 error = ena_update_buf_ring_size(adapter, val);
537 } else {
538 ena_log(adapter->pdev, ERR,
539 "New Tx buffer ring size is the same as already used: %u\n",
540 adapter->buf_ring_size);
541 }
542
543 unlock:
544 ENA_LOCK_UNLOCK();
545
546 return (error);
547 }
548
549 static int
550 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
551 {
552 struct ena_adapter *adapter = arg1;
553 uint32_t val;
554 int error;
555
556 ENA_LOCK_LOCK();
557 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
558 error = EINVAL;
559 goto unlock;
560 }
561
562 val = 0;
563 error = sysctl_wire_old_buffer(req, sizeof(val));
564 if (error == 0) {
565 val = adapter->requested_rx_ring_size;
566 error = sysctl_handle_32(oidp, &val, 0, req);
567 }
568 if (error != 0 || req->newptr == NULL)
569 goto unlock;
570
571 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
572 ena_log(adapter->pdev, ERR,
573 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
574 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
575 error = EINVAL;
576 goto unlock;
577 }
578
579 /* Check if the parameter is power of 2 */
580 if (!powerof2(val)) {
581 ena_log(adapter->pdev, ERR,
582 "Requested new Rx queue size (%u) is not a power of 2\n",
583 val);
584 error = EINVAL;
585 goto unlock;
586 }
587
588 if (val != adapter->requested_rx_ring_size) {
589 ena_log(adapter->pdev, INFO,
590 "Requested new Rx queue size: %u. Old size: %u\n",
591 val, adapter->requested_rx_ring_size);
592
593 error = ena_update_queue_size(adapter,
594 adapter->requested_tx_ring_size, val);
595 } else {
596 ena_log(adapter->pdev, ERR,
597 "New Rx queue size is the same as already used: %u\n",
598 adapter->requested_rx_ring_size);
599 }
600
601 unlock:
602 ENA_LOCK_UNLOCK();
603
604 return (error);
605 }
606
607 /*
608 * Change number of effectively used IO queues adapter->num_io_queues
609 */
610 static int
611 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
612 {
613 struct ena_adapter *adapter = arg1;
614 uint32_t old_num_queues, tmp = 0;
615 int error;
616
617 ENA_LOCK_LOCK();
618 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
619 error = EINVAL;
620 goto unlock;
621 }
622
623 error = sysctl_wire_old_buffer(req, sizeof(tmp));
624 if (error == 0) {
625 tmp = adapter->num_io_queues;
626 error = sysctl_handle_int(oidp, &tmp, 0, req);
627 }
628 if (error != 0 || req->newptr == NULL)
629 goto unlock;
630
631 if (tmp == 0) {
632 ena_log(adapter->pdev, ERR,
633 "Requested number of IO queues is zero\n");
634 error = EINVAL;
635 goto unlock;
636 }
637
638 /*
639 * The adapter::max_num_io_queues is the HW capability. The system
640 * resources availability may potentially be a tighter limit. Therefore
641 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
642 * always holds true, while the `adapter::msix_vecs` is variable across
643 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
644 */
645 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
646 ena_log(adapter->pdev, ERR,
647 "Requested number of IO queues is higher than maximum "
648 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
649 error = EINVAL;
650 goto unlock;
651 }
652 if (tmp == adapter->num_io_queues) {
653 ena_log(adapter->pdev, ERR,
654 "Requested number of IO queues is equal to current value "
655 "(%u)\n", adapter->num_io_queues);
656 } else {
657 ena_log(adapter->pdev, INFO,
658 "Requested new number of IO queues: %u, current value: "
659 "%u\n", tmp, adapter->num_io_queues);
660
661 old_num_queues = adapter->num_io_queues;
662 error = ena_update_io_queue_nb(adapter, tmp);
663 if (error != 0)
664 return (error);
665
666 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
667 }
668
669 unlock:
670 ENA_LOCK_UNLOCK();
671
672 return (error);
673 }
674
675 static int
676 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
677 {
678 struct ena_adapter *adapter = arg1;
679 uint16_t interval;
680 int error;
681
682 ENA_LOCK_LOCK();
683 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
684 error = EINVAL;
685 goto unlock;
686 }
687
688 error = sysctl_wire_old_buffer(req, sizeof(interval));
689 if (error == 0) {
690 interval = adapter->eni_metrics_sample_interval;
691 error = sysctl_handle_16(oidp, &interval, 0, req);
692 }
693 if (error != 0 || req->newptr == NULL)
694 goto unlock;
695
696 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
697 ena_log(adapter->pdev, ERR,
698 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
699 ENI_METRICS_MAX_SAMPLE_INTERVAL);
700 error = EINVAL;
701 goto unlock;
702 }
703
704 if (interval == 0) {
705 ena_log(adapter->pdev, INFO,
706 "ENI metrics update is now turned off\n");
707 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
708 } else {
709 ena_log(adapter->pdev, INFO,
710 "ENI metrics update interval is set to: %"PRIu16" seconds\n",
711 interval);
712 }
713
714 adapter->eni_metrics_sample_interval = interval;
715
716 unlock:
717 ENA_LOCK_UNLOCK();
718
719 return (0);
720 }
721
722 #ifndef RSS
723 /*
724 * Change the Receive Side Scaling hash key.
725 */
726 static int
727 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
728 {
729 struct ena_adapter *adapter = arg1;
730 struct ena_com_dev *ena_dev = adapter->ena_dev;
731 enum ena_admin_hash_functions ena_func;
732 char msg[ENA_HASH_KEY_MSG_SIZE];
733 char elem[3] = { 0 };
734 char *endp;
735 u8 rss_key[ENA_HASH_KEY_SIZE];
736 int error, i;
737
738 ENA_LOCK_LOCK();
739 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
740 error = EINVAL;
741 goto unlock;
742 }
743
744 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
745 error = ENOTSUP;
746 goto unlock;
747 }
748
749 error = sysctl_wire_old_buffer(req, sizeof(msg));
750 if (error != 0)
751 goto unlock;
752
753 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
754 if (error != 0) {
755 device_printf(adapter->pdev, "Cannot get hash function\n");
756 goto unlock;
757 }
758
759 if (ena_func != ENA_ADMIN_TOEPLITZ) {
760 error = EINVAL;
761 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
762 goto unlock;
763 }
764
765 error = ena_rss_get_hash_key(ena_dev, rss_key);
766 if (error != 0) {
767 device_printf(adapter->pdev, "Cannot get hash key\n");
768 goto unlock;
769 }
770
771 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
772 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
773
774 error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
775 if (error != 0 || req->newptr == NULL)
776 goto unlock;
777
778 if (strlen(msg) != sizeof(msg) - 1) {
779 error = EINVAL;
780 device_printf(adapter->pdev, "Invalid key size\n");
781 goto unlock;
782 }
783
784 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
785 strncpy(elem, &msg[i * 2], 2);
786 rss_key[i] = strtol(elem, &endp, 16);
787
788 /* Both hex nibbles in the string must be valid to continue. */
789 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
790 error = EINVAL;
791 device_printf(adapter->pdev,
792 "Invalid key hex value: '%c'\n", *endp);
793 goto unlock;
794 }
795 }
796
797 error = ena_rss_set_hash(ena_dev, rss_key);
798 if (error != 0)
799 device_printf(adapter->pdev, "Cannot fill hash key\n");
800
801 unlock:
802 ENA_LOCK_UNLOCK();
803
804 return (error);
805 }
806
807 /*
808 * Change the Receive Side Scaling indirection table.
809 *
810 * The sysctl entry string consists of one or more `x:y` keypairs, where
811 * x stands for the table index and y for its new value.
812 * Table indices that don't need to be updated can be omitted from the string
813 * and will retain their existing values. If an index is entered more than once,
814 * the last value is used.
815 *
816 * Example:
817 * To update two selected indices in the RSS indirection table, e.g. setting
818 * index 0 to queue 5 and then index 5 to queue 0, the below command should be
819 * used:
820 * sysctl dev.ena.0.rss.indir_table="0:5 5:0"
821 */
822 static int
823 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
824 {
825 int num_queues, error;
826 struct ena_adapter *adapter = arg1;
827 struct ena_com_dev *ena_dev;
828 struct ena_indir *indir;
829 char *msg, *buf, *endp;
830 uint32_t idx, value;
831
832 ENA_LOCK_LOCK();
833 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
834 error = EINVAL;
835 goto unlock;
836 }
837
838 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
839 error = ENOTSUP;
840 goto unlock;
841 }
842
843 ena_dev = adapter->ena_dev;
844 indir = adapter->rss_indir;
845 msg = indir->sysctl_buf;
846
847 if (unlikely(indir == NULL)) {
848 error = ENOTSUP;
849 goto unlock;
850 }
851
852 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
853 if (error != 0 || req->newptr == NULL)
854 goto unlock;
855
856 num_queues = adapter->num_io_queues;
857
858 /*
859 * This sysctl expects msg to be a list of `x:y` record pairs,
860 * where x is the indirection table index and y is its value.
861 */
862 for (buf = msg; *buf != '\0'; buf = endp) {
863 idx = strtol(buf, &endp, 10);
864
865 if (endp == buf || idx < 0) {
866 device_printf(adapter->pdev, "Invalid index: %s\n",
867 buf);
868 error = EINVAL;
869 break;
870 }
871
872 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
873 device_printf(adapter->pdev, "Index %d out of range\n",
874 idx);
875 error = ERANGE;
876 break;
877 }
878
879 buf = endp;
880
881 if (*buf++ != ':') {
882 device_printf(adapter->pdev, "Missing ':' separator\n");
883 error = EINVAL;
884 break;
885 }
886
887 value = strtol(buf, &endp, 10);
888
889 if (endp == buf || value < 0) {
890 device_printf(adapter->pdev, "Invalid value: %s\n",
891 buf);
892 error = EINVAL;
893 break;
894 }
895
896 if (value >= num_queues) {
897 device_printf(adapter->pdev, "Value %d out of range\n",
898 value);
899 error = ERANGE;
900 break;
901 }
902
903 indir->table[idx] = value;
904 }
905
906 if (error != 0) /* Reload indirection table with last good data. */
907 ena_rss_indir_get(adapter, indir->table);
908
909 /* At this point msg has been clobbered by sysctl_handle_string. */
910 ena_rss_copy_indir_buf(msg, indir->table);
911
912 if (error == 0)
913 error = ena_rss_indir_set(adapter, indir->table);
914
915 unlock:
916 ENA_LOCK_UNLOCK();
917
918 return (error);
919 }
920 #endif /* RSS */
Cache object: 8fda07fb449766f30ac0d3d8fdfd731d
|