| 
     1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause
    3  *
    4  * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  *
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  *
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29  */
   30 #include <sys/cdefs.h>
   31 #include <sys/param.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_rss.h"
   35 
   36 #include "ena_rss.h"
   37 #include "ena_sysctl.h"
   38 
/*
 * Registration helpers, one per group of sysctl nodes. Each of them is called
 * from ena_sysctl_add_nodes().
 */
static void ena_sysctl_add_wd(struct ena_adapter *);
static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *);
#endif
/* Handlers backing the procedural (SYSCTL_ADD_PROC) nodes. */
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif

/* Upper bound, in seconds, of the ENI metrics sample interval (one hour). */
#define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
/*
 * Size of the buffer holding the textual RSS hash key: two hexadecimal digits
 * per key byte plus the terminating NUL.
 */
#define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)
   59 
/* Root node for the driver-wide knobs declared below: hw.ena. */
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ENA driver parameters");

/*
 * Logging level for changing verbosity of the output.
 * Declared read-write and tunable (CTLFLAG_RWTUN); defaults to ENA_INFO.
 */
int ena_log_level = ENA_INFO;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
    "Logging level indicating verbosity of the logs");

/* Read-only string publishing ENA_DRV_MODULE_VERSION. */
SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
    ENA_DRV_MODULE_VERSION, "ENA driver version");
   72 
/*
 * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs instead).
 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
 * of time and lead to OS instability, as the allocator needs to look for
 * contiguous pages.
 * However, page size mbufs have slightly lower throughput than 9k mbufs, so if
 * network performance is the priority, 9k mbufs can be used.
 */
int ena_enable_9k_mbufs = 0;
SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
   84 
   85 /*
   86  * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
   87  * false. This option may be important for platforms, which often handle packet
   88  * headers on Tx with total header size greater than 96B, as it may
   89  * reduce the latency.
   90  * It also reduces the maximum Tx queue size by half, so it may cause more Tx
   91  * packet drops.
   92  */
   93 bool ena_force_large_llq_header = false;
   94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
   95     &ena_force_large_llq_header, 0,
   96     "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
   97 
/*
 * RSS indirection table size, initialized from ENA_RX_RSS_TABLE_SIZE. It is
 * published read-only as the "indir_table_size" node by ena_sysctl_add_rss()
 * when the kernel is built without option RSS.
 */
int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
   99 
  100 void
  101 ena_sysctl_add_nodes(struct ena_adapter *adapter)
  102 {
  103         ena_sysctl_add_wd(adapter);
  104         ena_sysctl_add_stats(adapter);
  105         ena_sysctl_add_eni_metrics(adapter);
  106         ena_sysctl_add_tuneables(adapter);
  107 #ifndef RSS
  108         ena_sysctl_add_rss(adapter);
  109 #endif
  110 }
  111 
  112 static void
  113 ena_sysctl_add_wd(struct ena_adapter *adapter)
  114 {
  115         device_t dev;
  116 
  117         struct sysctl_ctx_list *ctx;
  118         struct sysctl_oid *tree;
  119         struct sysctl_oid_list *child;
  120 
  121         dev = adapter->pdev;
  122 
  123         ctx = device_get_sysctl_ctx(dev);
  124         tree = device_get_sysctl_tree(dev);
  125         child = SYSCTL_CHILDREN(tree);
  126 
  127         /* Sysctl calls for Watchdog service */
  128         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
  129             &adapter->wd_active, 0, "Watchdog is active");
  130 
  131         SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
  132             CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
  133             "Timeout for Keep Alive messages");
  134 
  135         SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
  136             CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
  137             "Timeout for TX completion");
  138 
  139         SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
  140             CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
  141             "Number of TX queues to check per run");
  142 
  143         SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
  144             CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
  145             "Max number of timeouted packets");
  146 }
  147 
  148 static void
  149 ena_sysctl_add_stats(struct ena_adapter *adapter)
  150 {
  151         device_t dev;
  152 
  153         struct ena_ring *tx_ring;
  154         struct ena_ring *rx_ring;
  155 
  156         struct ena_hw_stats *hw_stats;
  157         struct ena_stats_dev *dev_stats;
  158         struct ena_stats_tx *tx_stats;
  159         struct ena_stats_rx *rx_stats;
  160         struct ena_com_stats_admin *admin_stats;
  161 
  162         struct sysctl_ctx_list *ctx;
  163         struct sysctl_oid *tree;
  164         struct sysctl_oid_list *child;
  165 
  166         struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
  167         struct sysctl_oid *admin_node;
  168         struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
  169         struct sysctl_oid_list *admin_list;
  170 
  171 #define QUEUE_NAME_LEN 32
  172         char namebuf[QUEUE_NAME_LEN];
  173         int i;
  174 
  175         dev = adapter->pdev;
  176 
  177         ctx = device_get_sysctl_ctx(dev);
  178         tree = device_get_sysctl_tree(dev);
  179         child = SYSCTL_CHILDREN(tree);
  180 
  181         tx_ring = adapter->tx_ring;
  182         rx_ring = adapter->rx_ring;
  183 
  184         hw_stats = &adapter->hw_stats;
  185         dev_stats = &adapter->dev_stats;
  186         admin_stats = &adapter->ena_dev->admin_queue.stats;
  187 
  188         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
  189             &dev_stats->wd_expired, "Watchdog expiry count");
  190         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
  191             &dev_stats->interface_up, "Network interface up count");
  192         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
  193             CTLFLAG_RD, &dev_stats->interface_down,
  194             "Network interface down count");
  195         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
  196             CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses");
  197 
  198         for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
  199                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
  200 
  201                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
  202                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
  203                 queue_list = SYSCTL_CHILDREN(queue_node);
  204 
  205                 adapter->que[i].oid = queue_node;
  206 
  207 #ifdef RSS
  208                 /* Common stats */
  209                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
  210                     &adapter->que[i].cpu, 0, "CPU affinity");
  211                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
  212                     &adapter->que[i].domain, 0, "NUMA domain");
  213 #endif
  214 
  215                 /* TX specific stats */
  216                 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
  217                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
  218                 tx_list = SYSCTL_CHILDREN(tx_node);
  219 
  220                 tx_stats = &tx_ring->tx_stats;
  221 
  222                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
  223                     CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
  224                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
  225                     CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
  226                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  227                     "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
  228                     "TX buffer preparation failures");
  229                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  230                     "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
  231                     "DMA mapping failures");
  232                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
  233                     CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
  234                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  235                     "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
  236                     "TX completions missed");
  237                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
  238                     CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
  239                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
  240                     CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
  241                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  242                     "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
  243                     "Mbuf collapse failures");
  244                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
  245                     CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
  246                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
  247                     CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
  248                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  249                     "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
  250                     "Header copies for llq transaction");
  251                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  252                     "unmask_interrupt_num", CTLFLAG_RD,
  253                     &tx_stats->unmask_interrupt_num,
  254                     "Unmasked interrupt count");
  255 
  256                 /* RX specific stats */
  257                 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
  258                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
  259                 rx_list = SYSCTL_CHILDREN(rx_node);
  260 
  261                 rx_stats = &rx_ring->rx_stats;
  262 
  263                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
  264                     CTLFLAG_RD, &rx_stats->cnt, "Packets received");
  265                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
  266                     CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
  267                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
  268                     CTLFLAG_RD, &rx_stats->refil_partial,
  269                     "Partial refilled mbufs");
  270                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
  271                     CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
  272                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  273                     "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
  274                     "Failed mbuf allocs");
  275                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  276                     "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
  277                     "Failed jumbo mbuf allocs");
  278                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  279                     "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
  280                     "DMA mapping errors");
  281                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
  282                     CTLFLAG_RD, &rx_stats->bad_desc_num,
  283                     "Bad descriptor count");
  284                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
  285                     CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
  286                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
  287                     CTLFLAG_RD, &rx_stats->empty_rx_ring,
  288                     "RX descriptors depletion count");
  289                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
  290                     CTLFLAG_RD, &rx_stats->csum_good,
  291                     "Valid RX checksum calculations");
  292         }
  293 
  294         /* Stats read from device */
  295         hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
  296             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
  297         hw_list = SYSCTL_CHILDREN(hw_node);
  298 
  299         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
  300             &hw_stats->rx_packets, "Packets received");
  301         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
  302             &hw_stats->tx_packets, "Packets transmitted");
  303         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
  304             &hw_stats->rx_bytes, "Bytes received");
  305         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
  306             &hw_stats->tx_bytes, "Bytes transmitted");
  307         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
  308             &hw_stats->rx_drops, "Receive packet drops");
  309         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
  310             &hw_stats->tx_drops, "Transmit packet drops");
  311 
  312         /* ENA Admin queue stats */
  313         admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
  314             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
  315         admin_list = SYSCTL_CHILDREN(admin_node);
  316 
  317         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
  318             &admin_stats->aborted_cmd, 0, "Aborted commands");
  319         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
  320             &admin_stats->submitted_cmd, 0, "Submitted commands");
  321         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
  322             &admin_stats->completed_cmd, 0, "Completed commands");
  323         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
  324             &admin_stats->out_of_space, 0, "Queue out of space");
  325         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
  326             &admin_stats->no_completion, 0, "Commands not completed");
  327 }
  328 
  329 static void
  330 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
  331 {
  332         device_t dev;
  333         struct ena_admin_eni_stats *eni_metrics;
  334 
  335         struct sysctl_ctx_list *ctx;
  336         struct sysctl_oid *tree;
  337         struct sysctl_oid_list *child;
  338 
  339         struct sysctl_oid *eni_node;
  340         struct sysctl_oid_list *eni_list;
  341 
  342         dev = adapter->pdev;
  343 
  344         ctx = device_get_sysctl_ctx(dev);
  345         tree = device_get_sysctl_tree(dev);
  346         child = SYSCTL_CHILDREN(tree);
  347 
  348         eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
  349             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
  350         eni_list = SYSCTL_CHILDREN(eni_node);
  351 
  352         eni_metrics = &adapter->eni_metrics;
  353 
  354         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
  355             CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
  356             "Inbound BW allowance exceeded");
  357         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
  358             CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
  359             "Outbound BW allowance exceeded");
  360         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
  361             CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
  362             "PPS allowance exceeded");
  363         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
  364             CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
  365             "Connection tracking allowance exceeded");
  366         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
  367             CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
  368             "Linklocal packet rate allowance exceeded");
  369 
  370         /*
  371          * Tuneable, which determines how often ENI metrics will be read.
  372          * 0 means it's turned off. Maximum allowed value is limited by:
  373          * ENI_METRICS_MAX_SAMPLE_INTERVAL.
  374          */
  375         SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
  376             CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  377             ena_sysctl_eni_metrics_interval, "SU",
  378             "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
  379 }
  380 
  381 static void
  382 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
  383 {
  384         device_t dev;
  385 
  386         struct sysctl_ctx_list *ctx;
  387         struct sysctl_oid *tree;
  388         struct sysctl_oid_list *child;
  389 
  390         dev = adapter->pdev;
  391 
  392         ctx = device_get_sysctl_ctx(dev);
  393         tree = device_get_sysctl_tree(dev);
  394         child = SYSCTL_CHILDREN(tree);
  395 
  396         /* Tuneable number of buffers in the buf-ring (drbr) */
  397         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
  398             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  399             ena_sysctl_buf_ring_size, "I",
  400             "Size of the Tx buffer ring (drbr).");
  401 
  402         /* Tuneable number of the Rx ring size */
  403         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
  404             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  405             ena_sysctl_rx_queue_size, "I",
  406             "Size of the Rx ring. The size should be a power of 2.");
  407 
  408         /* Tuneable number of IO queues */
  409         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
  410             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  411             ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
  412 }
  413 
  414 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
  415 #ifndef RSS
  416 static void
  417 ena_sysctl_add_rss(struct ena_adapter *adapter)
  418 {
  419         device_t dev;
  420 
  421         struct sysctl_ctx_list *ctx;
  422         struct sysctl_oid *tree;
  423         struct sysctl_oid_list *child;
  424 
  425         dev = adapter->pdev;
  426 
  427         ctx = device_get_sysctl_ctx(dev);
  428         tree = device_get_sysctl_tree(dev);
  429         child = SYSCTL_CHILDREN(tree);
  430 
  431         /* RSS options */
  432         tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
  433             CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
  434         child = SYSCTL_CHILDREN(tree);
  435 
  436         /* RSS hash key */
  437         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
  438             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  439             ena_sysctl_rss_key, "A", "RSS key.");
  440 
  441         /* Tuneable RSS indirection table */
  442         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
  443             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  444             ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
  445 
  446         /* RSS indirection table size */
  447         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
  448             CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
  449             "RSS indirection table size.");
  450 }
  451 #endif /* RSS */
  452 
  453 
  454 /*
  455  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
  456  *
  457  * Whether the nodes are registered or unregistered depends on a delta between
  458  * the `old` and `new` parameters, representing the number of queues.
  459  *
  460  * This function is used to hide sysctl attributes for queue nodes which aren't
  461  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
  462  *
  463  * NOTE:
  464  * All unregistered nodes must be registered again at detach, i.e. by a call to
  465  * this function.
  466  */
  467 void
  468 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
  469 {
  470         struct sysctl_oid *oid;
  471         int min, max, i;
  472 
  473         min = MIN(old, new);
  474         max = MIN(MAX(old, new), adapter->max_num_io_queues);
  475 
  476         for (i = min; i < max; ++i) {
  477                 oid = adapter->que[i].oid;
  478 
  479                 sysctl_wlock();
  480                 if (old > new)
  481                         sysctl_unregister_oid(oid);
  482                 else
  483                         sysctl_register_oid(oid);
  484                 sysctl_wunlock();
  485         }
  486 }
  487 
  488 static int
  489 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
  490 {
  491         struct ena_adapter *adapter = arg1;
  492         uint32_t val;
  493         int error;
  494 
  495         ENA_LOCK_LOCK();
  496         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  497                 error = EINVAL;
  498                 goto unlock;
  499         }
  500 
  501         val = 0;
  502         error = sysctl_wire_old_buffer(req, sizeof(val));
  503         if (error == 0) {
  504                 val = adapter->buf_ring_size;
  505                 error = sysctl_handle_32(oidp, &val, 0, req);
  506         }
  507         if (error != 0 || req->newptr == NULL)
  508                 goto unlock;
  509 
  510         if (!powerof2(val) || val == 0) {
  511                 ena_log(adapter->pdev, ERR,
  512                     "Requested new Tx buffer ring size (%u) is not a power of 2\n",
  513                     val);
  514                 error = EINVAL;
  515                 goto unlock;
  516         }
  517 
  518         if (val != adapter->buf_ring_size) {
  519                 ena_log(adapter->pdev, INFO,
  520                     "Requested new Tx buffer ring size: %d. Old size: %d\n",
  521                     val, adapter->buf_ring_size);
  522 
  523                 error = ena_update_buf_ring_size(adapter, val);
  524         } else {
  525                 ena_log(adapter->pdev, ERR,
  526                     "New Tx buffer ring size is the same as already used: %u\n",
  527                     adapter->buf_ring_size);
  528         }
  529 
  530 unlock:
  531         ENA_LOCK_UNLOCK();
  532 
  533         return (error);
  534 }
  535 
  536 static int
  537 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
  538 {
  539         struct ena_adapter *adapter = arg1;
  540         uint32_t val;
  541         int error;
  542 
  543         ENA_LOCK_LOCK();
  544         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  545                 error = EINVAL;
  546                 goto unlock;
  547         }
  548 
  549         val = 0;
  550         error = sysctl_wire_old_buffer(req, sizeof(val));
  551         if (error == 0) {
  552                 val = adapter->requested_rx_ring_size;
  553                 error = sysctl_handle_32(oidp, &val, 0, req);
  554         }
  555         if (error != 0 || req->newptr == NULL)
  556                 goto unlock;
  557 
  558         if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
  559                 ena_log(adapter->pdev, ERR,
  560                     "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
  561                     val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
  562                 error = EINVAL;
  563                 goto unlock;
  564         }
  565 
  566         /* Check if the parameter is power of 2 */
  567         if (!powerof2(val)) {
  568                 ena_log(adapter->pdev, ERR,
  569                     "Requested new Rx queue size (%u) is not a power of 2\n",
  570                     val);
  571                 error = EINVAL;
  572                 goto unlock;
  573         }
  574 
  575         if (val != adapter->requested_rx_ring_size) {
  576                 ena_log(adapter->pdev, INFO,
  577                     "Requested new Rx queue size: %u. Old size: %u\n", val,
  578                     adapter->requested_rx_ring_size);
  579 
  580                 error = ena_update_queue_size(adapter,
  581                     adapter->requested_tx_ring_size, val);
  582         } else {
  583                 ena_log(adapter->pdev, ERR,
  584                     "New Rx queue size is the same as already used: %u\n",
  585                     adapter->requested_rx_ring_size);
  586         }
  587 
  588 unlock:
  589         ENA_LOCK_UNLOCK();
  590 
  591         return (error);
  592 }
  593 
  594 /*
  595  * Change number of effectively used IO queues adapter->num_io_queues
  596  */
  597 static int
  598 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
  599 {
  600         struct ena_adapter *adapter = arg1;
  601         uint32_t old_num_queues, tmp = 0;
  602         int error;
  603 
  604         ENA_LOCK_LOCK();
  605         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  606                 error = EINVAL;
  607                 goto unlock;
  608         }
  609 
  610         error = sysctl_wire_old_buffer(req, sizeof(tmp));
  611         if (error == 0) {
  612                 tmp = adapter->num_io_queues;
  613                 error = sysctl_handle_int(oidp, &tmp, 0, req);
  614         }
  615         if (error != 0 || req->newptr == NULL)
  616                 goto unlock;
  617 
  618         if (tmp == 0) {
  619                 ena_log(adapter->pdev, ERR,
  620                     "Requested number of IO queues is zero\n");
  621                 error = EINVAL;
  622                 goto unlock;
  623         }
  624 
  625         /*
  626          * The adapter::max_num_io_queues is the HW capability. The system
  627          * resources availability may potentially be a tighter limit. Therefore
  628          * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
  629          * always holds true, while the `adapter::msix_vecs` is variable across
  630          * device reset (`ena_destroy_device()` + `ena_restore_device()`).
  631          */
  632         if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
  633                 ena_log(adapter->pdev, ERR,
  634                     "Requested number of IO queues is higher than maximum allowed (%u)\n",
  635                     adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
  636                 error = EINVAL;
  637                 goto unlock;
  638         }
  639         if (tmp == adapter->num_io_queues) {
  640                 ena_log(adapter->pdev, ERR,
  641                     "Requested number of IO queues is equal to current value "
  642                     "(%u)\n",
  643                     adapter->num_io_queues);
  644         } else {
  645                 ena_log(adapter->pdev, INFO,
  646                     "Requested new number of IO queues: %u, current value: "
  647                     "%u\n",
  648                     tmp, adapter->num_io_queues);
  649 
  650                 old_num_queues = adapter->num_io_queues;
  651                 error = ena_update_io_queue_nb(adapter, tmp);
  652                 if (error != 0)
  653                         return (error);
  654 
  655                 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
  656         }
  657 
  658 unlock:
  659         ENA_LOCK_UNLOCK();
  660 
  661         return (error);
  662 }
  663 
  664 static int
  665 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
  666 {
  667         struct ena_adapter *adapter = arg1;
  668         uint16_t interval;
  669         int error;
  670 
  671         ENA_LOCK_LOCK();
  672         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  673                 error = EINVAL;
  674                 goto unlock;
  675         }
  676 
  677         error = sysctl_wire_old_buffer(req, sizeof(interval));
  678         if (error == 0) {
  679                 interval = adapter->eni_metrics_sample_interval;
  680                 error = sysctl_handle_16(oidp, &interval, 0, req);
  681         }
  682         if (error != 0 || req->newptr == NULL)
  683                 goto unlock;
  684 
  685         if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
  686                 ena_log(adapter->pdev, ERR,
  687                     "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
  688                     ENI_METRICS_MAX_SAMPLE_INTERVAL);
  689                 error = EINVAL;
  690                 goto unlock;
  691         }
  692 
  693         if (interval == 0) {
  694                 ena_log(adapter->pdev, INFO,
  695                     "ENI metrics update is now turned off\n");
  696                 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
  697         } else {
  698                 ena_log(adapter->pdev, INFO,
  699                     "ENI metrics update interval is set to: %" PRIu16
  700                     " seconds\n",
  701                     interval);
  702         }
  703 
  704         adapter->eni_metrics_sample_interval = interval;
  705 
  706 unlock:
  707         ENA_LOCK_UNLOCK();
  708 
  709         return (0);
  710 }
  711 
  712 #ifndef RSS
  713 /*
  714  * Change the Receive Side Scaling hash key.
  715  */
  716 static int
  717 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
  718 {
  719         struct ena_adapter *adapter = arg1;
  720         struct ena_com_dev *ena_dev = adapter->ena_dev;
  721         enum ena_admin_hash_functions ena_func;
  722         char msg[ENA_HASH_KEY_MSG_SIZE];
  723         char elem[3] = { 0 };
  724         char *endp;
  725         u8 rss_key[ENA_HASH_KEY_SIZE];
  726         int error, i;
  727 
  728         ENA_LOCK_LOCK();
  729         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  730                 error = EINVAL;
  731                 goto unlock;
  732         }
  733 
  734         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
  735                 error = ENOTSUP;
  736                 goto unlock;
  737         }
  738 
  739         error = sysctl_wire_old_buffer(req, sizeof(msg));
  740         if (error != 0)
  741                 goto unlock;
  742 
  743         error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
  744         if (error != 0) {
  745                 device_printf(adapter->pdev, "Cannot get hash function\n");
  746                 goto unlock;
  747         }
  748 
  749         if (ena_func != ENA_ADMIN_TOEPLITZ) {
  750                 error = EINVAL;
  751                 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
  752                 goto unlock;
  753         }
  754 
  755         error = ena_rss_get_hash_key(ena_dev, rss_key);
  756         if (error != 0) {
  757                 device_printf(adapter->pdev, "Cannot get hash key\n");
  758                 goto unlock;
  759         }
  760 
  761         for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
  762                 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
  763 
  764         error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
  765         if (error != 0 || req->newptr == NULL)
  766                 goto unlock;
  767 
  768         if (strlen(msg) != sizeof(msg) - 1) {
  769                 error = EINVAL;
  770                 device_printf(adapter->pdev, "Invalid key size\n");
  771                 goto unlock;
  772         }
  773 
  774         for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
  775                 strncpy(elem, &msg[i * 2], 2);
  776                 rss_key[i] = strtol(elem, &endp, 16);
  777 
  778                 /* Both hex nibbles in the string must be valid to continue. */
  779                 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
  780                         error = EINVAL;
  781                         device_printf(adapter->pdev,
  782                             "Invalid key hex value: '%c'\n", *endp);
  783                         goto unlock;
  784                 }
  785         }
  786 
  787         error = ena_rss_set_hash(ena_dev, rss_key);
  788         if (error != 0)
  789                 device_printf(adapter->pdev, "Cannot fill hash key\n");
  790 
  791 unlock:
  792         ENA_LOCK_UNLOCK();
  793 
  794         return (error);
  795 }
  796 
  797 /*
  798  * Change the Receive Side Scaling indirection table.
  799  *
  800  * The sysctl entry string consists of one or more `x:y` keypairs, where
  801  * x stands for the table index and y for its new value.
  802  * Table indices that don't need to be updated can be omitted from the string
  803  * and will retain their existing values. If an index is entered more than once,
  804  * the last value is used.
  805  *
  806  * Example:
  807  * To update two selected indices in the RSS indirection table, e.g. setting
  808  * index 0 to queue 5 and then index 5 to queue 0, the below command should be
  809  * used:
  810  *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
  811  */
  812 static int
  813 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
  814 {
  815         int num_queues, error;
  816         struct ena_adapter *adapter = arg1;
  817         struct ena_indir *indir;
  818         char *msg, *buf, *endp;
  819         uint32_t idx, value;
  820 
  821         ENA_LOCK_LOCK();
  822         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  823                 error = EINVAL;
  824                 goto unlock;
  825         }
  826 
  827         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
  828                 error = ENOTSUP;
  829                 goto unlock;
  830         }
  831 
  832         indir = adapter->rss_indir;
  833         msg = indir->sysctl_buf;
  834 
  835         if (unlikely(indir == NULL)) {
  836                 error = ENOTSUP;
  837                 goto unlock;
  838         }
  839 
  840         error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
  841         if (error != 0 || req->newptr == NULL)
  842                 goto unlock;
  843 
  844         num_queues = adapter->num_io_queues;
  845 
  846         /*
  847          * This sysctl expects msg to be a list of `x:y` record pairs,
  848          * where x is the indirection table index and y is its value.
  849          */
  850         for (buf = msg; *buf != '\0'; buf = endp) {
  851                 idx = strtol(buf, &endp, 10);
  852 
  853                 if (endp == buf || idx < 0) {
  854                         device_printf(adapter->pdev, "Invalid index: %s\n",
  855                             buf);
  856                         error = EINVAL;
  857                         break;
  858                 }
  859 
  860                 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
  861                         device_printf(adapter->pdev, "Index %d out of range\n",
  862                             idx);
  863                         error = ERANGE;
  864                         break;
  865                 }
  866 
  867                 buf = endp;
  868 
  869                 if (*buf++ != ':') {
  870                         device_printf(adapter->pdev, "Missing ':' separator\n");
  871                         error = EINVAL;
  872                         break;
  873                 }
  874 
  875                 value = strtol(buf, &endp, 10);
  876 
  877                 if (endp == buf || value < 0) {
  878                         device_printf(adapter->pdev, "Invalid value: %s\n",
  879                             buf);
  880                         error = EINVAL;
  881                         break;
  882                 }
  883 
  884                 if (value >= num_queues) {
  885                         device_printf(adapter->pdev, "Value %d out of range\n",
  886                             value);
  887                         error = ERANGE;
  888                         break;
  889                 }
  890 
  891                 indir->table[idx] = value;
  892         }
  893 
  894         if (error != 0) /* Reload indirection table with last good data. */
  895                 ena_rss_indir_get(adapter, indir->table);
  896 
  897         /* At this point msg has been clobbered by sysctl_handle_string. */
  898         ena_rss_copy_indir_buf(msg, indir->table);
  899 
  900         if (error == 0)
  901                 error = ena_rss_indir_set(adapter, indir->table);
  902 
  903 unlock:
  904         ENA_LOCK_UNLOCK();
  905 
  906         return (error);
  907 }
  908 #endif /* RSS */
Cache object: 829887044bbe29d890dfe3526031e358 
 
 |