The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/ena/ena_sysctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause
    3  *
    4  * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  *
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  *
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29  */
   30 #include <sys/cdefs.h>
   31 #include <sys/param.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_rss.h"
   35 
   36 #include "ena_sysctl.h"
   37 #include "ena_rss.h"
   38 
   39 static void     ena_sysctl_add_wd(struct ena_adapter *);
   40 static void     ena_sysctl_add_stats(struct ena_adapter *);
   41 static void     ena_sysctl_add_eni_metrics(struct ena_adapter *);
   42 static void     ena_sysctl_add_tuneables(struct ena_adapter *);
   43 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
   44 #ifndef RSS
   45 static void     ena_sysctl_add_rss(struct ena_adapter *);
   46 #endif
   47 static int      ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
   48 static int      ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
   49 static int      ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
   50 static int      ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
   51 #ifndef RSS
   52 static int      ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
   53 static int      ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
   54 #endif
   55 
   56 /* Limit max ENI sample rate to be an hour. */
   57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
   58 #define ENA_HASH_KEY_MSG_SIZE           (ENA_HASH_KEY_SIZE * 2 + 1)
   59 
   60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
   61     "ENA driver parameters");
   62 
   63 /*
   64  * Logging level for changing verbosity of the output
   65  */
   66 int ena_log_level = ENA_INFO;
   67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
   68     &ena_log_level, 0, "Logging level indicating verbosity of the logs");
   69 
   70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
   71     DRV_MODULE_VERSION, "ENA driver version");
   72 
   73 /*
   74  * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
   75  * Using 9k mbufs in low memory conditions might cause allocation to take a lot
   76  * of time and lead to the OS instability as it needs to look for the contiguous
   77  * pages.
   78  * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
   79  * the network performance is the priority, the 9k mbufs can be used.
   80  */
   81 int ena_enable_9k_mbufs = 0;
   82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
   83     &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
   84 
   85 /*
   86  * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
   87  * false. This option may be important for platforms, which often handle packet
   88  * headers on Tx with total header size greater than 96B, as it may
   89  * reduce the latency.
   90  * It also reduces the maximum Tx queue size by half, so it may cause more Tx
   91  * packet drops.
   92  */
   93 bool ena_force_large_llq_header = false;
   94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
   95     &ena_force_large_llq_header, 0,
   96     "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
   97 
   98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
   99 
  100 void
  101 ena_sysctl_add_nodes(struct ena_adapter *adapter)
  102 {
  103         ena_sysctl_add_wd(adapter);
  104         ena_sysctl_add_stats(adapter);
  105         ena_sysctl_add_eni_metrics(adapter);
  106         ena_sysctl_add_tuneables(adapter);
  107 #ifndef RSS
  108         ena_sysctl_add_rss(adapter);
  109 #endif
  110 }
  111 
  112 static void
  113 ena_sysctl_add_wd(struct ena_adapter *adapter)
  114 {
  115         device_t dev;
  116 
  117         struct sysctl_ctx_list *ctx;
  118         struct sysctl_oid *tree;
  119         struct sysctl_oid_list *child;
  120 
  121         dev = adapter->pdev;
  122 
  123         ctx = device_get_sysctl_ctx(dev);
  124         tree = device_get_sysctl_tree(dev);
  125         child = SYSCTL_CHILDREN(tree);
  126 
  127         /* Sysctl calls for Watchdog service */
  128         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
  129             CTLFLAG_RWTUN, &adapter->wd_active, 0,
  130             "Watchdog is active");
  131 
  132         SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
  133             CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
  134             "Timeout for Keep Alive messages");
  135 
  136         SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
  137             CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
  138             "Timeout for TX completion");
  139 
  140         SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
  141             CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
  142             "Number of TX queues to check per run");
  143 
  144         SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
  145             CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
  146             "Max number of timeouted packets");
  147 }
  148 
  149 static void
  150 ena_sysctl_add_stats(struct ena_adapter *adapter)
  151 {
  152         device_t dev;
  153 
  154         struct ena_ring *tx_ring;
  155         struct ena_ring *rx_ring;
  156 
  157         struct ena_hw_stats *hw_stats;
  158         struct ena_stats_dev *dev_stats;
  159         struct ena_stats_tx *tx_stats;
  160         struct ena_stats_rx *rx_stats;
  161         struct ena_com_stats_admin *admin_stats;
  162 
  163         struct sysctl_ctx_list *ctx;
  164         struct sysctl_oid *tree;
  165         struct sysctl_oid_list *child;
  166 
  167         struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
  168         struct sysctl_oid *admin_node;
  169         struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
  170         struct sysctl_oid_list *admin_list;
  171 
  172 #define QUEUE_NAME_LEN 32
  173         char namebuf[QUEUE_NAME_LEN];
  174         int i;
  175 
  176         dev = adapter->pdev;
  177 
  178         ctx = device_get_sysctl_ctx(dev);
  179         tree = device_get_sysctl_tree(dev);
  180         child = SYSCTL_CHILDREN(tree);
  181 
  182         tx_ring = adapter->tx_ring;
  183         rx_ring = adapter->rx_ring;
  184 
  185         hw_stats = &adapter->hw_stats;
  186         dev_stats = &adapter->dev_stats;
  187         admin_stats = &adapter->ena_dev->admin_queue.stats;
  188 
  189         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
  190             CTLFLAG_RD, &dev_stats->wd_expired,
  191             "Watchdog expiry count");
  192         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
  193             CTLFLAG_RD, &dev_stats->interface_up,
  194             "Network interface up count");
  195         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
  196             CTLFLAG_RD, &dev_stats->interface_down,
  197             "Network interface down count");
  198         SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
  199             CTLFLAG_RD, &dev_stats->admin_q_pause,
  200             "Admin queue pauses");
  201 
  202         for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
  203                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
  204 
  205                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
  206                     namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
  207                 queue_list = SYSCTL_CHILDREN(queue_node);
  208 
  209                 adapter->que[i].oid = queue_node;
  210 
  211                 /* TX specific stats */
  212                 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
  213                     "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
  214                 tx_list = SYSCTL_CHILDREN(tx_node);
  215 
  216                 tx_stats = &tx_ring->tx_stats;
  217 
  218                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  219                     "count", CTLFLAG_RD,
  220                     &tx_stats->cnt, "Packets sent");
  221                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  222                     "bytes", CTLFLAG_RD,
  223                     &tx_stats->bytes, "Bytes sent");
  224                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  225                     "prepare_ctx_err", CTLFLAG_RD,
  226                     &tx_stats->prepare_ctx_err,
  227                     "TX buffer preparation failures");
  228                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  229                     "dma_mapping_err", CTLFLAG_RD,
  230                     &tx_stats->dma_mapping_err, "DMA mapping failures");
  231                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  232                     "doorbells", CTLFLAG_RD,
  233                     &tx_stats->doorbells, "Queue doorbells");
  234                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  235                     "missing_tx_comp", CTLFLAG_RD,
  236                     &tx_stats->missing_tx_comp, "TX completions missed");
  237                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  238                     "bad_req_id", CTLFLAG_RD,
  239                     &tx_stats->bad_req_id, "Bad request id count");
  240                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  241                         "mbuf_collapses", CTLFLAG_RD,
  242                         &tx_stats->collapse,
  243                         "Mbuf collapse count");
  244                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  245                         "mbuf_collapse_err", CTLFLAG_RD,
  246                         &tx_stats->collapse_err,
  247                         "Mbuf collapse failures");
  248                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  249                     "queue_wakeups", CTLFLAG_RD,
  250                     &tx_stats->queue_wakeup, "Queue wakeups");
  251                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  252                     "queue_stops", CTLFLAG_RD,
  253                     &tx_stats->queue_stop, "Queue stops");
  254                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  255                     "llq_buffer_copy", CTLFLAG_RD,
  256                     &tx_stats->llq_buffer_copy,
  257                     "Header copies for llq transaction");
  258                 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
  259                     "unmask_interrupt_num", CTLFLAG_RD,
  260                     &tx_stats->unmask_interrupt_num,
  261                     "Unmasked interrupt count");
  262 
  263                 /* RX specific stats */
  264                 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
  265                     "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
  266                 rx_list = SYSCTL_CHILDREN(rx_node);
  267 
  268                 rx_stats = &rx_ring->rx_stats;
  269 
  270                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  271                     "count", CTLFLAG_RD,
  272                     &rx_stats->cnt, "Packets received");
  273                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  274                     "bytes", CTLFLAG_RD,
  275                     &rx_stats->bytes, "Bytes received");
  276                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  277                     "refil_partial", CTLFLAG_RD,
  278                     &rx_stats->refil_partial, "Partial refilled mbufs");
  279                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  280                     "csum_bad", CTLFLAG_RD,
  281                     &rx_stats->csum_bad, "Bad RX checksum");
  282                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  283                     "mbuf_alloc_fail", CTLFLAG_RD,
  284                     &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
  285                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  286                     "mjum_alloc_fail", CTLFLAG_RD,
  287                     &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
  288                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  289                     "dma_mapping_err", CTLFLAG_RD,
  290                     &rx_stats->dma_mapping_err, "DMA mapping errors");
  291                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  292                     "bad_desc_num", CTLFLAG_RD,
  293                     &rx_stats->bad_desc_num, "Bad descriptor count");
  294                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  295                     "bad_req_id", CTLFLAG_RD,
  296                     &rx_stats->bad_req_id, "Bad request id count");
  297                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  298                     "empty_rx_ring", CTLFLAG_RD,
  299                     &rx_stats->empty_rx_ring, "RX descriptors depletion count");
  300                 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
  301                     "csum_good", CTLFLAG_RD,
  302                     &rx_stats->csum_good, "Valid RX checksum calculations");
  303         }
  304 
  305         /* Stats read from device */
  306         hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
  307             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
  308         hw_list = SYSCTL_CHILDREN(hw_node);
  309 
  310         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
  311             &hw_stats->rx_packets, "Packets received");
  312         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
  313             &hw_stats->tx_packets, "Packets transmitted");
  314         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
  315             &hw_stats->rx_bytes, "Bytes received");
  316         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
  317             &hw_stats->tx_bytes, "Bytes transmitted");
  318         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
  319             &hw_stats->rx_drops, "Receive packet drops");
  320         SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
  321             &hw_stats->tx_drops, "Transmit packet drops");
  322 
  323         /* ENA Admin queue stats */
  324         admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
  325             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
  326         admin_list = SYSCTL_CHILDREN(admin_node);
  327 
  328         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
  329             &admin_stats->aborted_cmd, 0, "Aborted commands");
  330         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
  331             &admin_stats->submitted_cmd, 0, "Submitted commands");
  332         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
  333             &admin_stats->completed_cmd, 0, "Completed commands");
  334         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
  335             &admin_stats->out_of_space, 0, "Queue out of space");
  336         SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
  337             &admin_stats->no_completion, 0, "Commands not completed");
  338 }
  339 
  340 static void
  341 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
  342 {
  343         device_t dev;
  344         struct ena_admin_eni_stats *eni_metrics;
  345 
  346         struct sysctl_ctx_list *ctx;
  347         struct sysctl_oid *tree;
  348         struct sysctl_oid_list *child;
  349 
  350         struct sysctl_oid *eni_node;
  351         struct sysctl_oid_list *eni_list;
  352 
  353         dev = adapter->pdev;
  354 
  355         ctx = device_get_sysctl_ctx(dev);
  356         tree = device_get_sysctl_tree(dev);
  357         child = SYSCTL_CHILDREN(tree);
  358 
  359         eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
  360             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
  361         eni_list = SYSCTL_CHILDREN(eni_node);
  362 
  363         eni_metrics = &adapter->eni_metrics;
  364 
  365         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
  366             CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
  367             "Inbound BW allowance exceeded");
  368         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
  369             CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
  370             "Outbound BW allowance exceeded");
  371         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
  372             CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
  373             "PPS allowance exceeded");
  374         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
  375             CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
  376             "Connection tracking allowance exceeded");
  377         SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
  378             CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
  379             "Linklocal packet rate allowance exceeded");
  380 
  381         /*
  382          * Tuneable, which determines how often ENI metrics will be read.
  383          * 0 means it's turned off. Maximum allowed value is limited by:
  384          * ENI_METRICS_MAX_SAMPLE_INTERVAL.
  385          */
  386         SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
  387             CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  388             ena_sysctl_eni_metrics_interval, "SU",
  389             "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
  390 }
  391 
  392 static void
  393 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
  394 {
  395         device_t dev;
  396 
  397         struct sysctl_ctx_list *ctx;
  398         struct sysctl_oid *tree;
  399         struct sysctl_oid_list *child;
  400 
  401         dev = adapter->pdev;
  402 
  403         ctx = device_get_sysctl_ctx(dev);
  404         tree = device_get_sysctl_tree(dev);
  405         child = SYSCTL_CHILDREN(tree);
  406 
  407         /* Tuneable number of buffers in the buf-ring (drbr) */
  408         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
  409             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  410             ena_sysctl_buf_ring_size, "I",
  411             "Size of the Tx buffer ring (drbr).");
  412 
  413         /* Tuneable number of the Rx ring size */
  414         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
  415             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  416             ena_sysctl_rx_queue_size, "I",
  417             "Size of the Rx ring. The size should be a power of 2.");
  418 
  419         /* Tuneable number of IO queues */
  420         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
  421             CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  422             ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
  423 }
  424 
  425 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
  426 #ifndef RSS
  427 static void
  428 ena_sysctl_add_rss(struct ena_adapter *adapter)
  429 {
  430         device_t dev;
  431 
  432         struct sysctl_ctx_list *ctx;
  433         struct sysctl_oid *tree;
  434         struct sysctl_oid_list *child;
  435 
  436         dev = adapter->pdev;
  437 
  438         ctx = device_get_sysctl_ctx(dev);
  439         tree = device_get_sysctl_tree(dev);
  440         child = SYSCTL_CHILDREN(tree);
  441 
  442         /* RSS options */
  443         tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
  444             CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
  445         child = SYSCTL_CHILDREN(tree);
  446 
  447         /* RSS hash key */
  448         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
  449             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  450             ena_sysctl_rss_key, "A", "RSS key.");
  451 
  452         /* Tuneable RSS indirection table */
  453         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
  454             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
  455             ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
  456 
  457         /* RSS indirection table size */
  458         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
  459             CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
  460             "RSS indirection table size.");
  461 }
  462 #endif /* RSS */
  463 
  464 
  465 /*
  466  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
  467  *
  468  * Whether the nodes are registered or unregistered depends on a delta between
  469  * the `old` and `new` parameters, representing the number of queues.
  470  *
  471  * This function is used to hide sysctl attributes for queue nodes which aren't
  472  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
  473  *
  474  * NOTE:
  475  * All unregistered nodes must be registered again at detach, i.e. by a call to
  476  * this function.
  477  */
  478 void
  479 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
  480 {
  481         device_t dev;
  482         struct sysctl_oid *oid;
  483         int min, max, i;
  484 
  485         dev = adapter->pdev;
  486         min = MIN(old, new);
  487         max = MIN(MAX(old, new), adapter->max_num_io_queues);
  488 
  489         for (i = min; i < max; ++i) {
  490                 oid = adapter->que[i].oid;
  491 
  492                 sysctl_wlock();
  493                 if (old > new)
  494                         sysctl_unregister_oid(oid);
  495                 else
  496                         sysctl_register_oid(oid);
  497                 sysctl_wunlock();
  498         }
  499 }
  500 
  501 static int
  502 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
  503 {
  504         struct ena_adapter *adapter = arg1;
  505         uint32_t val;
  506         int error;
  507 
  508         ENA_LOCK_LOCK();
  509         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  510                 error = EINVAL;
  511                 goto unlock;
  512         }
  513 
  514         val = 0;
  515         error = sysctl_wire_old_buffer(req, sizeof(val));
  516         if (error == 0) {
  517                 val = adapter->buf_ring_size;
  518                 error = sysctl_handle_32(oidp, &val, 0, req);
  519         }
  520         if (error != 0 || req->newptr == NULL)
  521                 goto unlock;
  522 
  523         if (!powerof2(val) || val == 0) {
  524                 ena_log(adapter->pdev, ERR,
  525                     "Requested new Tx buffer ring size (%u) is not a power of 2\n",
  526                     val);
  527                 error = EINVAL;
  528                 goto unlock;
  529         }
  530 
  531         if (val != adapter->buf_ring_size) {
  532                 ena_log(adapter->pdev, INFO,
  533                     "Requested new Tx buffer ring size: %d. Old size: %d\n",
  534                     val, adapter->buf_ring_size);
  535 
  536                 error = ena_update_buf_ring_size(adapter, val);
  537         } else {
  538                 ena_log(adapter->pdev, ERR,
  539                     "New Tx buffer ring size is the same as already used: %u\n",
  540                     adapter->buf_ring_size);
  541         }
  542 
  543 unlock:
  544         ENA_LOCK_UNLOCK();
  545 
  546         return (error);
  547 }
  548 
  549 static int
  550 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
  551 {
  552         struct ena_adapter *adapter = arg1;
  553         uint32_t val;
  554         int error;
  555 
  556         ENA_LOCK_LOCK();
  557         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  558                 error = EINVAL;
  559                 goto unlock;
  560         }
  561 
  562         val = 0;
  563         error = sysctl_wire_old_buffer(req, sizeof(val));
  564         if (error == 0) {
  565                 val = adapter->requested_rx_ring_size;
  566                 error = sysctl_handle_32(oidp, &val, 0, req);
  567         }
  568         if (error != 0 || req->newptr == NULL)
  569                 goto unlock;
  570 
  571         if  (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
  572                 ena_log(adapter->pdev, ERR,
  573                     "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
  574                     val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
  575                 error = EINVAL;
  576                 goto unlock;
  577         }
  578 
  579         /* Check if the parameter is power of 2 */
  580         if (!powerof2(val)) {
  581                 ena_log(adapter->pdev, ERR,
  582                     "Requested new Rx queue size (%u) is not a power of 2\n",
  583                     val);
  584                 error = EINVAL;
  585                 goto unlock;
  586         }
  587 
  588         if (val != adapter->requested_rx_ring_size) {
  589                 ena_log(adapter->pdev, INFO,
  590                     "Requested new Rx queue size: %u. Old size: %u\n",
  591                     val, adapter->requested_rx_ring_size);
  592 
  593                 error = ena_update_queue_size(adapter,
  594                     adapter->requested_tx_ring_size, val);
  595         } else {
  596                 ena_log(adapter->pdev, ERR,
  597                     "New Rx queue size is the same as already used: %u\n",
  598                     adapter->requested_rx_ring_size);
  599         }
  600 
  601 unlock:
  602         ENA_LOCK_UNLOCK();
  603 
  604         return (error);
  605 }
  606 
  607 /*
  608  * Change number of effectively used IO queues adapter->num_io_queues
  609  */
  610 static int
  611 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
  612 {
  613         struct ena_adapter *adapter = arg1;
  614         uint32_t old_num_queues, tmp = 0;
  615         int error;
  616 
  617         ENA_LOCK_LOCK();
  618         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  619                 error = EINVAL;
  620                 goto unlock;
  621         }
  622 
  623         error = sysctl_wire_old_buffer(req, sizeof(tmp));
  624         if (error == 0) {
  625                 tmp = adapter->num_io_queues;
  626                 error = sysctl_handle_int(oidp, &tmp, 0, req);
  627         }
  628         if (error != 0 || req->newptr == NULL)
  629                 goto unlock;
  630 
  631         if (tmp == 0) {
  632                 ena_log(adapter->pdev, ERR,
  633                     "Requested number of IO queues is zero\n");
  634                 error = EINVAL;
  635                 goto unlock;
  636         }
  637 
  638         /*
  639          * The adapter::max_num_io_queues is the HW capability. The system
  640          * resources availability may potentially be a tighter limit. Therefore
  641          * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
  642          * always holds true, while the `adapter::msix_vecs` is variable across
  643          * device reset (`ena_destroy_device()` + `ena_restore_device()`).
  644          */
  645         if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
  646                 ena_log(adapter->pdev, ERR,
  647                     "Requested number of IO queues is higher than maximum "
  648                     "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
  649                 error = EINVAL;
  650                 goto unlock;
  651         }
  652         if (tmp == adapter->num_io_queues) {
  653                 ena_log(adapter->pdev, ERR,
  654                     "Requested number of IO queues is equal to current value "
  655                     "(%u)\n", adapter->num_io_queues);
  656         } else {
  657                 ena_log(adapter->pdev, INFO,
  658                     "Requested new number of IO queues: %u, current value: "
  659                     "%u\n", tmp, adapter->num_io_queues);
  660 
  661                 old_num_queues = adapter->num_io_queues;
  662                 error = ena_update_io_queue_nb(adapter, tmp);
  663                 if (error != 0)
  664                         return (error);
  665 
  666                 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
  667         }
  668 
  669 unlock:
  670         ENA_LOCK_UNLOCK();
  671 
  672         return (error);
  673 }
  674 
  675 static int
  676 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
  677 {
  678         struct ena_adapter *adapter = arg1;
  679         uint16_t interval;
  680         int error;
  681 
  682         ENA_LOCK_LOCK();
  683         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  684                 error = EINVAL;
  685                 goto unlock;
  686         }
  687 
  688         error = sysctl_wire_old_buffer(req, sizeof(interval));
  689         if (error == 0) {
  690                 interval = adapter->eni_metrics_sample_interval;
  691                 error = sysctl_handle_16(oidp, &interval, 0, req);
  692         }
  693         if (error != 0 || req->newptr == NULL)
  694                 goto unlock;
  695 
  696         if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
  697                 ena_log(adapter->pdev, ERR,
  698                     "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
  699                     ENI_METRICS_MAX_SAMPLE_INTERVAL);
  700                 error = EINVAL;
  701                 goto unlock;
  702         }
  703 
  704         if (interval == 0) {
  705                 ena_log(adapter->pdev, INFO,
  706                     "ENI metrics update is now turned off\n");
  707                 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
  708         } else {
  709                 ena_log(adapter->pdev, INFO,
  710                     "ENI metrics update interval is set to: %"PRIu16" seconds\n",
  711                     interval);
  712         }
  713 
  714         adapter->eni_metrics_sample_interval = interval;
  715 
  716 unlock:
  717         ENA_LOCK_UNLOCK();
  718 
  719         return (0);
  720 }
  721 
  722 #ifndef RSS
  723 /*
  724  * Change the Receive Side Scaling hash key.
  725  */
  726 static int
  727 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
  728 {
  729         struct ena_adapter *adapter = arg1;
  730         struct ena_com_dev *ena_dev = adapter->ena_dev;
  731         enum ena_admin_hash_functions ena_func;
  732         char msg[ENA_HASH_KEY_MSG_SIZE];
  733         char elem[3] = { 0 };
  734         char *endp;
  735         u8 rss_key[ENA_HASH_KEY_SIZE];
  736         int error, i;
  737 
  738         ENA_LOCK_LOCK();
  739         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  740                 error = EINVAL;
  741                 goto unlock;
  742         }
  743 
  744         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
  745                 error = ENOTSUP;
  746                 goto unlock;
  747         }
  748 
  749         error = sysctl_wire_old_buffer(req, sizeof(msg));
  750         if (error != 0)
  751                 goto unlock;
  752 
  753         error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
  754         if (error != 0) {
  755                 device_printf(adapter->pdev, "Cannot get hash function\n");
  756                 goto unlock;
  757         }
  758 
  759         if (ena_func != ENA_ADMIN_TOEPLITZ) {
  760                 error = EINVAL;
  761                 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
  762                 goto unlock;
  763         }
  764 
  765         error = ena_rss_get_hash_key(ena_dev, rss_key);
  766         if (error != 0) {
  767                 device_printf(adapter->pdev, "Cannot get hash key\n");
  768                 goto unlock;
  769         }
  770 
  771         for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
  772                 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
  773 
  774         error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
  775         if (error != 0 || req->newptr == NULL)
  776                 goto unlock;
  777 
  778         if (strlen(msg) != sizeof(msg) - 1) {
  779                 error = EINVAL;
  780                 device_printf(adapter->pdev, "Invalid key size\n");
  781                 goto unlock;
  782         }
  783 
  784         for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
  785                 strncpy(elem, &msg[i * 2], 2);
  786                 rss_key[i] = strtol(elem, &endp, 16);
  787 
  788                 /* Both hex nibbles in the string must be valid to continue. */
  789                 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
  790                         error = EINVAL;
  791                         device_printf(adapter->pdev,
  792                             "Invalid key hex value: '%c'\n", *endp);
  793                         goto unlock;
  794                 }
  795         }
  796 
  797         error = ena_rss_set_hash(ena_dev, rss_key);
  798         if (error != 0)
  799                 device_printf(adapter->pdev, "Cannot fill hash key\n");
  800 
  801 unlock:
  802         ENA_LOCK_UNLOCK();
  803 
  804         return (error);
  805 }
  806 
  807 /*
  808  * Change the Receive Side Scaling indirection table.
  809  *
  810  * The sysctl entry string consists of one or more `x:y` keypairs, where
  811  * x stands for the table index and y for its new value.
  812  * Table indices that don't need to be updated can be omitted from the string
  813  * and will retain their existing values. If an index is entered more than once,
  814  * the last value is used.
  815  *
  816  * Example:
  817  * To update two selected indices in the RSS indirection table, e.g. setting
  818  * index 0 to queue 5 and then index 5 to queue 0, the below command should be
  819  * used:
  820  *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
  821  */
  822 static int
  823 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
  824 {
  825         int num_queues, error;
  826         struct ena_adapter *adapter = arg1;
  827         struct ena_com_dev *ena_dev;
  828         struct ena_indir *indir;
  829         char *msg, *buf, *endp;
  830         uint32_t idx, value;
  831 
  832         ENA_LOCK_LOCK();
  833         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
  834                 error = EINVAL;
  835                 goto unlock;
  836         }
  837 
  838         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
  839                 error = ENOTSUP;
  840                 goto unlock;
  841         }
  842 
  843         ena_dev = adapter->ena_dev;
  844         indir = adapter->rss_indir;
  845         msg = indir->sysctl_buf;
  846 
  847         if (unlikely(indir == NULL)) {
  848                 error = ENOTSUP;
  849                 goto unlock;
  850         }
  851 
  852         error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
  853         if (error != 0 || req->newptr == NULL)
  854                 goto unlock;
  855 
  856         num_queues = adapter->num_io_queues;
  857 
  858         /*
  859          * This sysctl expects msg to be a list of `x:y` record pairs,
  860          * where x is the indirection table index and y is its value.
  861          */
  862         for (buf = msg; *buf != '\0'; buf = endp) {
  863                 idx = strtol(buf, &endp, 10);
  864 
  865                 if (endp == buf || idx < 0) {
  866                         device_printf(adapter->pdev, "Invalid index: %s\n",
  867                             buf);
  868                         error = EINVAL;
  869                         break;
  870                 }
  871 
  872                 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
  873                         device_printf(adapter->pdev, "Index %d out of range\n",
  874                             idx);
  875                         error = ERANGE;
  876                         break;
  877                 }
  878 
  879                 buf = endp;
  880 
  881                 if (*buf++ != ':') {
  882                         device_printf(adapter->pdev, "Missing ':' separator\n");
  883                         error = EINVAL;
  884                         break;
  885                 }
  886 
  887                 value = strtol(buf, &endp, 10);
  888 
  889                 if (endp == buf || value < 0) {
  890                         device_printf(adapter->pdev, "Invalid value: %s\n",
  891                             buf);
  892                         error = EINVAL;
  893                         break;
  894                 }
  895 
  896                 if (value >= num_queues) {
  897                         device_printf(adapter->pdev, "Value %d out of range\n",
  898                             value);
  899                         error = ERANGE;
  900                         break;
  901                 }
  902 
  903                 indir->table[idx] = value;
  904         }
  905 
  906         if (error != 0) /* Reload indirection table with last good data. */
  907                 ena_rss_indir_get(adapter, indir->table);
  908 
  909         /* At this point msg has been clobbered by sysctl_handle_string. */
  910         ena_rss_copy_indir_buf(msg, indir->table);
  911 
  912         if (error == 0)
  913                 error = ena_rss_indir_set(adapter, indir->table);
  914 
  915 unlock:
  916         ENA_LOCK_UNLOCK();
  917 
  918         return (error);
  919 }
  920 #endif /* RSS */

Cache object: 8fda07fb449766f30ac0d3d8fdfd731d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.