The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx5/mlx5_en/mlx5_en_main.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
    3  * Copyright (c) 2022 NVIDIA corporation & affiliates.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  *
   26  * $FreeBSD$
   27  */
   28 
   29 #include "opt_kern_tls.h"
   30 #include "opt_rss.h"
   31 #include "opt_ratelimit.h"
   32 
   33 #include <dev/mlx5/mlx5_en/en.h>
   34 
   35 #include <sys/eventhandler.h>
   36 #include <sys/sockio.h>
   37 #include <machine/atomic.h>
   38 
   39 #include <net/debugnet.h>
   40 
   41 static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
   42 static if_snd_tag_query_t mlx5e_ul_snd_tag_query;
   43 static if_snd_tag_free_t mlx5e_ul_snd_tag_free;
   44 
   45 struct mlx5e_channel_param {
   46         struct mlx5e_rq_param rq;
   47         struct mlx5e_sq_param sq;
   48         struct mlx5e_cq_param rx_cq;
   49         struct mlx5e_cq_param tx_cq;
   50 };
   51 
   52 struct media {
   53         u32     subtype;
   54         u64     baudrate;
   55 };
   56 
   57 static const struct media mlx5e_mode_table[MLX5E_LINK_SPEEDS_NUMBER] =
   58 {
   59         [MLX5E_1000BASE_CX_SGMII] = {
   60                 .subtype = IFM_1000_CX_SGMII,
   61                 .baudrate = IF_Mbps(1000ULL),
   62         },
   63         [MLX5E_1000BASE_KX] = {
   64                 .subtype = IFM_1000_KX,
   65                 .baudrate = IF_Mbps(1000ULL),
   66         },
   67         [MLX5E_10GBASE_CX4] = {
   68                 .subtype = IFM_10G_CX4,
   69                 .baudrate = IF_Gbps(10ULL),
   70         },
   71         [MLX5E_10GBASE_KX4] = {
   72                 .subtype = IFM_10G_KX4,
   73                 .baudrate = IF_Gbps(10ULL),
   74         },
   75         [MLX5E_10GBASE_KR] = {
   76                 .subtype = IFM_10G_KR,
   77                 .baudrate = IF_Gbps(10ULL),
   78         },
   79         [MLX5E_20GBASE_KR2] = {
   80                 .subtype = IFM_20G_KR2,
   81                 .baudrate = IF_Gbps(20ULL),
   82         },
   83         [MLX5E_40GBASE_CR4] = {
   84                 .subtype = IFM_40G_CR4,
   85                 .baudrate = IF_Gbps(40ULL),
   86         },
   87         [MLX5E_40GBASE_KR4] = {
   88                 .subtype = IFM_40G_KR4,
   89                 .baudrate = IF_Gbps(40ULL),
   90         },
   91         [MLX5E_56GBASE_R4] = {
   92                 .subtype = IFM_56G_R4,
   93                 .baudrate = IF_Gbps(56ULL),
   94         },
   95         [MLX5E_10GBASE_CR] = {
   96                 .subtype = IFM_10G_CR1,
   97                 .baudrate = IF_Gbps(10ULL),
   98         },
   99         [MLX5E_10GBASE_SR] = {
  100                 .subtype = IFM_10G_SR,
  101                 .baudrate = IF_Gbps(10ULL),
  102         },
  103         [MLX5E_10GBASE_ER_LR] = {
  104                 .subtype = IFM_10G_ER,
  105                 .baudrate = IF_Gbps(10ULL),
  106         },
  107         [MLX5E_40GBASE_SR4] = {
  108                 .subtype = IFM_40G_SR4,
  109                 .baudrate = IF_Gbps(40ULL),
  110         },
  111         [MLX5E_40GBASE_LR4_ER4] = {
  112                 .subtype = IFM_40G_LR4,
  113                 .baudrate = IF_Gbps(40ULL),
  114         },
  115         [MLX5E_100GBASE_CR4] = {
  116                 .subtype = IFM_100G_CR4,
  117                 .baudrate = IF_Gbps(100ULL),
  118         },
  119         [MLX5E_100GBASE_SR4] = {
  120                 .subtype = IFM_100G_SR4,
  121                 .baudrate = IF_Gbps(100ULL),
  122         },
  123         [MLX5E_100GBASE_KR4] = {
  124                 .subtype = IFM_100G_KR4,
  125                 .baudrate = IF_Gbps(100ULL),
  126         },
  127         [MLX5E_100GBASE_LR4] = {
  128                 .subtype = IFM_100G_LR4,
  129                 .baudrate = IF_Gbps(100ULL),
  130         },
  131         [MLX5E_100BASE_TX] = {
  132                 .subtype = IFM_100_TX,
  133                 .baudrate = IF_Mbps(100ULL),
  134         },
  135         [MLX5E_1000BASE_T] = {
  136                 .subtype = IFM_1000_T,
  137                 .baudrate = IF_Mbps(1000ULL),
  138         },
  139         [MLX5E_10GBASE_T] = {
  140                 .subtype = IFM_10G_T,
  141                 .baudrate = IF_Gbps(10ULL),
  142         },
  143         [MLX5E_25GBASE_CR] = {
  144                 .subtype = IFM_25G_CR,
  145                 .baudrate = IF_Gbps(25ULL),
  146         },
  147         [MLX5E_25GBASE_KR] = {
  148                 .subtype = IFM_25G_KR,
  149                 .baudrate = IF_Gbps(25ULL),
  150         },
  151         [MLX5E_25GBASE_SR] = {
  152                 .subtype = IFM_25G_SR,
  153                 .baudrate = IF_Gbps(25ULL),
  154         },
  155         [MLX5E_50GBASE_CR2] = {
  156                 .subtype = IFM_50G_CR2,
  157                 .baudrate = IF_Gbps(50ULL),
  158         },
  159         [MLX5E_50GBASE_KR2] = {
  160                 .subtype = IFM_50G_KR2,
  161                 .baudrate = IF_Gbps(50ULL),
  162         },
  163         [MLX5E_50GBASE_KR4] = {
  164                 .subtype = IFM_50G_KR4,
  165                 .baudrate = IF_Gbps(50ULL),
  166         },
  167 };
  168 
  169 static const struct media mlx5e_ext_mode_table[MLX5E_EXT_LINK_SPEEDS_NUMBER][MLX5E_CABLE_TYPE_NUMBER] =
  170 {
  171         /**/
  172         [MLX5E_SGMII_100M][MLX5E_CABLE_TYPE_UNKNOWN] = {
  173                 .subtype = IFM_100_SGMII,
  174                 .baudrate = IF_Mbps(100),
  175         },
  176 
  177         /**/
  178         [MLX5E_1000BASE_X_SGMII][MLX5E_CABLE_TYPE_UNKNOWN] = {
  179                 .subtype = IFM_1000_CX,
  180                 .baudrate = IF_Mbps(1000),
  181         },
  182         [MLX5E_1000BASE_X_SGMII][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  183                 .subtype = IFM_1000_SX,
  184                 .baudrate = IF_Mbps(1000),
  185         },
  186 
  187         /**/
  188         [MLX5E_5GBASE_R][MLX5E_CABLE_TYPE_UNKNOWN] = {
  189                 .subtype = IFM_5000_KR,
  190                 .baudrate = IF_Mbps(5000),
  191         },
  192         [MLX5E_5GBASE_R][MLX5E_CABLE_TYPE_TWISTED_PAIR] = {
  193                 .subtype = IFM_5000_T,
  194                 .baudrate = IF_Mbps(5000),
  195         },
  196 
  197         /**/
  198         [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_UNKNOWN] = {
  199                 .subtype = IFM_10G_KR,
  200                 .baudrate = IF_Gbps(10ULL),
  201         },
  202         [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  203                 .subtype = IFM_10G_CR1,
  204                 .baudrate = IF_Gbps(10ULL),
  205         },
  206         [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  207                 .subtype = IFM_10G_SR,
  208                 .baudrate = IF_Gbps(10ULL),
  209         },
  210 
  211         /**/
  212         [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_UNKNOWN] = {
  213                 .subtype = IFM_40G_KR4,
  214                 .baudrate = IF_Gbps(40ULL),
  215         },
  216         [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  217                 .subtype = IFM_40G_CR4,
  218                 .baudrate = IF_Gbps(40ULL),
  219         },
  220         [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  221                 .subtype = IFM_40G_SR4,
  222                 .baudrate = IF_Gbps(40ULL),
  223         },
  224 
  225         /**/
  226         [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = {
  227                 .subtype = IFM_25G_KR,
  228                 .baudrate = IF_Gbps(25ULL),
  229         },
  230         [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  231                 .subtype = IFM_25G_CR,
  232                 .baudrate = IF_Gbps(25ULL),
  233         },
  234         [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  235                 .subtype = IFM_25G_SR,
  236                 .baudrate = IF_Gbps(25ULL),
  237         },
  238         [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_TWISTED_PAIR] = {
  239                 .subtype = IFM_25G_T,
  240                 .baudrate = IF_Gbps(25ULL),
  241         },
  242 
  243         /**/
  244         [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = {
  245                 .subtype = IFM_50G_KR2,
  246                 .baudrate = IF_Gbps(50ULL),
  247         },
  248         [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  249                 .subtype = IFM_50G_CR2,
  250                 .baudrate = IF_Gbps(50ULL),
  251         },
  252         [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  253                 .subtype = IFM_50G_SR2,
  254                 .baudrate = IF_Gbps(50ULL),
  255         },
  256 
  257         /**/
  258         [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = {
  259                 .subtype = IFM_50G_KR_PAM4,
  260                 .baudrate = IF_Gbps(50ULL),
  261         },
  262         [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  263                 .subtype = IFM_50G_CP,
  264                 .baudrate = IF_Gbps(50ULL),
  265         },
  266         [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  267                 .subtype = IFM_50G_SR,
  268                 .baudrate = IF_Gbps(50ULL),
  269         },
  270 
  271         /**/
  272         [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = {
  273                 .subtype = IFM_100G_KR4,
  274                 .baudrate = IF_Gbps(100ULL),
  275         },
  276         [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  277                 .subtype = IFM_100G_CR4,
  278                 .baudrate = IF_Gbps(100ULL),
  279         },
  280         [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  281                 .subtype = IFM_100G_SR4,
  282                 .baudrate = IF_Gbps(100ULL),
  283         },
  284 
  285         /**/
  286         [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = {
  287                 .subtype = IFM_100G_KR_PAM4,
  288                 .baudrate = IF_Gbps(100ULL),
  289         },
  290         [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  291                 .subtype = IFM_100G_CR_PAM4,
  292                 .baudrate = IF_Gbps(100ULL),
  293         },
  294         [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  295                 .subtype = IFM_100G_SR2,        /* XXX */
  296                 .baudrate = IF_Gbps(100ULL),
  297         },
  298 
  299         /**/
  300         [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = {
  301                 .subtype = IFM_100G_KR4,
  302                 .baudrate = IF_Gbps(100ULL),
  303         },
  304         [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  305                 .subtype = IFM_100G_CP2,
  306                 .baudrate = IF_Gbps(100ULL),
  307         },
  308         [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  309                 .subtype = IFM_100G_SR2,
  310                 .baudrate = IF_Gbps(100ULL),
  311         },
  312 
  313         /**/
  314         [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = {
  315                 .subtype = IFM_200G_KR4_PAM4,   /* XXX */
  316                 .baudrate = IF_Gbps(200ULL),
  317         },
  318         [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  319                 .subtype = IFM_200G_CR4_PAM4,   /* XXX */
  320                 .baudrate = IF_Gbps(200ULL),
  321         },
  322         [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  323                 .subtype = IFM_200G_SR4,        /* XXX */
  324                 .baudrate = IF_Gbps(200ULL),
  325         },
  326 
  327         /**/
  328         [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = {
  329                 .subtype = IFM_200G_KR4_PAM4,
  330                 .baudrate = IF_Gbps(200ULL),
  331         },
  332         [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = {
  333                 .subtype = IFM_200G_CR4_PAM4,
  334                 .baudrate = IF_Gbps(200ULL),
  335         },
  336         [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = {
  337                 .subtype = IFM_200G_SR4,
  338                 .baudrate = IF_Gbps(200ULL),
  339         },
  340 
  341         /**/
  342         [MLX5E_400GAUI_8][MLX5E_CABLE_TYPE_UNKNOWN] = {
  343                 .subtype = IFM_400G_LR8,        /* XXX */
  344                 .baudrate = IF_Gbps(400ULL),
  345         },
  346 
  347         /**/
  348         [MLX5E_400GAUI_4_400GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = {
  349                 .subtype = IFM_400G_LR8,        /* XXX */
  350                 .baudrate = IF_Gbps(400ULL),
  351         },
  352 };
  353 
  354 static const struct if_snd_tag_sw mlx5e_ul_snd_tag_sw = {
  355         .snd_tag_query = mlx5e_ul_snd_tag_query,
  356         .snd_tag_free = mlx5e_ul_snd_tag_free,
  357         .type = IF_SND_TAG_TYPE_UNLIMITED
  358 };
  359 
  360 DEBUGNET_DEFINE(mlx5_en);
  361 
  362 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
  363 
  364 static void
  365 mlx5e_update_carrier(struct mlx5e_priv *priv)
  366 {
  367         struct mlx5_core_dev *mdev = priv->mdev;
  368         u32 out[MLX5_ST_SZ_DW(ptys_reg)];
  369         u32 eth_proto_oper;
  370         int error;
  371         u8 i;
  372         u8 cable_type;
  373         u8 port_state;
  374         u8 is_er_type;
  375         bool ext;
  376         struct media media_entry = {};
  377 
  378         port_state = mlx5_query_vport_state(mdev,
  379             MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
  380 
  381         if (port_state == VPORT_STATE_UP) {
  382                 priv->media_status_last |= IFM_ACTIVE;
  383         } else {
  384                 priv->media_status_last &= ~IFM_ACTIVE;
  385                 priv->media_active_last = IFM_ETHER;
  386                 if_link_state_change(priv->ifp, LINK_STATE_DOWN);
  387                 return;
  388         }
  389 
  390         error = mlx5_query_port_ptys(mdev, out, sizeof(out),
  391             MLX5_PTYS_EN, 1);
  392         if (error) {
  393                 priv->media_active_last = IFM_ETHER;
  394                 priv->ifp->if_baudrate = 1;
  395                 mlx5_en_err(priv->ifp, "query port ptys failed: 0x%x\n",
  396                     error);
  397                 return;
  398         }
  399 
  400         ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
  401         eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
  402             eth_proto_oper);
  403 
  404         i = ilog2(eth_proto_oper);
  405 
  406         if (ext) {
  407                 error = mlx5_query_pddr_cable_type(mdev, 1, &cable_type);
  408                 if (error != 0) {
  409                         /* use fallback entry */
  410                         media_entry = mlx5e_ext_mode_table[i][MLX5E_CABLE_TYPE_UNKNOWN];
  411 
  412                         mlx5_en_err(priv->ifp,
  413                             "query port pddr failed: %d\n", error);
  414                 } else {
  415                         media_entry = mlx5e_ext_mode_table[i][cable_type];
  416 
  417                         /* check if we should use fallback entry */
  418                         if (media_entry.subtype == 0)
  419                                 media_entry = mlx5e_ext_mode_table[i][MLX5E_CABLE_TYPE_UNKNOWN];
  420                 }
  421         } else {
  422                 media_entry = mlx5e_mode_table[i];
  423         }
  424 
  425         if (media_entry.subtype == 0) {
  426                 mlx5_en_err(priv->ifp,
  427                     "Could not find operational media subtype\n");
  428                 return;
  429         }
  430 
  431         switch (media_entry.subtype) {
  432         case IFM_10G_ER:
  433                 error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
  434                 if (error != 0) {
  435                         mlx5_en_err(priv->ifp,
  436                             "query port pddr failed: %d\n", error);
  437                 }
  438                 if (error != 0 || is_er_type == 0)
  439                         media_entry.subtype = IFM_10G_LR;
  440                 break;
  441         case IFM_40G_LR4:
  442                 error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
  443                 if (error != 0) {
  444                         mlx5_en_err(priv->ifp,
  445                             "query port pddr failed: %d\n", error);
  446                 }
  447                 if (error == 0 && is_er_type != 0)
  448                         media_entry.subtype = IFM_40G_ER4;
  449                 break;
  450         }
  451         priv->media_active_last = media_entry.subtype | IFM_ETHER | IFM_FDX;
  452         priv->ifp->if_baudrate = media_entry.baudrate;
  453 
  454         if_link_state_change(priv->ifp, LINK_STATE_UP);
  455 }
  456 
  457 static void
  458 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
  459 {
  460         struct mlx5e_priv *priv = dev->if_softc;
  461 
  462         ifmr->ifm_status = priv->media_status_last;
  463         ifmr->ifm_current = ifmr->ifm_active = priv->media_active_last |
  464             (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
  465             (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
  466 
  467 }
  468 
  469 static u32
  470 mlx5e_find_link_mode(u32 subtype, bool ext)
  471 {
  472         u32 link_mode = 0;
  473 
  474         switch (subtype) {
  475         case 0:
  476                 goto done;
  477         case IFM_10G_LR:
  478                 subtype = IFM_10G_ER;
  479                 break;
  480         case IFM_40G_ER4:
  481                 subtype = IFM_40G_LR4;
  482                 break;
  483         default:
  484                 break;
  485         }
  486 
  487         if (ext) {
  488                 for (unsigned i = 0; i != MLX5E_EXT_LINK_SPEEDS_NUMBER; i++) {
  489                         for (unsigned j = 0; j != MLX5E_CABLE_TYPE_NUMBER; j++) {
  490                                 if (mlx5e_ext_mode_table[i][j].subtype == subtype)
  491                                         link_mode |= MLX5E_PROT_MASK(i);
  492                         }
  493                 }
  494         } else {
  495                 for (unsigned i = 0; i != MLX5E_LINK_SPEEDS_NUMBER; i++) {
  496                         if (mlx5e_mode_table[i].subtype == subtype)
  497                                 link_mode |= MLX5E_PROT_MASK(i);
  498                 }
  499         }
  500 done:
  501         return (link_mode);
  502 }
  503 
  504 static int
  505 mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
  506 {
  507         return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
  508             priv->params.rx_pauseframe_control,
  509             priv->params.tx_pauseframe_control,
  510             priv->params.rx_priority_flow_control,
  511             priv->params.tx_priority_flow_control));
  512 }
  513 
  514 static int
  515 mlx5e_set_port_pfc(struct mlx5e_priv *priv)
  516 {
  517         int error;
  518 
  519         if (priv->gone != 0) {
  520                 error = -ENXIO;
  521         } else if (priv->params.rx_pauseframe_control ||
  522             priv->params.tx_pauseframe_control) {
  523                 mlx5_en_err(priv->ifp,
  524                     "Global pauseframes must be disabled before enabling PFC.\n");
  525                 error = -EINVAL;
  526         } else {
  527                 error = mlx5e_set_port_pause_and_pfc(priv);
  528         }
  529         return (error);
  530 }
  531 
  532 static int
  533 mlx5e_media_change(struct ifnet *dev)
  534 {
  535         struct mlx5e_priv *priv = dev->if_softc;
  536         struct mlx5_core_dev *mdev = priv->mdev;
  537         u32 eth_proto_cap;
  538         u32 link_mode;
  539         u32 out[MLX5_ST_SZ_DW(ptys_reg)];
  540         int was_opened;
  541         int locked;
  542         int error;
  543         bool ext;
  544 
  545         locked = PRIV_LOCKED(priv);
  546         if (!locked)
  547                 PRIV_LOCK(priv);
  548 
  549         if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
  550                 error = EINVAL;
  551                 goto done;
  552         }
  553 
  554         error = mlx5_query_port_ptys(mdev, out, sizeof(out),
  555             MLX5_PTYS_EN, 1);
  556         if (error != 0) {
  557                 mlx5_en_err(dev, "Query port media capability failed\n");
  558                 goto done;
  559         }
  560 
  561         ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
  562         link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media), ext);
  563 
  564         /* query supported capabilities */
  565         eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
  566             eth_proto_capability);
  567 
  568         /* check for autoselect */
  569         if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
  570                 link_mode = eth_proto_cap;
  571                 if (link_mode == 0) {
  572                         mlx5_en_err(dev, "Port media capability is zero\n");
  573                         error = EINVAL;
  574                         goto done;
  575                 }
  576         } else {
  577                 link_mode = link_mode & eth_proto_cap;
  578                 if (link_mode == 0) {
  579                         mlx5_en_err(dev, "Not supported link mode requested\n");
  580                         error = EINVAL;
  581                         goto done;
  582                 }
  583         }
  584         if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
  585                 /* check if PFC is enabled */
  586                 if (priv->params.rx_priority_flow_control ||
  587                     priv->params.tx_priority_flow_control) {
  588                         mlx5_en_err(dev, "PFC must be disabled before enabling global pauseframes.\n");
  589                         error = EINVAL;
  590                         goto done;
  591                 }
  592         }
  593         /* update pauseframe control bits */
  594         priv->params.rx_pauseframe_control =
  595             (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
  596         priv->params.tx_pauseframe_control =
  597             (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
  598 
  599         /* check if device is opened */
  600         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
  601 
  602         /* reconfigure the hardware */
  603         mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
  604         mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN, ext);
  605         error = -mlx5e_set_port_pause_and_pfc(priv);
  606         if (was_opened)
  607                 mlx5_set_port_status(mdev, MLX5_PORT_UP);
  608 
  609 done:
  610         if (!locked)
  611                 PRIV_UNLOCK(priv);
  612         return (error);
  613 }
  614 
  615 static void
  616 mlx5e_update_carrier_work(struct work_struct *work)
  617 {
  618         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
  619             update_carrier_work);
  620 
  621         PRIV_LOCK(priv);
  622         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
  623                 mlx5e_update_carrier(priv);
  624         PRIV_UNLOCK(priv);
  625 }
  626 
  627 #define MLX5E_PCIE_PERF_GET_64(a,b,c,d,e,f)    \
  628         s_debug->c = MLX5_GET64(mpcnt_reg, out, counter_set.f.c);
  629 
  630 #define MLX5E_PCIE_PERF_GET_32(a,b,c,d,e,f)    \
  631         s_debug->c = MLX5_GET(mpcnt_reg, out, counter_set.f.c);
  632 
  633 static void
  634 mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
  635 {
  636         struct mlx5_core_dev *mdev = priv->mdev;
  637         struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
  638         const unsigned sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
  639         void *out;
  640         void *in;
  641         int err;
  642 
  643         /* allocate firmware request structures */
  644         in = mlx5_vzalloc(sz);
  645         out = mlx5_vzalloc(sz);
  646         if (in == NULL || out == NULL)
  647                 goto free_out;
  648 
  649         MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
  650         err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
  651         if (err != 0)
  652                 goto free_out;
  653 
  654         MLX5E_PCIE_PERFORMANCE_COUNTERS_64(MLX5E_PCIE_PERF_GET_64)
  655         MLX5E_PCIE_PERFORMANCE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
  656 
  657         MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
  658         err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
  659         if (err != 0)
  660                 goto free_out;
  661 
  662         MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
  663 
  664         MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_LANE_COUNTERS_GROUP);
  665         err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
  666         if (err != 0)
  667                 goto free_out;
  668 
  669         MLX5E_PCIE_LANE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32)
  670 
  671 free_out:
  672         /* free firmware request structures */
  673         kvfree(in);
  674         kvfree(out);
  675 }
  676 
  677 /*
  678  * This function reads the physical port counters from the firmware
  679  * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
  680  * macros. The output is converted from big-endian 64-bit values into
  681  * host endian ones and stored in the "priv->stats.pport" structure.
  682  */
  683 static void
  684 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
  685 {
  686         struct mlx5_core_dev *mdev = priv->mdev;
  687         struct mlx5e_pport_stats *s = &priv->stats.pport;
  688         struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
  689         u32 *in;
  690         u32 *out;
  691         const u64 *ptr;
  692         unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
  693         unsigned x;
  694         unsigned y;
  695         unsigned z;
  696 
  697         /* allocate firmware request structures */
  698         in = mlx5_vzalloc(sz);
  699         out = mlx5_vzalloc(sz);
  700         if (in == NULL || out == NULL)
  701                 goto free_out;
  702 
  703         /*
  704          * Get pointer to the 64-bit counter set which is located at a
  705          * fixed offset in the output firmware request structure:
  706          */
  707         ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
  708 
  709         MLX5_SET(ppcnt_reg, in, local_port, 1);
  710 
  711         /* read IEEE802_3 counter group using predefined counter layout */
  712         MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
  713         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  714         for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
  715              x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
  716                 s->arg[y] = be64toh(ptr[x]);
  717 
  718         /* read RFC2819 counter group using predefined counter layout */
  719         MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
  720         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  721         for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
  722                 s->arg[y] = be64toh(ptr[x]);
  723 
  724         for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
  725             MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
  726                 s_debug->arg[y] = be64toh(ptr[x]);
  727 
  728         /* read RFC2863 counter group using predefined counter layout */
  729         MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
  730         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  731         for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
  732                 s_debug->arg[y] = be64toh(ptr[x]);
  733 
  734         /* read physical layer stats counter group using predefined counter layout */
  735         MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
  736         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  737         for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
  738                 s_debug->arg[y] = be64toh(ptr[x]);
  739 
  740         /* read Extended Ethernet counter group using predefined counter layout */
  741         MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
  742         mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  743         for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++)
  744                 s_debug->arg[y] = be64toh(ptr[x]);
  745 
  746         /* read Extended Statistical Group */
  747         if (MLX5_CAP_GEN(mdev, pcam_reg) &&
  748             MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) &&
  749             MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) {
  750                 /* read Extended Statistical counter group using predefined counter layout */
  751                 MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
  752                 mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  753 
  754                 for (x = 0; x != MLX5E_PPORT_STATISTICAL_DEBUG_NUM; x++, y++)
  755                         s_debug->arg[y] = be64toh(ptr[x]);
  756         }
  757 
  758         /* read PCIE counters */
  759         mlx5e_update_pcie_counters(priv);
  760 
  761         /* read per-priority counters */
  762         MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
  763 
  764         /* iterate all the priorities */
  765         for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
  766                 MLX5_SET(ppcnt_reg, in, prio_tc, z);
  767                 mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
  768 
  769                 /* read per priority stats counter group using predefined counter layout */
  770                 for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
  771                     MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
  772                         s->arg[y] = be64toh(ptr[x]);
  773         }
  774 
  775 free_out:
  776         /* free firmware request structures */
  777         kvfree(in);
  778         kvfree(out);
  779 }
  780 
  781 static void
  782 mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
  783 {
  784         u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
  785         u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
  786 
  787         if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
  788                 return;
  789 
  790         MLX5_SET(query_vnic_env_in, in, opcode,
  791             MLX5_CMD_OP_QUERY_VNIC_ENV);
  792         MLX5_SET(query_vnic_env_in, in, op_mod, 0);
  793         MLX5_SET(query_vnic_env_in, in, other_vport, 0);
  794 
  795         if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0)
  796                 return;
  797 
  798         priv->stats.vport.rx_steer_missed_packets =
  799             MLX5_GET64(query_vnic_env_out, out,
  800             vport_env.nic_receive_steering_discard);
  801 }
  802 
  803 /*
  804  * This function is called regularly to collect all statistics
  805  * counters from the firmware. The values can be viewed through the
  806  * sysctl interface. Execution is serialized using the priv's global
  807  * configuration lock.
  808  */
  809 static void
  810 mlx5e_update_stats_locked(struct mlx5e_priv *priv)
  811 {
  812         struct mlx5_core_dev *mdev = priv->mdev;
  813         struct mlx5e_vport_stats *s = &priv->stats.vport;
  814         struct mlx5e_sq_stats *sq_stats;
  815         u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
  816         u32 *out;
  817         int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
  818         u64 tso_packets = 0;
  819         u64 tso_bytes = 0;
  820         u64 tx_queue_dropped = 0;
  821         u64 tx_defragged = 0;
  822         u64 tx_offload_none = 0;
  823         u64 lro_packets = 0;
  824         u64 lro_bytes = 0;
  825         u64 sw_lro_queued = 0;
  826         u64 sw_lro_flushed = 0;
  827         u64 rx_csum_none = 0;
  828         u64 rx_wqe_err = 0;
  829         u64 rx_packets = 0;
  830         u64 rx_bytes = 0;
  831         u64 rx_decrypted_error = 0;
  832         u64 rx_decrypted_ok = 0;
  833         u32 rx_out_of_buffer = 0;
  834         int error;
  835         int i;
  836         int j;
  837 
  838         out = mlx5_vzalloc(outlen);
  839         if (out == NULL)
  840                 goto free_out;
  841 
  842         /* Collect firts the SW counters and then HW for consistency */
  843         for (i = 0; i < priv->params.num_channels; i++) {
  844                 struct mlx5e_channel *pch = priv->channel + i;
  845                 struct mlx5e_rq *rq = &pch->rq;
  846                 struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;
  847 
  848                 /* collect stats from LRO */
  849                 rq_stats->sw_lro_queued = rq->lro.lro_queued;
  850                 rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
  851                 sw_lro_queued += rq_stats->sw_lro_queued;
  852                 sw_lro_flushed += rq_stats->sw_lro_flushed;
  853                 lro_packets += rq_stats->lro_packets;
  854                 lro_bytes += rq_stats->lro_bytes;
  855                 rx_csum_none += rq_stats->csum_none;
  856                 rx_wqe_err += rq_stats->wqe_err;
  857                 rx_packets += rq_stats->packets;
  858                 rx_bytes += rq_stats->bytes;
  859                 rx_decrypted_error += rq_stats->decrypted_error_packets;
  860                 rx_decrypted_ok += rq_stats->decrypted_ok_packets;
  861 
  862                 for (j = 0; j < priv->num_tc; j++) {
  863                         sq_stats = &pch->sq[j].stats;
  864 
  865                         tso_packets += sq_stats->tso_packets;
  866                         tso_bytes += sq_stats->tso_bytes;
  867                         tx_queue_dropped += sq_stats->dropped;
  868                         tx_queue_dropped += sq_stats->enobuf;
  869                         tx_defragged += sq_stats->defragged;
  870                         tx_offload_none += sq_stats->csum_offload_none;
  871                 }
  872         }
  873 
  874 #ifdef RATELIMIT
  875         /* Collect statistics from all rate-limit queues */
  876         for (j = 0; j < priv->rl.param.tx_worker_threads_def; j++) {
  877                 struct mlx5e_rl_worker *rlw = priv->rl.workers + j;
  878 
  879                 for (i = 0; i < priv->rl.param.tx_channels_per_worker_def; i++) {
  880                         struct mlx5e_rl_channel *channel = rlw->channels + i;
  881                         struct mlx5e_sq *sq = channel->sq;
  882 
  883                         if (sq == NULL)
  884                                 continue;
  885 
  886                         sq_stats = &sq->stats;
  887 
  888                         tso_packets += sq_stats->tso_packets;
  889                         tso_bytes += sq_stats->tso_bytes;
  890                         tx_queue_dropped += sq_stats->dropped;
  891                         tx_queue_dropped += sq_stats->enobuf;
  892                         tx_defragged += sq_stats->defragged;
  893                         tx_offload_none += sq_stats->csum_offload_none;
  894                 }
  895         }
  896 #endif
  897 
  898         /* update counters */
  899         s->tso_packets = tso_packets;
  900         s->tso_bytes = tso_bytes;
  901         s->tx_queue_dropped = tx_queue_dropped;
  902         s->tx_defragged = tx_defragged;
  903         s->lro_packets = lro_packets;
  904         s->lro_bytes = lro_bytes;
  905         s->sw_lro_queued = sw_lro_queued;
  906         s->sw_lro_flushed = sw_lro_flushed;
  907         s->rx_csum_none = rx_csum_none;
  908         s->rx_wqe_err = rx_wqe_err;
  909         s->rx_packets = rx_packets;
  910         s->rx_bytes = rx_bytes;
  911         s->rx_decrypted_error_packets = rx_decrypted_error;
  912         s->rx_decrypted_ok_packets = rx_decrypted_ok;
  913 
  914         mlx5e_grp_vnic_env_update_stats(priv);
  915 
  916         /* HW counters */
  917         memset(in, 0, sizeof(in));
  918 
  919         MLX5_SET(query_vport_counter_in, in, opcode,
  920             MLX5_CMD_OP_QUERY_VPORT_COUNTER);
  921         MLX5_SET(query_vport_counter_in, in, op_mod, 0);
  922         MLX5_SET(query_vport_counter_in, in, other_vport, 0);
  923 
  924         memset(out, 0, outlen);
  925 
  926         /* get number of out-of-buffer drops first */
  927         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
  928             mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
  929             &rx_out_of_buffer) == 0) {
  930                 s->rx_out_of_buffer = rx_out_of_buffer;
  931         }
  932 
  933         /* get port statistics */
  934         if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) {
  935 #define MLX5_GET_CTR(out, x) \
  936         MLX5_GET64(query_vport_counter_out, out, x)
  937 
  938                 s->rx_error_packets =
  939                     MLX5_GET_CTR(out, received_errors.packets);
  940                 s->rx_error_bytes =
  941                     MLX5_GET_CTR(out, received_errors.octets);
  942                 s->tx_error_packets =
  943                     MLX5_GET_CTR(out, transmit_errors.packets);
  944                 s->tx_error_bytes =
  945                     MLX5_GET_CTR(out, transmit_errors.octets);
  946 
  947                 s->rx_unicast_packets =
  948                     MLX5_GET_CTR(out, received_eth_unicast.packets);
  949                 s->rx_unicast_bytes =
  950                     MLX5_GET_CTR(out, received_eth_unicast.octets);
  951                 s->tx_unicast_packets =
  952                     MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
  953                 s->tx_unicast_bytes =
  954                     MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
  955 
  956                 s->rx_multicast_packets =
  957                     MLX5_GET_CTR(out, received_eth_multicast.packets);
  958                 s->rx_multicast_bytes =
  959                     MLX5_GET_CTR(out, received_eth_multicast.octets);
  960                 s->tx_multicast_packets =
  961                     MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
  962                 s->tx_multicast_bytes =
  963                     MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
  964 
  965                 s->rx_broadcast_packets =
  966                     MLX5_GET_CTR(out, received_eth_broadcast.packets);
  967                 s->rx_broadcast_bytes =
  968                     MLX5_GET_CTR(out, received_eth_broadcast.octets);
  969                 s->tx_broadcast_packets =
  970                     MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
  971                 s->tx_broadcast_bytes =
  972                     MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
  973 
  974                 s->tx_packets = s->tx_unicast_packets +
  975                     s->tx_multicast_packets + s->tx_broadcast_packets;
  976                 s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes +
  977                     s->tx_broadcast_bytes;
  978 
  979                 /* Update calculated offload counters */
  980                 s->tx_csum_offload = s->tx_packets - tx_offload_none;
  981                 s->rx_csum_good = s->rx_packets - s->rx_csum_none;
  982         }
  983 
  984         /* Get physical port counters */
  985         mlx5e_update_pport_counters(priv);
  986 
  987         s->tx_jumbo_packets =
  988             priv->stats.port_stats_debug.tx_stat_p1519to2047octets +
  989             priv->stats.port_stats_debug.tx_stat_p2048to4095octets +
  990             priv->stats.port_stats_debug.tx_stat_p4096to8191octets +
  991             priv->stats.port_stats_debug.tx_stat_p8192to10239octets;
  992 
  993 free_out:
  994         kvfree(out);
  995 
  996         /* Update diagnostics, if any */
  997         if (priv->params_ethtool.diag_pci_enable ||
  998             priv->params_ethtool.diag_general_enable) {
  999                 error = mlx5_core_get_diagnostics_full(mdev,
 1000                     priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
 1001                     priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
 1002                 if (error != 0)
 1003                         mlx5_en_err(priv->ifp,
 1004                             "Failed reading diagnostics: %d\n", error);
 1005         }
 1006 
 1007         /* Update FEC, if any */
 1008         error = mlx5e_fec_update(priv);
 1009         if (error != 0 && error != EOPNOTSUPP) {
 1010                 mlx5_en_err(priv->ifp,
 1011                     "Updating FEC failed: %d\n", error);
 1012         }
 1013 
 1014         /* Update temperature, if any */
 1015         if (priv->params_ethtool.hw_num_temp != 0) {
 1016                 error = mlx5e_hw_temperature_update(priv);
 1017                 if (error != 0 && error != EOPNOTSUPP) {
 1018                         mlx5_en_err(priv->ifp,
 1019                             "Updating temperature failed: %d\n", error);
 1020                 }
 1021         }
 1022 }
 1023 
 1024 static void
 1025 mlx5e_update_stats_work(struct work_struct *work)
 1026 {
 1027         struct mlx5e_priv *priv;
 1028 
 1029         priv = container_of(work, struct mlx5e_priv, update_stats_work);
 1030         PRIV_LOCK(priv);
 1031         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
 1032             !test_bit(MLX5_INTERFACE_STATE_TEARDOWN, &priv->mdev->intf_state))
 1033                 mlx5e_update_stats_locked(priv);
 1034         PRIV_UNLOCK(priv);
 1035 }
 1036 
 1037 static void
 1038 mlx5e_update_stats(void *arg)
 1039 {
 1040         struct mlx5e_priv *priv = arg;
 1041 
 1042         queue_work(priv->wq, &priv->update_stats_work);
 1043 
 1044         callout_reset(&priv->watchdog, hz / 4, &mlx5e_update_stats, priv);
 1045 }
 1046 
 1047 static void
 1048 mlx5e_async_event_sub(struct mlx5e_priv *priv,
 1049     enum mlx5_dev_event event)
 1050 {
 1051         switch (event) {
 1052         case MLX5_DEV_EVENT_PORT_UP:
 1053         case MLX5_DEV_EVENT_PORT_DOWN:
 1054                 queue_work(priv->wq, &priv->update_carrier_work);
 1055                 break;
 1056 
 1057         default:
 1058                 break;
 1059         }
 1060 }
 1061 
 1062 static void
 1063 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 1064     enum mlx5_dev_event event, unsigned long param)
 1065 {
 1066         struct mlx5e_priv *priv = vpriv;
 1067 
 1068         mtx_lock(&priv->async_events_mtx);
 1069         if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
 1070                 mlx5e_async_event_sub(priv, event);
 1071         mtx_unlock(&priv->async_events_mtx);
 1072 }
 1073 
 1074 static void
 1075 mlx5e_enable_async_events(struct mlx5e_priv *priv)
 1076 {
 1077         set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
 1078 }
 1079 
 1080 static void
 1081 mlx5e_disable_async_events(struct mlx5e_priv *priv)
 1082 {
 1083         mtx_lock(&priv->async_events_mtx);
 1084         clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
 1085         mtx_unlock(&priv->async_events_mtx);
 1086 }
 1087 
 1088 static void mlx5e_calibration_callout(void *arg);
 1089 static int mlx5e_calibration_duration = 20;
 1090 static int mlx5e_fast_calibration = 1;
 1091 static int mlx5e_normal_calibration = 30;
 1092 
 1093 static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 1094     "MLX5 timestamp calibration parameters");
 1095 
 1096 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
 1097     &mlx5e_calibration_duration, 0,
 1098     "Duration of initial calibration");
 1099 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
 1100     &mlx5e_fast_calibration, 0,
 1101     "Recalibration interval during initial calibration");
 1102 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
 1103     &mlx5e_normal_calibration, 0,
 1104     "Recalibration interval during normal operations");
 1105 
 1106 /*
 1107  * Ignites the calibration process.
 1108  */
 1109 static void
 1110 mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
 1111 {
 1112 
 1113         if (priv->clbr_done == 0)
 1114                 mlx5e_calibration_callout(priv);
 1115         else
 1116                 callout_reset_sbt_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
 1117                     mlx5e_calibration_duration ? mlx5e_fast_calibration :
 1118                     mlx5e_normal_calibration) * SBT_1S, 0,
 1119                     mlx5e_calibration_callout, priv, C_DIRECT_EXEC);
 1120 }
 1121 
 1122 static uint64_t
 1123 mlx5e_timespec2usec(const struct timespec *ts)
 1124 {
 1125 
 1126         return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
 1127 }
 1128 
 1129 static uint64_t
 1130 mlx5e_hw_clock(struct mlx5e_priv *priv)
 1131 {
 1132         struct mlx5_init_seg *iseg;
 1133         uint32_t hw_h, hw_h1, hw_l;
 1134 
 1135         iseg = priv->mdev->iseg;
 1136         do {
 1137                 hw_h = ioread32be(&iseg->internal_timer_h);
 1138                 hw_l = ioread32be(&iseg->internal_timer_l);
 1139                 hw_h1 = ioread32be(&iseg->internal_timer_h);
 1140         } while (hw_h1 != hw_h);
 1141         return (((uint64_t)hw_h << 32) | hw_l);
 1142 }
 1143 
 1144 /*
 1145  * The calibration callout, it runs either in the context of the
 1146  * thread which enables calibration, or in callout.  It takes the
 1147  * snapshot of system and adapter clocks, then advances the pointers to
 1148  * the calibration point to allow rx path to read the consistent data
 1149  * lockless.
 1150  */
 1151 static void
 1152 mlx5e_calibration_callout(void *arg)
 1153 {
 1154         struct mlx5e_priv *priv;
 1155         struct mlx5e_clbr_point *next, *curr;
 1156         struct timespec ts;
 1157         int clbr_curr_next;
 1158 
 1159         priv = arg;
 1160         curr = &priv->clbr_points[priv->clbr_curr];
 1161         clbr_curr_next = priv->clbr_curr + 1;
 1162         if (clbr_curr_next >= nitems(priv->clbr_points))
 1163                 clbr_curr_next = 0;
 1164         next = &priv->clbr_points[clbr_curr_next];
 1165 
 1166         next->base_prev = curr->base_curr;
 1167         next->clbr_hw_prev = curr->clbr_hw_curr;
 1168 
 1169         next->clbr_hw_curr = mlx5e_hw_clock(priv);
 1170         if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) ==
 1171             0) {
 1172                 if (priv->clbr_done != 0) {
 1173                         mlx5_en_err(priv->ifp,
 1174                             "HW failed tstmp frozen %#jx %#jx, disabling\n",
 1175                              next->clbr_hw_curr, curr->clbr_hw_prev);
 1176                         priv->clbr_done = 0;
 1177                 }
 1178                 atomic_store_rel_int(&curr->clbr_gen, 0);
 1179                 return;
 1180         }
 1181 
 1182         nanouptime(&ts);
 1183         next->base_curr = mlx5e_timespec2usec(&ts);
 1184 
 1185         curr->clbr_gen = 0;
 1186         atomic_thread_fence_rel();
 1187         priv->clbr_curr = clbr_curr_next;
 1188         atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
 1189 
 1190         if (priv->clbr_done < mlx5e_calibration_duration)
 1191                 priv->clbr_done++;
 1192         mlx5e_reset_calibration_callout(priv);
 1193 }
 1194 
 1195 static const char *mlx5e_rq_stats_desc[] = {
 1196         MLX5E_RQ_STATS(MLX5E_STATS_DESC)
 1197 };
 1198 
 1199 static int
 1200 mlx5e_create_rq(struct mlx5e_channel *c,
 1201     struct mlx5e_rq_param *param,
 1202     struct mlx5e_rq *rq)
 1203 {
 1204         struct mlx5e_priv *priv = c->priv;
 1205         struct mlx5_core_dev *mdev = priv->mdev;
 1206         char buffer[16];
 1207         void *rqc = param->rqc;
 1208         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 1209         int wq_sz;
 1210         int err;
 1211         int i;
 1212         u32 nsegs, wqe_sz;
 1213 
 1214         err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
 1215         if (err != 0)
 1216                 goto done;
 1217 
 1218         /* Create DMA descriptor TAG */
 1219         if ((err = -bus_dma_tag_create(
 1220             bus_get_dma_tag(mdev->pdev->dev.bsddev),
 1221             1,                          /* any alignment */
 1222             0,                          /* no boundary */
 1223             BUS_SPACE_MAXADDR,          /* lowaddr */
 1224             BUS_SPACE_MAXADDR,          /* highaddr */
 1225             NULL, NULL,                 /* filter, filterarg */
 1226             nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
 1227             nsegs,                      /* nsegments */
 1228             nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
 1229             0,                          /* flags */
 1230             NULL, NULL,                 /* lockfunc, lockfuncarg */
 1231             &rq->dma_tag)))
 1232                 goto done;
 1233 
 1234         err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
 1235             &rq->wq_ctrl);
 1236         if (err)
 1237                 goto err_free_dma_tag;
 1238 
 1239         rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
 1240 
 1241         err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
 1242         if (err != 0)
 1243                 goto err_rq_wq_destroy;
 1244 
 1245         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 1246 
 1247         err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz);
 1248         if (err)
 1249                 goto err_rq_wq_destroy;
 1250 
 1251         rq->mbuf = malloc_domainset(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN,
 1252             mlx5_dev_domainset(mdev), M_WAITOK | M_ZERO);
 1253         for (i = 0; i != wq_sz; i++) {
 1254                 struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 1255                 int j;
 1256 
 1257                 err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
 1258                 if (err != 0) {
 1259                         while (i--)
 1260                                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
 1261                         goto err_rq_mbuf_free;
 1262                 }
 1263 
 1264                 /* set value for constant fields */
 1265                 for (j = 0; j < rq->nsegs; j++)
 1266                         wqe->data[j].lkey = cpu_to_be32(priv->mr.key);
 1267         }
 1268 
 1269         INIT_WORK(&rq->dim.work, mlx5e_dim_work);
 1270         if (priv->params.rx_cq_moderation_mode < 2) {
 1271                 rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
 1272         } else {
 1273                 void *cqc = container_of(param,
 1274                     struct mlx5e_channel_param, rq)->rx_cq.cqc;
 1275 
 1276                 switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
 1277                 case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
 1278                         rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 1279                         break;
 1280                 case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
 1281                         rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
 1282                         break;
 1283                 default:
 1284                         rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
 1285                         break;
 1286                 }
 1287         }
 1288 
 1289         rq->ifp = priv->ifp;
 1290         rq->channel = c;
 1291         rq->ix = c->ix;
 1292 
 1293         snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
 1294         mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 1295             buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
 1296             rq->stats.arg);
 1297         return (0);
 1298 
 1299 err_rq_mbuf_free:
 1300         free(rq->mbuf, M_MLX5EN);
 1301         tcp_lro_free(&rq->lro);
 1302 err_rq_wq_destroy:
 1303         mlx5_wq_destroy(&rq->wq_ctrl);
 1304 err_free_dma_tag:
 1305         bus_dma_tag_destroy(rq->dma_tag);
 1306 done:
 1307         return (err);
 1308 }
 1309 
 1310 static void
 1311 mlx5e_destroy_rq(struct mlx5e_rq *rq)
 1312 {
 1313         int wq_sz;
 1314         int i;
 1315 
 1316         /* destroy all sysctl nodes */
 1317         sysctl_ctx_free(&rq->stats.ctx);
 1318 
 1319         /* free leftover LRO packets, if any */
 1320         tcp_lro_free(&rq->lro);
 1321 
 1322         wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 1323         for (i = 0; i != wq_sz; i++) {
 1324                 if (rq->mbuf[i].mbuf != NULL) {
 1325                         bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
 1326                         m_freem(rq->mbuf[i].mbuf);
 1327                 }
 1328                 bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
 1329         }
 1330         free(rq->mbuf, M_MLX5EN);
 1331         mlx5_wq_destroy(&rq->wq_ctrl);
 1332         bus_dma_tag_destroy(rq->dma_tag);
 1333 }
 1334 
 1335 static int
 1336 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 1337 {
 1338         struct mlx5e_channel *c = rq->channel;
 1339         struct mlx5e_priv *priv = c->priv;
 1340         struct mlx5_core_dev *mdev = priv->mdev;
 1341         void *in;
 1342         void *rqc;
 1343         void *wq;
 1344         int inlen;
 1345         int err;
 1346         u8 ts_format;
 1347 
 1348         inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
 1349             sizeof(u64) * rq->wq_ctrl.buf.npages;
 1350         in = mlx5_vzalloc(inlen);
 1351         if (in == NULL)
 1352                 return (-ENOMEM);
 1353 
 1354         ts_format = mlx5_get_rq_default_ts(mdev);
 1355         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
 1356         wq = MLX5_ADDR_OF(rqc, rqc, wq);
 1357 
 1358         memcpy(rqc, param->rqc, sizeof(param->rqc));
 1359 
 1360         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
 1361         MLX5_SET(rqc, rqc, ts_format, ts_format);
 1362         MLX5_SET(rqc, rqc, flush_in_error_en, 1);
 1363         if (priv->counter_set_id >= 0)
 1364                 MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
 1365         MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
 1366             MLX5_ADAPTER_PAGE_SHIFT);
 1367         MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
 1368 
 1369         mlx5_fill_page_array(&rq->wq_ctrl.buf,
 1370             (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
 1371 
 1372         err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
 1373 
 1374         kvfree(in);
 1375 
 1376         return (err);
 1377 }
 1378 
 1379 static int
 1380 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
 1381 {
 1382         struct mlx5e_channel *c = rq->channel;
 1383         struct mlx5e_priv *priv = c->priv;
 1384         struct mlx5_core_dev *mdev = priv->mdev;
 1385 
 1386         void *in;
 1387         void *rqc;
 1388         int inlen;
 1389         int err;
 1390 
 1391         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
 1392         in = mlx5_vzalloc(inlen);
 1393         if (in == NULL)
 1394                 return (-ENOMEM);
 1395 
 1396         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 1397 
 1398         MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
 1399         MLX5_SET(modify_rq_in, in, rq_state, curr_state);
 1400         MLX5_SET(rqc, rqc, state, next_state);
 1401 
 1402         err = mlx5_core_modify_rq(mdev, in, inlen);
 1403 
 1404         kvfree(in);
 1405 
 1406         return (err);
 1407 }
 1408 
 1409 static void
 1410 mlx5e_disable_rq(struct mlx5e_rq *rq)
 1411 {
 1412         struct mlx5e_channel *c = rq->channel;
 1413         struct mlx5e_priv *priv = c->priv;
 1414         struct mlx5_core_dev *mdev = priv->mdev;
 1415 
 1416         mlx5_core_destroy_rq(mdev, rq->rqn);
 1417 }
 1418 
 1419 static int
 1420 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
 1421 {
 1422         struct mlx5e_channel *c = rq->channel;
 1423         struct mlx5e_priv *priv = c->priv;
 1424         struct mlx5_wq_ll *wq = &rq->wq;
 1425         int i;
 1426 
 1427         for (i = 0; i < 1000; i++) {
 1428                 if (wq->cur_sz >= priv->params.min_rx_wqes)
 1429                         return (0);
 1430 
 1431                 msleep(4);
 1432         }
 1433         return (-ETIMEDOUT);
 1434 }
 1435 
 1436 static int
 1437 mlx5e_open_rq(struct mlx5e_channel *c,
 1438     struct mlx5e_rq_param *param,
 1439     struct mlx5e_rq *rq)
 1440 {
 1441         int err;
 1442 
 1443         err = mlx5e_create_rq(c, param, rq);
 1444         if (err)
 1445                 return (err);
 1446 
 1447         /* set CQN in RQ parameters */
 1448         MLX5_SET(rqc, param->rqc, cqn, c->rq.cq.mcq.cqn);
 1449 
 1450         err = mlx5e_enable_rq(rq, param);
 1451         if (err)
 1452                 goto err_destroy_rq;
 1453 
 1454         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 1455         if (err)
 1456                 goto err_disable_rq;
 1457 
 1458         c->rq.enabled = 1;
 1459 
 1460         return (0);
 1461 
 1462 err_disable_rq:
 1463         mlx5e_disable_rq(rq);
 1464 err_destroy_rq:
 1465         mlx5e_destroy_rq(rq);
 1466 
 1467         return (err);
 1468 }
 1469 
 1470 static void
 1471 mlx5e_close_rq(struct mlx5e_rq *rq)
 1472 {
 1473         mtx_lock(&rq->mtx);
 1474         rq->enabled = 0;
 1475         callout_stop(&rq->watchdog);
 1476         mtx_unlock(&rq->mtx);
 1477 
 1478         mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
 1479 }
 1480 
 1481 static void
 1482 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 1483 {
 1484 
 1485         mlx5e_disable_rq(rq);
 1486         mlx5e_close_cq(&rq->cq);
 1487         cancel_work_sync(&rq->dim.work);
 1488         mlx5e_destroy_rq(rq);
 1489 }
 1490 
 1491 /*
 1492  * What is a drop RQ and why is it needed?
 1493  *
 1494  * The RSS indirection table, also called the RQT, selects the
 1495  * destination RQ based on the receive queue number, RQN. The RQT is
 1496  * frequently referred to by flow steering rules to distribute traffic
 1497  * among multiple RQs. The problem is that the RQs cannot be destroyed
 1498  * before the RQT referring them is destroyed too. Further, TLS RX
 1499  * rules may still be referring to the RQT even if the link went
 1500  * down. Because there is no magic RQN for dropping packets, we create
 1501  * a dummy RQ, also called drop RQ, which sole purpose is to drop all
 1502  * received packets. When the link goes down this RQN is filled in all
 1503  * RQT entries, of the main RQT, so the real RQs which are about to be
 1504  * destroyed can be released and the TLS RX rules can be sustained.
 1505  */
 1506 static void
 1507 mlx5e_open_drop_rq_comp(struct mlx5_core_cq *mcq __unused, struct mlx5_eqe *eqe __unused)
 1508 {
 1509 }
 1510 
 1511 static int
 1512 mlx5e_open_drop_rq(struct mlx5e_priv *priv,
 1513     struct mlx5e_rq *drop_rq)
 1514 {
 1515         struct mlx5e_cq_param param_cq = {};
 1516         struct mlx5e_rq_param param_rq = {};
 1517         void *rqc_wq = MLX5_ADDR_OF(rqc, param_rq.rqc, wq);
 1518         int err;
 1519 
 1520         /* set channel pointer */
 1521         drop_rq->channel = priv->channel;
 1522 
 1523         /* set basic CQ parameters needed */
 1524         MLX5_SET(cqc, param_cq.cqc, log_cq_size, 0);
 1525         MLX5_SET(cqc, param_cq.cqc, uar_page, priv->mdev->priv.uar->index);
 1526 
 1527         /* open receive completion queue */
 1528         err = mlx5e_open_cq(priv, &param_cq, &drop_rq->cq,
 1529             &mlx5e_open_drop_rq_comp, 0);
 1530         if (err)
 1531                 goto err_done;
 1532 
 1533         /* set basic WQ parameters needed */
 1534         MLX5_SET(wq, rqc_wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
 1535         MLX5_SET(wq, rqc_wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 1536         MLX5_SET(wq, rqc_wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) + sizeof(struct mlx5_wqe_data_seg)));
 1537         MLX5_SET(wq, rqc_wq, log_wq_sz, 0);
 1538         MLX5_SET(wq, rqc_wq, pd, priv->pdn);
 1539 
 1540         param_rq.wq.linear = 1;
 1541 
 1542         err = mlx5_wq_ll_create(priv->mdev, &param_rq.wq, rqc_wq, &drop_rq->wq,
 1543             &drop_rq->wq_ctrl);
 1544         if (err)
 1545                 goto err_close_cq;
 1546 
 1547         /* set CQN in RQ parameters */
 1548         MLX5_SET(rqc, param_rq.rqc, cqn, drop_rq->cq.mcq.cqn);
 1549 
 1550         err = mlx5e_enable_rq(drop_rq, &param_rq);
 1551         if (err)
 1552                 goto err_wq_destroy;
 1553 
 1554         err = mlx5e_modify_rq(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 1555         if (err)
 1556                 goto err_disable_rq;
 1557 
 1558         return (err);
 1559 
 1560 err_disable_rq:
 1561         mlx5e_disable_rq(drop_rq);
 1562 err_wq_destroy:
 1563         mlx5_wq_destroy(&drop_rq->wq_ctrl);
 1564 err_close_cq:
 1565         mlx5e_close_cq(&drop_rq->cq);
 1566 err_done:
 1567         return (err);
 1568 }
 1569 
 1570 static void
 1571 mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
 1572 {
 1573         mlx5e_modify_rq(drop_rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
 1574         mlx5e_disable_rq(drop_rq);
 1575         mlx5_wq_destroy(&drop_rq->wq_ctrl);
 1576         mlx5e_close_cq(&drop_rq->cq);
 1577 }
 1578 
 1579 void
 1580 mlx5e_free_sq_db(struct mlx5e_sq *sq)
 1581 {
 1582         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 1583         int x;
 1584 
 1585         for (x = 0; x != wq_sz; x++) {
 1586                 if (sq->mbuf[x].mbuf != NULL) {
 1587                         bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map);
 1588                         m_freem(sq->mbuf[x].mbuf);
 1589                 }
 1590                 if (sq->mbuf[x].mst != NULL) {
 1591                         m_snd_tag_rele(sq->mbuf[x].mst);
 1592                         sq->mbuf[x].mst = NULL;
 1593                 }
 1594                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
 1595         }
 1596         free(sq->mbuf, M_MLX5EN);
 1597 }
 1598 
 1599 int
 1600 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
 1601 {
 1602         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 1603         int err;
 1604         int x;
 1605 
 1606         sq->mbuf = malloc_domainset(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN,
 1607             mlx5_dev_domainset(sq->priv->mdev), M_WAITOK | M_ZERO);
 1608 
 1609         /* Create DMA descriptor MAPs */
 1610         for (x = 0; x != wq_sz; x++) {
 1611                 err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
 1612                 if (err != 0) {
 1613                         while (x--)
 1614                                 bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
 1615                         free(sq->mbuf, M_MLX5EN);
 1616                         return (err);
 1617                 }
 1618         }
 1619         return (0);
 1620 }
 1621 
 1622 static const char *mlx5e_sq_stats_desc[] = {
 1623         MLX5E_SQ_STATS(MLX5E_STATS_DESC)
 1624 };
 1625 
 1626 void
 1627 mlx5e_update_sq_inline(struct mlx5e_sq *sq)
 1628 {
 1629         sq->max_inline = sq->priv->params.tx_max_inline;
 1630         sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
 1631 
 1632         /*
 1633          * Check if trust state is DSCP or if inline mode is NONE which
 1634          * indicates CX-5 or newer hardware.
 1635          */
 1636         if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
 1637             sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
 1638                 if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
 1639                         sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
 1640                 else
 1641                         sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
 1642         } else {
 1643                 sq->min_insert_caps = 0;
 1644         }
 1645 }
 1646 
 1647 static void
 1648 mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
 1649 {
 1650         int i;
 1651 
 1652         for (i = 0; i != priv->num_tc; i++) {
 1653                 mtx_lock(&c->sq[i].lock);
 1654                 mlx5e_update_sq_inline(&c->sq[i]);
 1655                 mtx_unlock(&c->sq[i].lock);
 1656         }
 1657 }
 1658 
 1659 void
 1660 mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
 1661 {
 1662         int i;
 1663 
 1664         /* check if channels are closed */
 1665         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 1666                 return;
 1667 
 1668         for (i = 0; i < priv->params.num_channels; i++)
 1669                 mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]);
 1670 }
 1671 
 1672 static int
 1673 mlx5e_create_sq(struct mlx5e_channel *c,
 1674     int tc,
 1675     struct mlx5e_sq_param *param,
 1676     struct mlx5e_sq *sq)
 1677 {
 1678         struct mlx5e_priv *priv = c->priv;
 1679         struct mlx5_core_dev *mdev = priv->mdev;
 1680         char buffer[16];
 1681         void *sqc = param->sqc;
 1682         void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 1683         int err;
 1684 
 1685         /* Create DMA descriptor TAG */
 1686         if ((err = -bus_dma_tag_create(
 1687             bus_get_dma_tag(mdev->pdev->dev.bsddev),
 1688             1,                          /* any alignment */
 1689             0,                          /* no boundary */
 1690             BUS_SPACE_MAXADDR,          /* lowaddr */
 1691             BUS_SPACE_MAXADDR,          /* highaddr */
 1692             NULL, NULL,                 /* filter, filterarg */
 1693             MLX5E_MAX_TX_PAYLOAD_SIZE,  /* maxsize */
 1694             MLX5E_MAX_TX_MBUF_FRAGS,    /* nsegments */
 1695             MLX5E_MAX_TX_MBUF_SIZE,     /* maxsegsize */
 1696             0,                          /* flags */
 1697             NULL, NULL,                 /* lockfunc, lockfuncarg */
 1698             &sq->dma_tag)))
 1699                 goto done;
 1700 
 1701         sq->mkey_be = cpu_to_be32(priv->mr.key);
 1702         sq->ifp = priv->ifp;
 1703         sq->priv = priv;
 1704         sq->tc = tc;
 1705 
 1706         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
 1707             &sq->wq_ctrl);
 1708         if (err)
 1709                 goto err_free_dma_tag;
 1710 
 1711         sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 1712 
 1713         err = mlx5e_alloc_sq_db(sq);
 1714         if (err)
 1715                 goto err_sq_wq_destroy;
 1716 
 1717         mlx5e_update_sq_inline(sq);
 1718 
 1719         snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
 1720         mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 1721             buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
 1722             sq->stats.arg);
 1723 
 1724         return (0);
 1725 
 1726 err_sq_wq_destroy:
 1727         mlx5_wq_destroy(&sq->wq_ctrl);
 1728 
 1729 err_free_dma_tag:
 1730         bus_dma_tag_destroy(sq->dma_tag);
 1731 done:
 1732         return (err);
 1733 }
 1734 
 1735 static void
 1736 mlx5e_destroy_sq(struct mlx5e_sq *sq)
 1737 {
 1738         /* destroy all sysctl nodes */
 1739         sysctl_ctx_free(&sq->stats.ctx);
 1740 
 1741         mlx5e_free_sq_db(sq);
 1742         mlx5_wq_destroy(&sq->wq_ctrl);
 1743         bus_dma_tag_destroy(sq->dma_tag);
 1744 }
 1745 
 1746 int
 1747 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
 1748     const struct mlx5_sq_bfreg *bfreg, int tis_num)
 1749 {
 1750         void *in;
 1751         void *sqc;
 1752         void *wq;
 1753         int inlen;
 1754         int err;
 1755         u8 ts_format;
 1756 
 1757         inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
 1758             sizeof(u64) * sq->wq_ctrl.buf.npages;
 1759         in = mlx5_vzalloc(inlen);
 1760         if (in == NULL)
 1761                 return (-ENOMEM);
 1762 
 1763         sq->uar_map = bfreg->map;
 1764 
 1765         ts_format = mlx5_get_sq_default_ts(sq->priv->mdev);
 1766         sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
 1767         wq = MLX5_ADDR_OF(sqc, sqc, wq);
 1768 
 1769         memcpy(sqc, param->sqc, sizeof(param->sqc));
 1770 
 1771         MLX5_SET(sqc, sqc, tis_num_0, tis_num);
 1772         MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
 1773         MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
 1774         MLX5_SET(sqc, sqc, ts_format, ts_format);
 1775         MLX5_SET(sqc, sqc, tis_lst_sz, 1);
 1776         MLX5_SET(sqc, sqc, flush_in_error_en, 1);
 1777         MLX5_SET(sqc, sqc, allow_swp, 1);
 1778 
 1779         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 1780         MLX5_SET(wq, wq, uar_page, bfreg->index);
 1781         MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
 1782             MLX5_ADAPTER_PAGE_SHIFT);
 1783         MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
 1784 
 1785         mlx5_fill_page_array(&sq->wq_ctrl.buf,
 1786             (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
 1787 
 1788         err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
 1789 
 1790         kvfree(in);
 1791 
 1792         return (err);
 1793 }
 1794 
 1795 int
 1796 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
 1797 {
 1798         void *in;
 1799         void *sqc;
 1800         int inlen;
 1801         int err;
 1802 
 1803         inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
 1804         in = mlx5_vzalloc(inlen);
 1805         if (in == NULL)
 1806                 return (-ENOMEM);
 1807 
 1808         sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
 1809 
 1810         MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
 1811         MLX5_SET(modify_sq_in, in, sq_state, curr_state);
 1812         MLX5_SET(sqc, sqc, state, next_state);
 1813 
 1814         err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
 1815 
 1816         kvfree(in);
 1817 
 1818         return (err);
 1819 }
 1820 
 1821 void
 1822 mlx5e_disable_sq(struct mlx5e_sq *sq)
 1823 {
 1824 
 1825         mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
 1826 }
 1827 
 1828 static int
 1829 mlx5e_open_sq(struct mlx5e_channel *c,
 1830     int tc,
 1831     struct mlx5e_sq_param *param,
 1832     struct mlx5e_sq *sq)
 1833 {
 1834         int err;
 1835 
 1836         sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
 1837 
 1838         /* ensure the TX completion event factor is not zero */
 1839         if (sq->cev_factor == 0)
 1840                 sq->cev_factor = 1;
 1841 
 1842         err = mlx5e_create_sq(c, tc, param, sq);
 1843         if (err)
 1844                 return (err);
 1845 
 1846         err = mlx5e_enable_sq(sq, param, &c->bfreg, c->priv->tisn[tc]);
 1847         if (err)
 1848                 goto err_destroy_sq;
 1849 
 1850         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
 1851         if (err)
 1852                 goto err_disable_sq;
 1853 
 1854         WRITE_ONCE(sq->running, 1);
 1855 
 1856         return (0);
 1857 
 1858 err_disable_sq:
 1859         mlx5e_disable_sq(sq);
 1860 err_destroy_sq:
 1861         mlx5e_destroy_sq(sq);
 1862 
 1863         return (err);
 1864 }
 1865 
 1866 static void
 1867 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
 1868 {
 1869         /* fill up remainder with NOPs */
 1870         while (sq->cev_counter != 0) {
 1871                 while (!mlx5e_sq_has_room_for(sq, 1)) {
 1872                         if (can_sleep != 0) {
 1873                                 mtx_unlock(&sq->lock);
 1874                                 msleep(4);
 1875                                 mtx_lock(&sq->lock);
 1876                         } else {
 1877                                 goto done;
 1878                         }
 1879                 }
 1880                 /* send a single NOP */
 1881                 mlx5e_send_nop(sq, 1);
 1882                 atomic_thread_fence_rel();
 1883         }
 1884 done:
 1885         mlx5e_tx_notify_hw(sq, false);
 1886 }
 1887 
 1888 void
 1889 mlx5e_sq_cev_timeout(void *arg)
 1890 {
 1891         struct mlx5e_sq *sq = arg;
 1892 
 1893         mtx_assert(&sq->lock, MA_OWNED);
 1894 
 1895         /* check next state */
 1896         switch (sq->cev_next_state) {
 1897         case MLX5E_CEV_STATE_SEND_NOPS:
 1898                 /* fill TX ring with NOPs, if any */
 1899                 mlx5e_sq_send_nops_locked(sq, 0);
 1900 
 1901                 /* check if completed */
 1902                 if (sq->cev_counter == 0) {
 1903                         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
 1904                         return;
 1905                 }
 1906                 break;
 1907         default:
 1908                 /* send NOPs on next timeout */
 1909                 sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
 1910                 break;
 1911         }
 1912 
 1913         /* restart timer */
 1914         callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
 1915 }
 1916 
 1917 void
 1918 mlx5e_drain_sq(struct mlx5e_sq *sq)
 1919 {
 1920         int error;
 1921         struct mlx5_core_dev *mdev= sq->priv->mdev;
 1922 
 1923         /*
 1924          * Check if already stopped.
 1925          *
 1926          * NOTE: Serialization of this function is managed by the
 1927          * caller ensuring the priv's state lock is locked or in case
 1928          * of rate limit support, a single thread manages drain and
 1929          * resume of SQs. The "running" variable can therefore safely
 1930          * be read without any locks.
 1931          */
 1932         if (READ_ONCE(sq->running) == 0)
 1933                 return;
 1934 
 1935         /* don't put more packets into the SQ */
 1936         WRITE_ONCE(sq->running, 0);
 1937 
 1938         /* serialize access to DMA rings */
 1939         mtx_lock(&sq->lock);
 1940 
 1941         /* teardown event factor timer, if any */
 1942         sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
 1943         callout_stop(&sq->cev_callout);
 1944 
 1945         /* send dummy NOPs in order to flush the transmit ring */
 1946         mlx5e_sq_send_nops_locked(sq, 1);
 1947         mtx_unlock(&sq->lock);
 1948 
 1949         /* wait till SQ is empty or link is down */
 1950         mtx_lock(&sq->lock);
 1951         while (sq->cc != sq->pc &&
 1952             (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
 1953             mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
 1954             pci_channel_offline(mdev->pdev) == 0) {
 1955                 mtx_unlock(&sq->lock);
 1956                 msleep(1);
 1957                 sq->cq.mcq.comp(&sq->cq.mcq, NULL);
 1958                 mtx_lock(&sq->lock);
 1959         }
 1960         mtx_unlock(&sq->lock);
 1961 
 1962         /* error out remaining requests */
 1963         error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
 1964         if (error != 0) {
 1965                 mlx5_en_err(sq->ifp,
 1966                     "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
 1967         }
 1968 
 1969         /* wait till SQ is empty */
 1970         mtx_lock(&sq->lock);
 1971         while (sq->cc != sq->pc &&
 1972                mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
 1973                pci_channel_offline(mdev->pdev) == 0) {
 1974                 mtx_unlock(&sq->lock);
 1975                 msleep(1);
 1976                 sq->cq.mcq.comp(&sq->cq.mcq, NULL);
 1977                 mtx_lock(&sq->lock);
 1978         }
 1979         mtx_unlock(&sq->lock);
 1980 }
 1981 
 1982 static void
 1983 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
 1984 {
 1985 
 1986         mlx5e_drain_sq(sq);
 1987         mlx5e_disable_sq(sq);
 1988         mlx5e_destroy_sq(sq);
 1989 }
 1990 
 1991 static int
 1992 mlx5e_create_cq(struct mlx5e_priv *priv,
 1993     struct mlx5e_cq_param *param,
 1994     struct mlx5e_cq *cq,
 1995     mlx5e_cq_comp_t *comp,
 1996     int eq_ix)
 1997 {
 1998         struct mlx5_core_dev *mdev = priv->mdev;
 1999         struct mlx5_core_cq *mcq = &cq->mcq;
 2000         int eqn_not_used;
 2001         int irqn;
 2002         int err;
 2003         u32 i;
 2004 
 2005         err = mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
 2006         if (err)
 2007                 return (err);
 2008 
 2009         err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
 2010             &cq->wq_ctrl);
 2011         if (err)
 2012                 return (err);
 2013 
 2014         mcq->cqe_sz = 64;
 2015         mcq->set_ci_db = cq->wq_ctrl.db.db;
 2016         mcq->arm_db = cq->wq_ctrl.db.db + 1;
 2017         *mcq->set_ci_db = 0;
 2018         *mcq->arm_db = 0;
 2019         mcq->vector = eq_ix;
 2020         mcq->comp = comp;
 2021         mcq->event = mlx5e_cq_error_event;
 2022         mcq->irqn = irqn;
 2023 
 2024         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 2025                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
 2026 
 2027                 cqe->op_own = 0xf1;
 2028         }
 2029 
 2030         cq->priv = priv;
 2031 
 2032         return (0);
 2033 }
 2034 
 2035 static void
 2036 mlx5e_destroy_cq(struct mlx5e_cq *cq)
 2037 {
 2038         mlx5_wq_destroy(&cq->wq_ctrl);
 2039 }
 2040 
 2041 static int
 2042 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
 2043 {
 2044         struct mlx5_core_cq *mcq = &cq->mcq;
 2045         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 2046         void *in;
 2047         void *cqc;
 2048         int inlen;
 2049         int irqn_not_used;
 2050         int eqn;
 2051         int err;
 2052 
 2053         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 2054             sizeof(u64) * cq->wq_ctrl.buf.npages;
 2055         in = mlx5_vzalloc(inlen);
 2056         if (in == NULL)
 2057                 return (-ENOMEM);
 2058 
 2059         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 2060 
 2061         memcpy(cqc, param->cqc, sizeof(param->cqc));
 2062 
 2063         mlx5_fill_page_array(&cq->wq_ctrl.buf,
 2064             (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
 2065 
 2066         mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
 2067 
 2068         MLX5_SET(cqc, cqc, c_eqn, eqn);
 2069         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 2070             MLX5_ADAPTER_PAGE_SHIFT);
 2071         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 2072 
 2073         err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen, out, sizeof(out));
 2074 
 2075         kvfree(in);
 2076 
 2077         if (err)
 2078                 return (err);
 2079 
 2080         mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
 2081 
 2082         return (0);
 2083 }
 2084 
 2085 static void
 2086 mlx5e_disable_cq(struct mlx5e_cq *cq)
 2087 {
 2088 
 2089         mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
 2090 }
 2091 
 2092 int
 2093 mlx5e_open_cq(struct mlx5e_priv *priv,
 2094     struct mlx5e_cq_param *param,
 2095     struct mlx5e_cq *cq,
 2096     mlx5e_cq_comp_t *comp,
 2097     int eq_ix)
 2098 {
 2099         int err;
 2100 
 2101         err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
 2102         if (err)
 2103                 return (err);
 2104 
 2105         err = mlx5e_enable_cq(cq, param, eq_ix);
 2106         if (err)
 2107                 goto err_destroy_cq;
 2108 
 2109         return (0);
 2110 
 2111 err_destroy_cq:
 2112         mlx5e_destroy_cq(cq);
 2113 
 2114         return (err);
 2115 }
 2116 
 2117 void
 2118 mlx5e_close_cq(struct mlx5e_cq *cq)
 2119 {
 2120         mlx5e_disable_cq(cq);
 2121         mlx5e_destroy_cq(cq);
 2122 }
 2123 
 2124 static int
 2125 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
 2126     struct mlx5e_channel_param *cparam)
 2127 {
 2128         int err;
 2129         int tc;
 2130 
 2131         for (tc = 0; tc < c->priv->num_tc; tc++) {
 2132                 /* open completion queue */
 2133                 err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
 2134                     &mlx5e_tx_cq_comp, c->ix);
 2135                 if (err)
 2136                         goto err_close_tx_cqs;
 2137         }
 2138         return (0);
 2139 
 2140 err_close_tx_cqs:
 2141         for (tc--; tc >= 0; tc--)
 2142                 mlx5e_close_cq(&c->sq[tc].cq);
 2143 
 2144         return (err);
 2145 }
 2146 
 2147 static void
 2148 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
 2149 {
 2150         int tc;
 2151 
 2152         for (tc = 0; tc < c->priv->num_tc; tc++)
 2153                 mlx5e_close_cq(&c->sq[tc].cq);
 2154 }
 2155 
 2156 static int
 2157 mlx5e_open_sqs(struct mlx5e_channel *c,
 2158     struct mlx5e_channel_param *cparam)
 2159 {
 2160         int err;
 2161         int tc;
 2162 
 2163         for (tc = 0; tc < c->priv->num_tc; tc++) {
 2164                 err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
 2165                 if (err)
 2166                         goto err_close_sqs;
 2167         }
 2168 
 2169         return (0);
 2170 
 2171 err_close_sqs:
 2172         for (tc--; tc >= 0; tc--)
 2173                 mlx5e_close_sq_wait(&c->sq[tc]);
 2174 
 2175         return (err);
 2176 }
 2177 
 2178 static void
 2179 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
 2180 {
 2181         int tc;
 2182 
 2183         for (tc = 0; tc < c->priv->num_tc; tc++)
 2184                 mlx5e_close_sq_wait(&c->sq[tc]);
 2185 }
 2186 
 2187 static void
 2188 mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix)
 2189 {
 2190         int tc;
 2191 
 2192         /* setup priv and channel number */
 2193         c->priv = priv;
 2194         c->ix = ix;
 2195 
 2196         /* setup send tag */
 2197         m_snd_tag_init(&c->tag, c->priv->ifp, &mlx5e_ul_snd_tag_sw);
 2198 
 2199         init_completion(&c->completion);
 2200 
 2201         mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
 2202 
 2203         callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
 2204 
 2205         for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) {
 2206                 struct mlx5e_sq *sq = c->sq + tc;
 2207 
 2208                 mtx_init(&sq->lock, "mlx5tx",
 2209                     MTX_NETWORK_LOCK " TX", MTX_DEF);
 2210                 mtx_init(&sq->comp_lock, "mlx5comp",
 2211                     MTX_NETWORK_LOCK " TX", MTX_DEF);
 2212 
 2213                 callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
 2214         }
 2215 
 2216         mlx5e_iq_static_init(&c->iq);
 2217 }
 2218 
 2219 static void
 2220 mlx5e_chan_wait_for_completion(struct mlx5e_channel *c)
 2221 {
 2222 
 2223         m_snd_tag_rele(&c->tag);
 2224         wait_for_completion(&c->completion);
 2225 }
 2226 
 2227 static void
 2228 mlx5e_priv_wait_for_completion(struct mlx5e_priv *priv, const uint32_t channels)
 2229 {
 2230         uint32_t x;
 2231 
 2232         for (x = 0; x != channels; x++)
 2233                 mlx5e_chan_wait_for_completion(&priv->channel[x]);
 2234 }
 2235 
 2236 static void
 2237 mlx5e_chan_static_destroy(struct mlx5e_channel *c)
 2238 {
 2239         int tc;
 2240 
 2241         callout_drain(&c->rq.watchdog);
 2242 
 2243         mtx_destroy(&c->rq.mtx);
 2244 
 2245         for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) {
 2246                 callout_drain(&c->sq[tc].cev_callout);
 2247                 mtx_destroy(&c->sq[tc].lock);
 2248                 mtx_destroy(&c->sq[tc].comp_lock);
 2249         }
 2250 
 2251         mlx5e_iq_static_destroy(&c->iq);
 2252 }
 2253 
 2254 static int
 2255 mlx5e_open_channel(struct mlx5e_priv *priv,
 2256     struct mlx5e_channel_param *cparam,
 2257     struct mlx5e_channel *c)
 2258 {
 2259         struct epoch_tracker et;
 2260         int i, err;
 2261 
 2262         /* zero non-persistent data */
 2263         MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start);
 2264         for (i = 0; i != priv->num_tc; i++)
 2265                 MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start);
 2266         MLX5E_ZERO(&c->iq, mlx5e_iq_zero_start);
 2267 
 2268         /* open transmit completion queue */
 2269         err = mlx5e_open_tx_cqs(c, cparam);
 2270         if (err)
 2271                 goto err_free;
 2272 
 2273         /* open receive completion queue */
 2274         err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
 2275             &mlx5e_rx_cq_comp, c->ix);
 2276         if (err)
 2277                 goto err_close_tx_cqs;
 2278 
 2279         err = mlx5e_open_sqs(c, cparam);
 2280         if (err)
 2281                 goto err_close_rx_cq;
 2282 
 2283         err = mlx5e_iq_open(c, &cparam->sq, &cparam->tx_cq, &c->iq);
 2284         if (err)
 2285                 goto err_close_sqs;
 2286 
 2287         err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
 2288         if (err)
 2289                 goto err_close_iq;
 2290 
 2291         /* poll receive queue initially */
 2292         NET_EPOCH_ENTER(et);
 2293         c->rq.cq.mcq.comp(&c->rq.cq.mcq, NULL);
 2294         NET_EPOCH_EXIT(et);
 2295 
 2296         return (0);
 2297 
 2298 err_close_iq:
 2299         mlx5e_iq_close(&c->iq);
 2300 
 2301 err_close_sqs:
 2302         mlx5e_close_sqs_wait(c);
 2303 
 2304 err_close_rx_cq:
 2305         mlx5e_close_cq(&c->rq.cq);
 2306 
 2307 err_close_tx_cqs:
 2308         mlx5e_close_tx_cqs(c);
 2309 
 2310 err_free:
 2311         return (err);
 2312 }
 2313 
 2314 static void
 2315 mlx5e_close_channel(struct mlx5e_channel *c)
 2316 {
 2317         mlx5e_close_rq(&c->rq);
 2318 }
 2319 
 2320 static void
 2321 mlx5e_close_channel_wait(struct mlx5e_channel *c)
 2322 {
 2323         mlx5e_close_rq_wait(&c->rq);
 2324         mlx5e_iq_close(&c->iq);
 2325         mlx5e_close_sqs_wait(c);
 2326         mlx5e_close_tx_cqs(c);
 2327 }
 2328 
 2329 static int
 2330 mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
 2331 {
 2332         u32 r, n;
 2333 
 2334         r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
 2335             MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
 2336         if (r > MJUM16BYTES)
 2337                 return (-ENOMEM);
 2338 
 2339         if (r > MJUM9BYTES)
 2340                 r = MJUM16BYTES;
 2341         else if (r > MJUMPAGESIZE)
 2342                 r = MJUM9BYTES;
 2343         else if (r > MCLBYTES)
 2344                 r = MJUMPAGESIZE;
 2345         else
 2346                 r = MCLBYTES;
 2347 
 2348         /*
 2349          * n + 1 must be a power of two, because stride size must be.
 2350          * Stride size is 16 * (n + 1), as the first segment is
 2351          * control.
 2352          */
 2353         for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
 2354                 ;
 2355 
 2356         if (n > MLX5E_MAX_BUSDMA_RX_SEGS)
 2357                 return (-ENOMEM);
 2358 
 2359         *wqe_sz = r;
 2360         *nsegs = n;
 2361         return (0);
 2362 }
 2363 
 2364 static void
 2365 mlx5e_build_rq_param(struct mlx5e_priv *priv,
 2366     struct mlx5e_rq_param *param)
 2367 {
 2368         void *rqc = param->rqc;
 2369         void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 2370         u32 wqe_sz, nsegs;
 2371 
 2372         mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
 2373         MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
 2374         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 2375         MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
 2376             nsegs * sizeof(struct mlx5_wqe_data_seg)));
 2377         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
 2378         MLX5_SET(wq, wq, pd, priv->pdn);
 2379 
 2380         param->wq.linear = 1;
 2381 }
 2382 
 2383 static void
 2384 mlx5e_build_sq_param(struct mlx5e_priv *priv,
 2385     struct mlx5e_sq_param *param)
 2386 {
 2387         void *sqc = param->sqc;
 2388         void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 2389 
 2390         MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
 2391         MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
 2392         MLX5_SET(wq, wq, pd, priv->pdn);
 2393 
 2394         param->wq.linear = 1;
 2395 }
 2396 
 2397 static void
 2398 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 2399     struct mlx5e_cq_param *param)
 2400 {
 2401         void *cqc = param->cqc;
 2402 
 2403         MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
 2404 }
 2405 
 2406 static void
 2407 mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
 2408 {
 2409 
 2410         *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
 2411 
 2412         /* apply LRO restrictions */
 2413         if (priv->params.hw_lro_en &&
 2414             ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
 2415                 ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
 2416         }
 2417 }
 2418 
 2419 static void
 2420 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 2421     struct mlx5e_cq_param *param)
 2422 {
 2423         struct net_dim_cq_moder curr;
 2424         void *cqc = param->cqc;
 2425 
 2426         /*
 2427          * We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE
 2428          * format is more beneficial for FreeBSD use case.
 2429          *
 2430          * Adding support for MLX5_CQE_FORMAT_CSUM will require changes
 2431          * in mlx5e_decompress_cqe.
 2432          */
 2433         if (priv->params.cqe_zipping_en) {
 2434                 MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH);
 2435                 MLX5_SET(cqc, cqc, cqe_compression_en, 1);
 2436         }
 2437 
 2438         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
 2439 
 2440         switch (priv->params.rx_cq_moderation_mode) {
 2441         case 0:
 2442                 MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
 2443                 MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 2444                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2445                 break;
 2446         case 1:
 2447                 MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
 2448                 MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 2449                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
 2450                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 2451                 else
 2452                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2453                 break;
 2454         case 2:
 2455                 mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
 2456                 MLX5_SET(cqc, cqc, cq_period, curr.usec);
 2457                 MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
 2458                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2459                 break;
 2460         case 3:
 2461                 mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
 2462                 MLX5_SET(cqc, cqc, cq_period, curr.usec);
 2463                 MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
 2464                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
 2465                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 2466                 else
 2467                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2468                 break;
 2469         default:
 2470                 break;
 2471         }
 2472 
 2473         mlx5e_dim_build_cq_param(priv, param);
 2474 
 2475         mlx5e_build_common_cq_param(priv, param);
 2476 }
 2477 
 2478 static void
 2479 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 2480     struct mlx5e_cq_param *param)
 2481 {
 2482         void *cqc = param->cqc;
 2483 
 2484         MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
 2485         MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
 2486         MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
 2487 
 2488         switch (priv->params.tx_cq_moderation_mode) {
 2489         case 0:
 2490                 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2491                 break;
 2492         default:
 2493                 if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
 2494                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 2495                 else
 2496                         MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 2497                 break;
 2498         }
 2499 
 2500         mlx5e_build_common_cq_param(priv, param);
 2501 }
 2502 
 2503 static void
 2504 mlx5e_build_channel_param(struct mlx5e_priv *priv,
 2505     struct mlx5e_channel_param *cparam)
 2506 {
 2507         memset(cparam, 0, sizeof(*cparam));
 2508 
 2509         mlx5e_build_rq_param(priv, &cparam->rq);
 2510         mlx5e_build_sq_param(priv, &cparam->sq);
 2511         mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
 2512         mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
 2513 }
 2514 
 2515 static int
 2516 mlx5e_open_channels(struct mlx5e_priv *priv)
 2517 {
 2518         struct mlx5e_channel_param *cparam;
 2519         int err;
 2520         int i;
 2521         int j;
 2522 
 2523         cparam = malloc(sizeof(*cparam), M_MLX5EN, M_WAITOK);
 2524 
 2525         mlx5e_build_channel_param(priv, cparam);
 2526         for (i = 0; i < priv->params.num_channels; i++) {
 2527                 err = mlx5e_open_channel(priv, cparam, &priv->channel[i]);
 2528                 if (err)
 2529                         goto err_close_channels;
 2530 
 2531                 /* Bind interrupt vectors, if any. */
 2532                 if (priv->params_ethtool.irq_cpu_base > -1) {
 2533                         cpuset_t cpuset;
 2534                         int cpu;
 2535                         int irq;
 2536                         int eqn;
 2537                         int nirq;
 2538 
 2539                         err = mlx5_vector2eqn(priv->mdev, i,
 2540                             &eqn, &nirq);
 2541 
 2542                         /* error here is non-fatal */
 2543                         if (err != 0)
 2544                                 continue;
 2545 
 2546                         irq = priv->mdev->priv.msix_arr[nirq].vector;
 2547                         cpu = (unsigned)(priv->params_ethtool.irq_cpu_base +
 2548                             i * priv->params_ethtool.irq_cpu_stride) % (unsigned)mp_ncpus;
 2549 
 2550                         CPU_ZERO(&cpuset);
 2551                         CPU_SET(cpu, &cpuset);
 2552                         intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &cpuset);
 2553                 }
 2554         }
 2555 
 2556         for (j = 0; j < priv->params.num_channels; j++) {
 2557                 err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq);
 2558                 if (err)
 2559                         goto err_close_channels;
 2560         }
 2561         free(cparam, M_MLX5EN);
 2562         return (0);
 2563 
 2564 err_close_channels:
 2565         while (i--) {
 2566                 mlx5e_close_channel(&priv->channel[i]);
 2567                 mlx5e_close_channel_wait(&priv->channel[i]);
 2568         }
 2569         free(cparam, M_MLX5EN);
 2570         return (err);
 2571 }
 2572 
 2573 static void
 2574 mlx5e_close_channels(struct mlx5e_priv *priv)
 2575 {
 2576         int i;
 2577 
 2578         for (i = 0; i < priv->params.num_channels; i++)
 2579                 mlx5e_close_channel(&priv->channel[i]);
 2580         for (i = 0; i < priv->params.num_channels; i++)
 2581                 mlx5e_close_channel_wait(&priv->channel[i]);
 2582 }
 2583 
 2584 static int
 2585 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
 2586 {
 2587 
 2588         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
 2589                 uint8_t cq_mode;
 2590 
 2591                 switch (priv->params.tx_cq_moderation_mode) {
 2592                 case 0:
 2593                 case 2:
 2594                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 2595                         break;
 2596                 default:
 2597                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
 2598                         break;
 2599                 }
 2600 
 2601                 return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
 2602                     priv->params.tx_cq_moderation_usec,
 2603                     priv->params.tx_cq_moderation_pkts,
 2604                     cq_mode));
 2605         }
 2606 
 2607         return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
 2608             priv->params.tx_cq_moderation_usec,
 2609             priv->params.tx_cq_moderation_pkts));
 2610 }
 2611 
 2612 static int
 2613 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
 2614 {
 2615 
 2616         if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
 2617                 uint8_t cq_mode;
 2618                 uint8_t dim_mode;
 2619                 int retval;
 2620 
 2621                 switch (priv->params.rx_cq_moderation_mode) {
 2622                 case 0:
 2623                 case 2:
 2624                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 2625                         dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 2626                         break;
 2627                 default:
 2628                         cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
 2629                         dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
 2630                         break;
 2631                 }
 2632 
 2633                 /* tear down dynamic interrupt moderation */
 2634                 mtx_lock(&rq->mtx);
 2635                 rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
 2636                 mtx_unlock(&rq->mtx);
 2637 
 2638                 /* wait for dynamic interrupt moderation work task, if any */
 2639                 cancel_work_sync(&rq->dim.work);
 2640 
 2641                 if (priv->params.rx_cq_moderation_mode >= 2) {
 2642                         struct net_dim_cq_moder curr;
 2643 
 2644                         mlx5e_get_default_profile(priv, dim_mode, &curr);
 2645 
 2646                         retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
 2647                             curr.usec, curr.pkts, cq_mode);
 2648 
 2649                         /* set dynamic interrupt moderation mode and zero defaults */
 2650                         mtx_lock(&rq->mtx);
 2651                         rq->dim.mode = dim_mode;
 2652                         rq->dim.state = 0;
 2653                         rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
 2654                         mtx_unlock(&rq->mtx);
 2655                 } else {
 2656                         retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
 2657                             priv->params.rx_cq_moderation_usec,
 2658                             priv->params.rx_cq_moderation_pkts,
 2659                             cq_mode);
 2660                 }
 2661                 return (retval);
 2662         }
 2663 
 2664         return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
 2665             priv->params.rx_cq_moderation_usec,
 2666             priv->params.rx_cq_moderation_pkts));
 2667 }
 2668 
 2669 static int
 2670 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
 2671 {
 2672         int err;
 2673         int i;
 2674 
 2675         err = mlx5e_refresh_rq_params(priv, &c->rq);
 2676         if (err)
 2677                 goto done;
 2678 
 2679         for (i = 0; i != priv->num_tc; i++) {
 2680                 err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
 2681                 if (err)
 2682                         goto done;
 2683         }
 2684 done:
 2685         return (err);
 2686 }
 2687 
 2688 int
 2689 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
 2690 {
 2691         int i;
 2692 
 2693         /* check if channels are closed */
 2694         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 2695                 return (EINVAL);
 2696 
 2697         for (i = 0; i < priv->params.num_channels; i++) {
 2698                 int err;
 2699 
 2700                 err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]);
 2701                 if (err)
 2702                         return (err);
 2703         }
 2704         return (0);
 2705 }
 2706 
 2707 static int
 2708 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
 2709 {
 2710         struct mlx5_core_dev *mdev = priv->mdev;
 2711         u32 in[MLX5_ST_SZ_DW(create_tis_in)];
 2712         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 2713 
 2714         memset(in, 0, sizeof(in));
 2715 
 2716         MLX5_SET(tisc, tisc, prio, tc);
 2717         MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
 2718 
 2719         return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
 2720 }
 2721 
 2722 static void
 2723 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
 2724 {
 2725         mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc], 0);
 2726 }
 2727 
 2728 static int
 2729 mlx5e_open_tises(struct mlx5e_priv *priv)
 2730 {
 2731         int num_tc = priv->num_tc;
 2732         int err;
 2733         int tc;
 2734 
 2735         for (tc = 0; tc < num_tc; tc++) {
 2736                 err = mlx5e_open_tis(priv, tc);
 2737                 if (err)
 2738                         goto err_close_tises;
 2739         }
 2740 
 2741         return (0);
 2742 
 2743 err_close_tises:
 2744         for (tc--; tc >= 0; tc--)
 2745                 mlx5e_close_tis(priv, tc);
 2746 
 2747         return (err);
 2748 }
 2749 
 2750 static void
 2751 mlx5e_close_tises(struct mlx5e_priv *priv)
 2752 {
 2753         int num_tc = priv->num_tc;
 2754         int tc;
 2755 
 2756         for (tc = 0; tc < num_tc; tc++)
 2757                 mlx5e_close_tis(priv, tc);
 2758 }
 2759 
 2760 static int
 2761 mlx5e_open_default_rqt(struct mlx5e_priv *priv, u32 *prqtn, int sz)
 2762 {
 2763         u32 *in;
 2764         void *rqtc;
 2765         int inlen;
 2766         int err;
 2767         int i;
 2768 
 2769         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
 2770         in = mlx5_vzalloc(inlen);
 2771         if (in == NULL)
 2772                 return (-ENOMEM);
 2773         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 2774 
 2775         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 2776         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 2777 
 2778         for (i = 0; i != sz; i++)
 2779                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
 2780 
 2781         err = mlx5_core_create_rqt(priv->mdev, in, inlen, prqtn);
 2782         kvfree(in);
 2783 
 2784         return (err);
 2785 }
 2786 
 2787 static int
 2788 mlx5e_open_rqts(struct mlx5e_priv *priv)
 2789 {
 2790         int err;
 2791         int i;
 2792 
 2793         err = mlx5e_open_default_rqt(priv, &priv->rqtn,
 2794             1 << priv->params.rx_hash_log_tbl_sz);
 2795         if (err)
 2796                 goto err_default;
 2797 
 2798         for (i = 0; i != priv->mdev->priv.eq_table.num_comp_vectors; i++) {
 2799                 err = mlx5e_open_default_rqt(priv, &priv->channel[i].rqtn, 1);
 2800                 if (err)
 2801                         goto err_channel;
 2802         }
 2803         return (0);
 2804 
 2805 err_channel:
 2806         while (i--)
 2807                 mlx5_core_destroy_rqt(priv->mdev, priv->channel[i].rqtn, 0);
 2808 
 2809         mlx5_core_destroy_rqt(priv->mdev, priv->rqtn, 0);
 2810 
 2811 err_default:
 2812         return (err);
 2813 }
 2814 
 2815 static void
 2816 mlx5e_close_rqts(struct mlx5e_priv *priv)
 2817 {
 2818         int i;
 2819 
 2820         for (i = 0; i != priv->mdev->priv.eq_table.num_comp_vectors; i++)
 2821                 mlx5_core_destroy_rqt(priv->mdev, priv->channel[i].rqtn, 0);
 2822 
 2823         mlx5_core_destroy_rqt(priv->mdev, priv->rqtn, 0);
 2824 }
 2825 
 2826 static int
 2827 mlx5e_activate_rqt(struct mlx5e_priv *priv)
 2828 {
 2829         u32 *in;
 2830         void *rqtc;
 2831         int inlen;
 2832         int err;
 2833         int sz;
 2834         int i;
 2835 
 2836         sz = 1 << priv->params.rx_hash_log_tbl_sz;
 2837 
 2838         inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
 2839         in = mlx5_vzalloc(inlen);
 2840         if (in == NULL)
 2841                 return (-ENOMEM);
 2842 
 2843         rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
 2844 
 2845         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 2846         MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
 2847 
 2848         for (i = 0; i != sz; i++) {
 2849                 int ix;
 2850 #ifdef RSS
 2851                 ix = rss_get_indirection_to_bucket(i);
 2852 #else
 2853                 ix = i;
 2854 #endif
 2855                 /* ensure we don't overflow */
 2856                 ix %= priv->params.num_channels;
 2857 
 2858                 /* apply receive side scaling stride, if any */
 2859                 ix -= ix % (int)priv->params.channels_rsss;
 2860 
 2861                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
 2862         }
 2863 
 2864         err = mlx5_core_modify_rqt(priv->mdev, priv->rqtn, in, inlen);
 2865         if (err)
 2866                 goto err_modify;
 2867 
 2868         inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32);
 2869 
 2870         MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
 2871 
 2872         for (i = 0; i != priv->mdev->priv.eq_table.num_comp_vectors; i++) {
 2873                 int ix;
 2874 #ifdef RSS
 2875                 ix = rss_get_indirection_to_bucket(i);
 2876 #else
 2877                 ix = i;
 2878 #endif
 2879                 /* ensure we don't overflow */
 2880                 ix %= priv->params.num_channels;
 2881 
 2882                 /* apply receive side scaling stride, if any */
 2883                 ix -= ix % (int)priv->params.channels_rsss;
 2884 
 2885                 MLX5_SET(rqtc, rqtc, rq_num[0], priv->channel[ix].rq.rqn);
 2886 
 2887                 err = mlx5_core_modify_rqt(priv->mdev, priv->channel[i].rqtn, in, inlen);
 2888                 if (err)
 2889                         goto err_modify;
 2890         }
 2891 
 2892 err_modify:
 2893         kvfree(in);
 2894         return (err);
 2895 }
 2896 
 2897 static int
 2898 mlx5e_deactivate_rqt(struct mlx5e_priv *priv)
 2899 {
 2900         u32 *in;
 2901         void *rqtc;
 2902         int inlen;
 2903         int err;
 2904         int sz;
 2905         int i;
 2906 
 2907         sz = 1 << priv->params.rx_hash_log_tbl_sz;
 2908 
 2909         inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
 2910         in = mlx5_vzalloc(inlen);
 2911         if (in == NULL)
 2912                 return (-ENOMEM);
 2913 
 2914         rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
 2915 
 2916         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 2917         MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
 2918 
 2919         for (i = 0; i != sz; i++)
 2920                 MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
 2921 
 2922         err = mlx5_core_modify_rqt(priv->mdev, priv->rqtn, in, inlen);
 2923         if (err)
 2924                 goto err_modify;
 2925 
 2926         inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32);
 2927 
 2928         MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
 2929 
 2930         for (i = 0; i != priv->mdev->priv.eq_table.num_comp_vectors; i++) {
 2931                 MLX5_SET(rqtc, rqtc, rq_num[0], priv->drop_rq.rqn);
 2932 
 2933                 err = mlx5_core_modify_rqt(priv->mdev, priv->channel[i].rqtn, in, inlen);
 2934                 if (err)
 2935                         goto err_modify;
 2936         }
 2937 
 2938 err_modify:
 2939         kvfree(in);
 2940         return (err);
 2941 }
 2942 
 2943 #define MLX5E_RSS_KEY_SIZE (10 * 4)     /* bytes */
 2944 
 2945 static void
 2946 mlx5e_get_rss_key(void *key_ptr)
 2947 {
 2948 #ifdef RSS
 2949         rss_getkey(key_ptr);
 2950 #else
 2951         static const u32 rsskey[] = {
 2952             cpu_to_be32(0xD181C62C),
 2953             cpu_to_be32(0xF7F4DB5B),
 2954             cpu_to_be32(0x1983A2FC),
 2955             cpu_to_be32(0x943E1ADB),
 2956             cpu_to_be32(0xD9389E6B),
 2957             cpu_to_be32(0xD1039C2C),
 2958             cpu_to_be32(0xA74499AD),
 2959             cpu_to_be32(0x593D56D9),
 2960             cpu_to_be32(0xF3253C06),
 2961             cpu_to_be32(0x2ADC1FFC),
 2962         };
 2963         CTASSERT(sizeof(rsskey) == MLX5E_RSS_KEY_SIZE);
 2964         memcpy(key_ptr, rsskey, MLX5E_RSS_KEY_SIZE);
 2965 #endif
 2966 }
 2967 
 2968 static void
 2969 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt, bool inner_vxlan)
 2970 {
 2971         void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 2972         void *hfsi = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
 2973         void *hfs = inner_vxlan ? hfsi : hfso;
 2974         __be32 *hkey;
 2975 
 2976         MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
 2977 
 2978 #define ROUGH_MAX_L2_L3_HDR_SZ 256
 2979 
 2980 #define MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 2981                           MLX5_HASH_FIELD_SEL_DST_IP)
 2982 
 2983 #define MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 2984                           MLX5_HASH_FIELD_SEL_DST_IP   |\
 2985                           MLX5_HASH_FIELD_SEL_L4_SPORT |\
 2986                           MLX5_HASH_FIELD_SEL_L4_DPORT)
 2987 
 2988 #define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 2989                                  MLX5_HASH_FIELD_SEL_DST_IP   |\
 2990                                  MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 2991 
 2992         if (priv->params.hw_lro_en) {
 2993                 MLX5_SET(tirc, tirc, lro_enable_mask,
 2994                     MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
 2995                     MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
 2996                 MLX5_SET(tirc, tirc, lro_max_msg_sz,
 2997                     (priv->params.lro_wqe_sz -
 2998                     ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
 2999                 /* TODO: add the option to choose timer value dynamically */
 3000                 MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
 3001                     MLX5_CAP_ETH(priv->mdev,
 3002                     lro_timer_supported_periods[2]));
 3003         }
 3004 
 3005         if (inner_vxlan)
 3006                 MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 3007 
 3008         /*
 3009          * All packets must go through the indirection table, RQT,
 3010          * because it is not possible to modify the RQN of the TIR
 3011          * for direct dispatchment after it is created, typically
 3012          * when the link goes up and down.
 3013          */
 3014         MLX5_SET(tirc, tirc, disp_type,
 3015             MLX5_TIRC_DISP_TYPE_INDIRECT);
 3016         MLX5_SET(tirc, tirc, indirect_table,
 3017             priv->rqtn);
 3018         MLX5_SET(tirc, tirc, rx_hash_fn,
 3019                  MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
 3020         hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
 3021 
 3022         CTASSERT(MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key) >=
 3023                  MLX5E_RSS_KEY_SIZE);
 3024 #ifdef RSS
 3025         /*
 3026          * The FreeBSD RSS implementation does currently not
 3027          * support symmetric Toeplitz hashes:
 3028          */
 3029         MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
 3030 #else
 3031         MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
 3032 #endif
 3033         mlx5e_get_rss_key(hkey);
 3034 
 3035         switch (tt) {
 3036         case MLX5E_TT_IPV4_TCP:
 3037                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3038                     MLX5_L3_PROT_TYPE_IPV4);
 3039                 MLX5_SET(rx_hash_field_select, hfs, l4_prot_type,
 3040                     MLX5_L4_PROT_TYPE_TCP);
 3041 #ifdef RSS
 3042                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
 3043                         MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3044                             MLX5_HASH_IP);
 3045                 } else
 3046 #endif
 3047                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3048                     MLX5_HASH_ALL);
 3049                 break;
 3050 
 3051         case MLX5E_TT_IPV6_TCP:
 3052                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3053                     MLX5_L3_PROT_TYPE_IPV6);
 3054                 MLX5_SET(rx_hash_field_select, hfs, l4_prot_type,
 3055                     MLX5_L4_PROT_TYPE_TCP);
 3056 #ifdef RSS
 3057                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
 3058                         MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3059                             MLX5_HASH_IP);
 3060                 } else
 3061 #endif
 3062                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3063                     MLX5_HASH_ALL);
 3064                 break;
 3065 
 3066         case MLX5E_TT_IPV4_UDP:
 3067                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3068                     MLX5_L3_PROT_TYPE_IPV4);
 3069                 MLX5_SET(rx_hash_field_select, hfs, l4_prot_type,
 3070                     MLX5_L4_PROT_TYPE_UDP);
 3071 #ifdef RSS
 3072                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
 3073                         MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3074                             MLX5_HASH_IP);
 3075                 } else
 3076 #endif
 3077                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3078                     MLX5_HASH_ALL);
 3079                 break;
 3080 
 3081         case MLX5E_TT_IPV6_UDP:
 3082                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3083                     MLX5_L3_PROT_TYPE_IPV6);
 3084                 MLX5_SET(rx_hash_field_select, hfs, l4_prot_type,
 3085                     MLX5_L4_PROT_TYPE_UDP);
 3086 #ifdef RSS
 3087                 if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
 3088                         MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3089                             MLX5_HASH_IP);
 3090                 } else
 3091 #endif
 3092                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3093                     MLX5_HASH_ALL);
 3094                 break;
 3095 
 3096         case MLX5E_TT_IPV4_IPSEC_AH:
 3097                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3098                     MLX5_L3_PROT_TYPE_IPV4);
 3099                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3100                     MLX5_HASH_IP_IPSEC_SPI);
 3101                 break;
 3102 
 3103         case MLX5E_TT_IPV6_IPSEC_AH:
 3104                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3105                     MLX5_L3_PROT_TYPE_IPV6);
 3106                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3107                     MLX5_HASH_IP_IPSEC_SPI);
 3108                 break;
 3109 
 3110         case MLX5E_TT_IPV4_IPSEC_ESP:
 3111                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3112                     MLX5_L3_PROT_TYPE_IPV4);
 3113                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3114                     MLX5_HASH_IP_IPSEC_SPI);
 3115                 break;
 3116 
 3117         case MLX5E_TT_IPV6_IPSEC_ESP:
 3118                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3119                     MLX5_L3_PROT_TYPE_IPV6);
 3120                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3121                     MLX5_HASH_IP_IPSEC_SPI);
 3122                 break;
 3123 
 3124         case MLX5E_TT_IPV4:
 3125                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3126                     MLX5_L3_PROT_TYPE_IPV4);
 3127                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3128                     MLX5_HASH_IP);
 3129                 break;
 3130 
 3131         case MLX5E_TT_IPV6:
 3132                 MLX5_SET(rx_hash_field_select, hfs, l3_prot_type,
 3133                     MLX5_L3_PROT_TYPE_IPV6);
 3134                 MLX5_SET(rx_hash_field_select, hfs, selected_fields,
 3135                     MLX5_HASH_IP);
 3136                 break;
 3137 
 3138         default:
 3139                 break;
 3140         }
 3141 }
 3142 
 3143 static int
 3144 mlx5e_open_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan)
 3145 {
 3146         struct mlx5_core_dev *mdev = priv->mdev;
 3147         u32 *in;
 3148         void *tirc;
 3149         int inlen;
 3150         int err;
 3151 
 3152         inlen = MLX5_ST_SZ_BYTES(create_tir_in);
 3153         in = mlx5_vzalloc(inlen);
 3154         if (in == NULL)
 3155                 return (-ENOMEM);
 3156         tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
 3157 
 3158         mlx5e_build_tir_ctx(priv, tirc, tt, inner_vxlan);
 3159 
 3160         err = mlx5_core_create_tir(mdev, in, inlen, inner_vxlan ?
 3161             &priv->tirn_inner_vxlan[tt] : &priv->tirn[tt]);
 3162 
 3163         kvfree(in);
 3164 
 3165         return (err);
 3166 }
 3167 
 3168 static void
 3169 mlx5e_close_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan)
 3170 {
 3171         mlx5_core_destroy_tir(priv->mdev, inner_vxlan ?
 3172             priv->tirn_inner_vxlan[tt] : priv->tirn[tt], 0);
 3173 }
 3174 
 3175 static int
 3176 mlx5e_open_tirs(struct mlx5e_priv *priv)
 3177 {
 3178         int err;
 3179         int i;
 3180 
 3181         for (i = 0; i != 2 * MLX5E_NUM_TT; i++) {
 3182                 err = mlx5e_open_tir(priv, i / 2, (i % 2) ? true : false);
 3183                 if (err)
 3184                         goto err_close_tirs;
 3185         }
 3186 
 3187         return (0);
 3188 
 3189 err_close_tirs:
 3190         for (i--; i >= 0; i--)
 3191                 mlx5e_close_tir(priv, i / 2, (i % 2) ? true : false);
 3192 
 3193         return (err);
 3194 }
 3195 
 3196 static void
 3197 mlx5e_close_tirs(struct mlx5e_priv *priv)
 3198 {
 3199         int i;
 3200 
 3201         for (i = 0; i != 2 * MLX5E_NUM_TT; i++)
 3202                 mlx5e_close_tir(priv, i / 2, (i % 2) ? true : false);
 3203 }
 3204 
 3205 /*
 3206  * SW MTU does not include headers,
 3207  * HW MTU includes all headers and checksums.
 3208  */
 3209 static int
 3210 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
 3211 {
 3212         struct mlx5e_priv *priv = ifp->if_softc;
 3213         struct mlx5_core_dev *mdev = priv->mdev;
 3214         int hw_mtu;
 3215         int err;
 3216 
 3217         hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
 3218 
 3219         err = mlx5_set_port_mtu(mdev, hw_mtu);
 3220         if (err) {
 3221                 mlx5_en_err(ifp, "mlx5_set_port_mtu failed setting %d, err=%d\n",
 3222                     sw_mtu, err);
 3223                 return (err);
 3224         }
 3225 
 3226         /* Update vport context MTU */
 3227         err = mlx5_set_vport_mtu(mdev, hw_mtu);
 3228         if (err) {
 3229                 mlx5_en_err(ifp,
 3230                     "Failed updating vport context with MTU size, err=%d\n",
 3231                     err);
 3232         }
 3233 
 3234         ifp->if_mtu = sw_mtu;
 3235 
 3236         err = mlx5_query_vport_mtu(mdev, &hw_mtu);
 3237         if (err || !hw_mtu) {
 3238                 /* fallback to port oper mtu */
 3239                 err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
 3240         }
 3241         if (err) {
 3242                 mlx5_en_err(ifp,
 3243                     "Query port MTU, after setting new MTU value, failed\n");
 3244                 return (err);
 3245         } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
 3246                 err = -E2BIG,
 3247                 mlx5_en_err(ifp,
 3248                     "Port MTU %d is smaller than ifp mtu %d\n",
 3249                     hw_mtu, sw_mtu);
 3250         } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
 3251                 err = -EINVAL;
 3252                 mlx5_en_err(ifp,
 3253                     "Port MTU %d is bigger than ifp mtu %d\n",
 3254                     hw_mtu, sw_mtu);
 3255         }
 3256         priv->params_ethtool.hw_mtu = hw_mtu;
 3257 
 3258         /* compute MSB */
 3259         while (hw_mtu & (hw_mtu - 1))
 3260                 hw_mtu &= (hw_mtu - 1);
 3261         priv->params_ethtool.hw_mtu_msb = hw_mtu;
 3262 
 3263         return (err);
 3264 }
 3265 
 3266 int
 3267 mlx5e_open_locked(struct ifnet *ifp)
 3268 {
 3269         struct mlx5e_priv *priv = ifp->if_softc;
 3270         int err;
 3271         u16 set_id;
 3272 
 3273         /* check if already opened */
 3274         if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
 3275                 return (0);
 3276 
 3277 #ifdef RSS
 3278         if (rss_getnumbuckets() > priv->params.num_channels) {
 3279                 mlx5_en_info(ifp,
 3280                     "NOTE: There are more RSS buckets(%u) than channels(%u) available\n",
 3281                     rss_getnumbuckets(), priv->params.num_channels);
 3282         }
 3283 #endif
 3284         err = mlx5e_open_tises(priv);
 3285         if (err) {
 3286                 mlx5_en_err(ifp, "mlx5e_open_tises failed, %d\n", err);
 3287                 return (err);
 3288         }
 3289         err = mlx5_vport_alloc_q_counter(priv->mdev,
 3290             MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
 3291         if (err) {
 3292                 mlx5_en_err(priv->ifp,
 3293                     "mlx5_vport_alloc_q_counter failed: %d\n", err);
 3294                 goto err_close_tises;
 3295         }
 3296         /* store counter set ID */
 3297         priv->counter_set_id = set_id;
 3298 
 3299         err = mlx5e_open_channels(priv);
 3300         if (err) {
 3301                 mlx5_en_err(ifp,
 3302                     "mlx5e_open_channels failed, %d\n", err);
 3303                 goto err_dalloc_q_counter;
 3304         }
 3305         err = mlx5e_activate_rqt(priv);
 3306         if (err) {
 3307                 mlx5_en_err(ifp, "mlx5e_activate_rqt failed, %d\n", err);
 3308                 goto err_close_channels;
 3309         }
 3310 
 3311         set_bit(MLX5E_STATE_OPENED, &priv->state);
 3312 
 3313         mlx5e_update_carrier(priv);
 3314 
 3315         return (0);
 3316 
 3317 err_close_channels:
 3318         mlx5e_close_channels(priv);
 3319 
 3320 err_dalloc_q_counter:
 3321         mlx5_vport_dealloc_q_counter(priv->mdev,
 3322             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
 3323 
 3324 err_close_tises:
 3325         mlx5e_close_tises(priv);
 3326 
 3327         return (err);
 3328 }
 3329 
 3330 static void
 3331 mlx5e_open(void *arg)
 3332 {
 3333         struct mlx5e_priv *priv = arg;
 3334 
 3335         PRIV_LOCK(priv);
 3336         if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
 3337                 mlx5_en_err(priv->ifp,
 3338                     "Setting port status to up failed\n");
 3339 
 3340         mlx5e_open_locked(priv->ifp);
 3341         priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 3342         PRIV_UNLOCK(priv);
 3343 }
 3344 
 3345 int
 3346 mlx5e_close_locked(struct ifnet *ifp)
 3347 {
 3348         struct mlx5e_priv *priv = ifp->if_softc;
 3349 
 3350         /* check if already closed */
 3351         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 3352                 return (0);
 3353 
 3354         clear_bit(MLX5E_STATE_OPENED, &priv->state);
 3355 
 3356         if_link_state_change(priv->ifp, LINK_STATE_DOWN);
 3357 
 3358         mlx5e_deactivate_rqt(priv);
 3359         mlx5e_close_channels(priv);
 3360         mlx5_vport_dealloc_q_counter(priv->mdev,
 3361             MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
 3362         mlx5e_close_tises(priv);
 3363 
 3364         return (0);
 3365 }
 3366 
 3367 static uint64_t
 3368 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
 3369 {
 3370         struct mlx5e_priv *priv = ifp->if_softc;
 3371         u64 retval;
 3372 
 3373         /* PRIV_LOCK(priv); XXX not allowed */
 3374         switch (cnt) {
 3375         case IFCOUNTER_IPACKETS:
 3376                 retval = priv->stats.vport.rx_packets;
 3377                 break;
 3378         case IFCOUNTER_IERRORS:
 3379                 retval = priv->stats.pport.in_range_len_errors +
 3380                     priv->stats.pport.out_of_range_len +
 3381                     priv->stats.pport.too_long_errors +
 3382                     priv->stats.pport.check_seq_err +
 3383                     priv->stats.pport.alignment_err;
 3384                 break;
 3385         case IFCOUNTER_IQDROPS:
 3386                 retval = priv->stats.vport.rx_out_of_buffer;
 3387                 break;
 3388         case IFCOUNTER_OPACKETS:
 3389                 retval = priv->stats.vport.tx_packets;
 3390                 break;
 3391         case IFCOUNTER_OERRORS:
 3392                 retval = priv->stats.port_stats_debug.out_discards;
 3393                 break;
 3394         case IFCOUNTER_IBYTES:
 3395                 retval = priv->stats.vport.rx_bytes;
 3396                 break;
 3397         case IFCOUNTER_OBYTES:
 3398                 retval = priv->stats.vport.tx_bytes;
 3399                 break;
 3400         case IFCOUNTER_IMCASTS:
 3401                 retval = priv->stats.vport.rx_multicast_packets;
 3402                 break;
 3403         case IFCOUNTER_OMCASTS:
 3404                 retval = priv->stats.vport.tx_multicast_packets;
 3405                 break;
 3406         case IFCOUNTER_OQDROPS:
 3407                 retval = priv->stats.vport.tx_queue_dropped;
 3408                 break;
 3409         case IFCOUNTER_COLLISIONS:
 3410                 retval = priv->stats.pport.collisions;
 3411                 break;
 3412         default:
 3413                 retval = if_get_counter_default(ifp, cnt);
 3414                 break;
 3415         }
 3416         /* PRIV_UNLOCK(priv); XXX not allowed */
 3417         return (retval);
 3418 }
 3419 
 3420 static void
 3421 mlx5e_set_rx_mode(struct ifnet *ifp)
 3422 {
 3423         struct mlx5e_priv *priv = ifp->if_softc;
 3424 
 3425         queue_work(priv->wq, &priv->set_rx_mode_work);
 3426 }
 3427 
 3428 static int
 3429 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 3430 {
 3431         struct mlx5e_priv *priv;
 3432         struct ifreq *ifr;
 3433         struct ifdownreason *ifdr;
 3434         struct ifi2creq i2c;
 3435         struct ifrsskey *ifrk;
 3436         struct ifrsshash *ifrh;
 3437         struct siocsifcapnv_driver_data *drv_ioctl_data, drv_ioctl_data_d;
 3438         int error = 0;
 3439         int mask;
 3440         int size_read = 0;
 3441         int module_status;
 3442         int module_num;
 3443         int max_mtu;
 3444         uint8_t read_addr;
 3445 
 3446         priv = ifp->if_softc;
 3447 
 3448         /* check if detaching */
 3449         if (priv == NULL || priv->gone != 0)
 3450                 return (ENXIO);
 3451 
 3452         switch (command) {
 3453         case SIOCSIFMTU:
 3454                 ifr = (struct ifreq *)data;
 3455 
 3456                 PRIV_LOCK(priv);
 3457                 mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
 3458 
 3459                 if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
 3460                     ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
 3461                         int was_opened;
 3462 
 3463                         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 3464                         if (was_opened)
 3465                                 mlx5e_close_locked(ifp);
 3466 
 3467                         /* set new MTU */
 3468                         mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
 3469 
 3470                         if (was_opened)
 3471                                 mlx5e_open_locked(ifp);
 3472                 } else {
 3473                         error = EINVAL;
 3474                         mlx5_en_err(ifp,
 3475                             "Invalid MTU value. Min val: %d, Max val: %d\n",
 3476                             MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
 3477                 }
 3478                 PRIV_UNLOCK(priv);
 3479                 break;
 3480         case SIOCSIFFLAGS:
 3481                 if ((ifp->if_flags & IFF_UP) &&
 3482                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 3483                         mlx5e_set_rx_mode(ifp);
 3484                         break;
 3485                 }
 3486                 PRIV_LOCK(priv);
 3487                 if (ifp->if_flags & IFF_UP) {
 3488                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 3489                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 3490                                         mlx5e_open_locked(ifp);
 3491                                 ifp->if_drv_flags |= IFF_DRV_RUNNING;
 3492                                 mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
 3493                         }
 3494                 } else {
 3495                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 3496                                 mlx5_set_port_status(priv->mdev,
 3497                                     MLX5_PORT_DOWN);
 3498                                 if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
 3499                                         mlx5e_close_locked(ifp);
 3500                                 mlx5e_update_carrier(priv);
 3501                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 3502                         }
 3503                 }
 3504                 PRIV_UNLOCK(priv);
 3505                 break;
 3506         case SIOCADDMULTI:
 3507         case SIOCDELMULTI:
 3508                 mlx5e_set_rx_mode(ifp);
 3509                 break;
 3510         case SIOCSIFMEDIA:
 3511         case SIOCGIFMEDIA:
 3512         case SIOCGIFXMEDIA:
 3513                 ifr = (struct ifreq *)data;
 3514                 error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
 3515                 break;
 3516         case SIOCGIFCAPNV:
 3517                 error = 0;
 3518                 break;
 3519         case SIOCSIFCAP:
 3520                 ifr = (struct ifreq *)data;
 3521                 drv_ioctl_data = &drv_ioctl_data_d;
 3522                 drv_ioctl_data->reqcap = ifr->ifr_reqcap;
 3523                 PRIV_LOCK(priv);
 3524                 drv_ioctl_data->reqcap2 = ifp->if_capenable2;
 3525                 drv_ioctl_data->nvcap = NULL;
 3526                 goto siocsifcap_driver;
 3527         case SIOCSIFCAPNV:
 3528                 drv_ioctl_data = (struct siocsifcapnv_driver_data *)data;
 3529                 PRIV_LOCK(priv);
 3530 siocsifcap_driver:
 3531                 mask = drv_ioctl_data->reqcap ^ ifp->if_capenable;
 3532 
 3533                 if (mask & IFCAP_TXCSUM) {
 3534                         ifp->if_capenable ^= IFCAP_TXCSUM;
 3535                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 3536 
 3537                         if (IFCAP_TSO4 & ifp->if_capenable &&
 3538                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
 3539                                 mask &= ~IFCAP_TSO4;
 3540                                 ifp->if_capenable &= ~IFCAP_TSO4;
 3541                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
 3542                                 mlx5_en_err(ifp,
 3543                                     "tso4 disabled due to -txcsum.\n");
 3544                         }
 3545                 }
 3546                 if (mask & IFCAP_TXCSUM_IPV6) {
 3547                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 3548                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
 3549 
 3550                         if (IFCAP_TSO6 & ifp->if_capenable &&
 3551                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 3552                                 mask &= ~IFCAP_TSO6;
 3553                                 ifp->if_capenable &= ~IFCAP_TSO6;
 3554                                 ifp->if_hwassist &= ~CSUM_IP6_TSO;
 3555                                 mlx5_en_err(ifp,
 3556                                     "tso6 disabled due to -txcsum6.\n");
 3557                         }
 3558                 }
 3559                 if (mask & IFCAP_MEXTPG)
 3560                         ifp->if_capenable ^= IFCAP_MEXTPG;
 3561                 if (mask & IFCAP_TXTLS4)
 3562                         ifp->if_capenable ^= IFCAP_TXTLS4;
 3563                 if (mask & IFCAP_TXTLS6)
 3564                         ifp->if_capenable ^= IFCAP_TXTLS6;
 3565 #ifdef RATELIMIT
 3566                 if (mask & IFCAP_TXTLS_RTLMT)
 3567                         ifp->if_capenable ^= IFCAP_TXTLS_RTLMT;
 3568 #endif
 3569                 if (mask & IFCAP_RXCSUM)
 3570                         ifp->if_capenable ^= IFCAP_RXCSUM;
 3571                 if (mask & IFCAP_RXCSUM_IPV6)
 3572                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 3573                 if (mask & IFCAP_TSO4) {
 3574                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
 3575                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
 3576                                 mlx5_en_err(ifp, "enable txcsum first.\n");
 3577                                 error = EAGAIN;
 3578                                 goto out;
 3579                         }
 3580                         ifp->if_capenable ^= IFCAP_TSO4;
 3581                         ifp->if_hwassist ^= CSUM_IP_TSO;
 3582                 }
 3583                 if (mask & IFCAP_TSO6) {
 3584                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
 3585                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 3586                                 mlx5_en_err(ifp, "enable txcsum6 first.\n");
 3587                                 error = EAGAIN;
 3588                                 goto out;
 3589                         }
 3590                         ifp->if_capenable ^= IFCAP_TSO6;
 3591                         ifp->if_hwassist ^= CSUM_IP6_TSO;
 3592                 }
 3593                 if (mask & IFCAP_VLAN_HWTSO)
 3594                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 3595                 if (mask & IFCAP_VLAN_HWFILTER) {
 3596                         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 3597                                 mlx5e_disable_vlan_filter(priv);
 3598                         else
 3599                                 mlx5e_enable_vlan_filter(priv);
 3600 
 3601                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 3602                 }
 3603                 if (mask & IFCAP_VLAN_HWTAGGING)
 3604                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 3605                 if (mask & IFCAP_WOL_MAGIC)
 3606                         ifp->if_capenable ^= IFCAP_WOL_MAGIC;
 3607                 if (mask & IFCAP_VXLAN_HWCSUM) {
 3608                         const bool was_enabled =
 3609                             (ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0;
 3610                         if (was_enabled)
 3611                                 mlx5e_del_all_vxlan_rules(priv);
 3612                         ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
 3613                         ifp->if_hwassist ^= CSUM_INNER_IP | CSUM_INNER_IP_UDP |
 3614                             CSUM_INNER_IP_TCP | CSUM_INNER_IP6_UDP |
 3615                             CSUM_INNER_IP6_TCP;
 3616                         if (!was_enabled) {
 3617                                 int err = mlx5e_add_all_vxlan_rules(priv);
 3618                                 if (err != 0) {
 3619                                         mlx5_en_err(ifp,
 3620                                             "mlx5e_add_all_vxlan_rules() failed, %d (ignored)\n", err);
 3621                                 }
 3622                         }
 3623                 }
 3624                 if (mask & IFCAP_VXLAN_HWTSO) {
 3625                         ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
 3626                         ifp->if_hwassist ^= CSUM_INNER_IP_TSO |
 3627                             CSUM_INNER_IP6_TSO;
 3628                 }
 3629 
 3630                 VLAN_CAPABILITIES(ifp);
 3631                 /* turn off LRO means also turn of HW LRO - if it's on */
 3632                 if (mask & IFCAP_LRO) {
 3633                         int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 3634                         bool need_restart = false;
 3635 
 3636                         ifp->if_capenable ^= IFCAP_LRO;
 3637 
 3638                         /* figure out if updating HW LRO is needed */
 3639                         if (!(ifp->if_capenable & IFCAP_LRO)) {
 3640                                 if (priv->params.hw_lro_en) {
 3641                                         priv->params.hw_lro_en = false;
 3642                                         need_restart = true;
 3643                                 }
 3644                         } else {
 3645                                 if (priv->params.hw_lro_en == false &&
 3646                                     priv->params_ethtool.hw_lro != 0) {
 3647                                         priv->params.hw_lro_en = true;
 3648                                         need_restart = true;
 3649                                 }
 3650                         }
 3651                         if (was_opened && need_restart) {
 3652                                 mlx5e_close_locked(ifp);
 3653                                 mlx5e_open_locked(ifp);
 3654                         }
 3655                 }
 3656                 if (mask & IFCAP_HWRXTSTMP) {
 3657                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
 3658                         if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
 3659                                 if (priv->clbr_done == 0)
 3660                                         mlx5e_reset_calibration_callout(priv);
 3661                         } else {
 3662                                 callout_drain(&priv->tstmp_clbr);
 3663                                 priv->clbr_done = 0;
 3664                         }
 3665                 }
 3666                 mask = drv_ioctl_data->reqcap2 ^ ifp->if_capenable2;
 3667                 if ((mask & IFCAP2_BIT(IFCAP2_RXTLS4)) != 0)
 3668                         ifp->if_capenable2 ^= IFCAP2_BIT(IFCAP2_RXTLS4);
 3669                 if ((mask & IFCAP2_BIT(IFCAP2_RXTLS6)) != 0)
 3670                         ifp->if_capenable2 ^= IFCAP2_BIT(IFCAP2_RXTLS6);
 3671 out:
 3672                 PRIV_UNLOCK(priv);
 3673                 break;
 3674 
 3675         case SIOCGI2C:
 3676                 ifr = (struct ifreq *)data;
 3677 
 3678                 /*
 3679                  * Copy from the user-space address ifr_data to the
 3680                  * kernel-space address i2c
 3681                  */
 3682                 error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
 3683                 if (error)
 3684                         break;
 3685 
 3686                 if (i2c.len > sizeof(i2c.data)) {
 3687                         error = EINVAL;
 3688                         break;
 3689                 }
 3690 
 3691                 PRIV_LOCK(priv);
 3692                 /* Get module_num which is required for the query_eeprom */
 3693                 error = mlx5_query_module_num(priv->mdev, &module_num);
 3694                 if (error) {
 3695                         mlx5_en_err(ifp,
 3696                             "Query module num failed, eeprom reading is not supported\n");
 3697                         error = EINVAL;
 3698                         goto err_i2c;
 3699                 }
 3700                 /* Check if module is present before doing an access */
 3701                 module_status = mlx5_query_module_status(priv->mdev, module_num);
 3702                 if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) {
 3703                         error = EINVAL;
 3704                         goto err_i2c;
 3705                 }
 3706                 /*
 3707                  * Currently 0XA0 and 0xA2 are the only addresses permitted.
 3708                  * The internal conversion is as follows:
 3709                  */
 3710                 if (i2c.dev_addr == 0xA0)
 3711                         read_addr = MLX5_I2C_ADDR_LOW;
 3712                 else if (i2c.dev_addr == 0xA2)
 3713                         read_addr = MLX5_I2C_ADDR_HIGH;
 3714                 else {
 3715                         mlx5_en_err(ifp,
 3716                             "Query eeprom failed, Invalid Address: %X\n",
 3717                             i2c.dev_addr);
 3718                         error = EINVAL;
 3719                         goto err_i2c;
 3720                 }
 3721                 error = mlx5_query_eeprom(priv->mdev,
 3722                     read_addr, MLX5_EEPROM_LOW_PAGE,
 3723                     (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
 3724                     (uint32_t *)i2c.data, &size_read);
 3725                 if (error) {
 3726                         mlx5_en_err(ifp,
 3727                             "Query eeprom failed, eeprom reading is not supported\n");
 3728                         error = EINVAL;
 3729                         goto err_i2c;
 3730                 }
 3731 
 3732                 if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
 3733                         error = mlx5_query_eeprom(priv->mdev,
 3734                             read_addr, MLX5_EEPROM_LOW_PAGE,
 3735                             (uint32_t)(i2c.offset + size_read),
 3736                             (uint32_t)(i2c.len - size_read), module_num,
 3737                             (uint32_t *)(i2c.data + size_read), &size_read);
 3738                 }
 3739                 if (error) {
 3740                         mlx5_en_err(ifp,
 3741                             "Query eeprom failed, eeprom reading is not supported\n");
 3742                         error = EINVAL;
 3743                         goto err_i2c;
 3744                 }
 3745 
 3746                 error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
 3747 err_i2c:
 3748                 PRIV_UNLOCK(priv);
 3749                 break;
 3750         case SIOCGIFDOWNREASON:
 3751                 ifdr = (struct ifdownreason *)data;
 3752                 bzero(ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg));
 3753                 PRIV_LOCK(priv);
 3754                 error = -mlx5_query_pddr_troubleshooting_info(priv->mdev, NULL,
 3755                     ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg));
 3756                 PRIV_UNLOCK(priv);
 3757                 if (error == 0)
 3758                         ifdr->ifdr_reason = IFDR_REASON_MSG;
 3759                 break;
 3760 
 3761         case SIOCGIFRSSKEY:
 3762                 ifrk = (struct ifrsskey *)data;
 3763                 ifrk->ifrk_func = RSS_FUNC_TOEPLITZ;
 3764                 ifrk->ifrk_keylen = MLX5E_RSS_KEY_SIZE;
 3765                 CTASSERT(sizeof(ifrk->ifrk_key) >= MLX5E_RSS_KEY_SIZE);
 3766                 mlx5e_get_rss_key(ifrk->ifrk_key);
 3767                 break;
 3768 
 3769         case SIOCGIFRSSHASH:
 3770                 ifrh = (struct ifrsshash *)data;
 3771                 ifrh->ifrh_func = RSS_FUNC_TOEPLITZ;
 3772                 ifrh->ifrh_types =
 3773                     RSS_TYPE_IPV4 |
 3774                     RSS_TYPE_TCP_IPV4 |
 3775                     RSS_TYPE_UDP_IPV4 |
 3776                     RSS_TYPE_IPV6 |
 3777                     RSS_TYPE_TCP_IPV6 |
 3778                     RSS_TYPE_UDP_IPV6;
 3779                 break;
 3780 
 3781         default:
 3782                 error = ether_ioctl(ifp, command, data);
 3783                 break;
 3784         }
 3785         return (error);
 3786 }
 3787 
 3788 static int
 3789 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 3790 {
 3791         /*
 3792          * TODO: uncoment once FW really sets all these bits if
 3793          * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
 3794          * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
 3795          * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
 3796          * -ENOTSUPP;
 3797          */
 3798 
 3799         /* TODO: add more must-to-have features */
 3800 
 3801         if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 3802                 return (-ENODEV);
 3803 
 3804         return (0);
 3805 }
 3806 
 3807 static u16
 3808 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
 3809 {
 3810         const int min_size = ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN;
 3811         const int max_size = MLX5E_MAX_TX_INLINE;
 3812         const int bf_buf_size =
 3813             ((1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U) -
 3814             (sizeof(struct mlx5e_tx_wqe) - 2);
 3815 
 3816         /* verify against driver limits */
 3817         if (bf_buf_size > max_size)
 3818                 return (max_size);
 3819         else if (bf_buf_size < min_size)
 3820                 return (min_size);
 3821         else
 3822                 return (bf_buf_size);
 3823 }
 3824 
 3825 static int
 3826 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
 3827     struct mlx5e_priv *priv,
 3828     int num_comp_vectors)
 3829 {
 3830         int err;
 3831 
 3832         /*
 3833          * TODO: Consider link speed for setting "log_sq_size",
 3834          * "log_rq_size" and "cq_moderation_xxx":
 3835          */
 3836         priv->params.log_sq_size =
 3837             MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 3838         priv->params.log_rq_size =
 3839             MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 3840         priv->params.rx_cq_moderation_usec =
 3841             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
 3842             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
 3843             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
 3844         priv->params.rx_cq_moderation_mode =
 3845             MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
 3846         priv->params.rx_cq_moderation_pkts =
 3847             MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
 3848         priv->params.tx_cq_moderation_usec =
 3849             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
 3850         priv->params.tx_cq_moderation_pkts =
 3851             MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
 3852         priv->params.min_rx_wqes =
 3853             MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
 3854         priv->params.rx_hash_log_tbl_sz =
 3855             (order_base_2(num_comp_vectors) >
 3856             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
 3857             order_base_2(num_comp_vectors) :
 3858             MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
 3859         priv->params.num_tc = 1;
 3860         priv->params.default_vlan_prio = 0;
 3861         priv->counter_set_id = -1;
 3862         priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
 3863 
 3864         err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
 3865         if (err)
 3866                 return (err);
 3867 
 3868         /*
 3869          * hw lro is currently defaulted to off. when it won't anymore we
 3870          * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
 3871          */
 3872         priv->params.hw_lro_en = false;
 3873         priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 3874 
 3875         /*
 3876          * CQE zipping is currently defaulted to off. when it won't
 3877          * anymore we will consider the HW capability:
 3878          * "!!MLX5_CAP_GEN(mdev, cqe_compression)"
 3879          */
 3880         priv->params.cqe_zipping_en = false;
 3881 
 3882         priv->mdev = mdev;
 3883         priv->params.num_channels = num_comp_vectors;
 3884         priv->params.channels_rsss = 1;
 3885         priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
 3886         priv->queue_mapping_channel_mask =
 3887             roundup_pow_of_two(num_comp_vectors) - 1;
 3888         priv->num_tc = priv->params.num_tc;
 3889         priv->default_vlan_prio = priv->params.default_vlan_prio;
 3890 
 3891         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
 3892         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 3893         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
 3894 
 3895         return (0);
 3896 }
 3897 
 3898 static void
 3899 mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
 3900 {
 3901         bool ro_pci_enable =
 3902             pci_get_relaxed_ordering_enabled(mdev->pdev->dev.bsddev);
 3903         bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
 3904         bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
 3905 
 3906         MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
 3907         MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
 3908 }
 3909 
 3910 static int
 3911 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
 3912                   struct mlx5_core_mkey *mkey)
 3913 {
 3914         struct ifnet *ifp = priv->ifp;
 3915         struct mlx5_core_dev *mdev = priv->mdev;
 3916         int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 3917         void *mkc;
 3918         u32 *in;
 3919         int err;
 3920 
 3921         in = mlx5_vzalloc(inlen);
 3922         if (in == NULL) {
 3923                 mlx5_en_err(ifp, "failed to allocate inbox\n");
 3924                 return (-ENOMEM);
 3925         }
 3926 
 3927         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 3928         MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
 3929         MLX5_SET(mkc, mkc, umr_en, 1);  /* used by HW TLS */
 3930         MLX5_SET(mkc, mkc, lw, 1);
 3931         MLX5_SET(mkc, mkc, lr, 1);
 3932         mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
 3933         MLX5_SET(mkc, mkc, pd, pdn);
 3934         MLX5_SET(mkc, mkc, length64, 1);
 3935         MLX5_SET(mkc, mkc, qpn, 0xffffff);
 3936 
 3937         err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
 3938         if (err)
 3939                 mlx5_en_err(ifp, "mlx5_core_create_mkey failed, %d\n",
 3940                     err);
 3941 
 3942         kvfree(in);
 3943         return (err);
 3944 }
 3945 
 3946 static const char *mlx5e_vport_stats_desc[] = {
 3947         MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
 3948 };
 3949 
 3950 static const char *mlx5e_pport_stats_desc[] = {
 3951         MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
 3952 };
 3953 
 3954 static int
 3955 mlx5e_priv_static_init(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev,
 3956     const uint32_t channels)
 3957 {
 3958         uint32_t x;
 3959         int err;
 3960 
 3961         mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
 3962         sx_init(&priv->state_lock, "mlx5state");
 3963         callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
 3964         MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
 3965         for (x = 0; x != channels; x++)
 3966                 mlx5e_chan_static_init(priv, &priv->channel[x], x);
 3967 
 3968         for (x = 0; x != channels; x++) {
 3969                 err = mlx5_alloc_bfreg(mdev, &priv->channel[x].bfreg, false, false);
 3970                 if (err)
 3971                         goto err_alloc_bfreg;
 3972         }
 3973         return (0);
 3974 
 3975 err_alloc_bfreg:
 3976         while (x--)
 3977                 mlx5_free_bfreg(mdev, &priv->channel[x].bfreg);
 3978 
 3979         for (x = 0; x != channels; x++)
 3980                 mlx5e_chan_static_destroy(&priv->channel[x]);
 3981         callout_drain(&priv->watchdog);
 3982         mtx_destroy(&priv->async_events_mtx);
 3983         sx_destroy(&priv->state_lock);
 3984         return (err);
 3985 }
 3986 
 3987 static void
 3988 mlx5e_priv_static_destroy(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev,
 3989     const uint32_t channels)
 3990 {
 3991         uint32_t x;
 3992 
 3993         for (x = 0; x != channels; x++)
 3994                 mlx5_free_bfreg(mdev, &priv->channel[x].bfreg);
 3995         for (x = 0; x != channels; x++)
 3996                 mlx5e_chan_static_destroy(&priv->channel[x]);
 3997         callout_drain(&priv->watchdog);
 3998         mtx_destroy(&priv->async_events_mtx);
 3999         sx_destroy(&priv->state_lock);
 4000 }
 4001 
 4002 static int
 4003 sysctl_firmware(SYSCTL_HANDLER_ARGS)
 4004 {
 4005         /*
 4006          * %d.%d%.d the string format.
 4007          * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
 4008          * We need at most 5 chars to store that.
 4009          * It also has: two "." and NULL at the end, which means we need 18
 4010          * (5*3 + 3) chars at most.
 4011          */
 4012         char fw[18];
 4013         struct mlx5e_priv *priv = arg1;
 4014         int error;
 4015 
 4016         snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
 4017             fw_rev_sub(priv->mdev));
 4018         error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
 4019         return (error);
 4020 }
 4021 
 4022 static void
 4023 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
 4024 {
 4025         int i;
 4026 
 4027         for (i = 0; i < ch->priv->num_tc; i++)
 4028                 mlx5e_drain_sq(&ch->sq[i]);
 4029 }
 4030 
 4031 static void
 4032 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
 4033 {
 4034 
 4035         sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
 4036         sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
 4037         mlx5e_tx_notify_hw(sq, true);
 4038 }
 4039 
 4040 void
 4041 mlx5e_resume_sq(struct mlx5e_sq *sq)
 4042 {
 4043         int err;
 4044 
 4045         /* check if already enabled */
 4046         if (READ_ONCE(sq->running) != 0)
 4047                 return;
 4048 
 4049         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
 4050             MLX5_SQC_STATE_RST);
 4051         if (err != 0) {
 4052                 mlx5_en_err(sq->ifp,
 4053                     "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
 4054         }
 4055 
 4056         sq->cc = 0;
 4057         sq->pc = 0;
 4058 
 4059         /* reset doorbell prior to moving from RST to RDY */
 4060         mlx5e_reset_sq_doorbell_record(sq);
 4061 
 4062         err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
 4063             MLX5_SQC_STATE_RDY);
 4064         if (err != 0) {
 4065                 mlx5_en_err(sq->ifp,
 4066                     "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
 4067         }
 4068 
 4069         sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
 4070         WRITE_ONCE(sq->running, 1);
 4071 }
 4072 
 4073 static void
 4074 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
 4075 {
 4076         int i;
 4077 
 4078         for (i = 0; i < ch->priv->num_tc; i++)
 4079                 mlx5e_resume_sq(&ch->sq[i]);
 4080 }
 4081 
 4082 static void
 4083 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
 4084 {
 4085         struct mlx5e_rq *rq = &ch->rq;
 4086         struct epoch_tracker et;
 4087         int err;
 4088 
 4089         mtx_lock(&rq->mtx);
 4090         rq->enabled = 0;
 4091         callout_stop(&rq->watchdog);
 4092         mtx_unlock(&rq->mtx);
 4093 
 4094         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
 4095         if (err != 0) {
 4096                 mlx5_en_err(rq->ifp,
 4097                     "mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
 4098         }
 4099 
 4100         while (!mlx5_wq_ll_is_empty(&rq->wq)) {
 4101                 msleep(1);
 4102                 NET_EPOCH_ENTER(et);
 4103                 rq->cq.mcq.comp(&rq->cq.mcq, NULL);
 4104                 NET_EPOCH_EXIT(et);
 4105         }
 4106 
 4107         /*
 4108          * Transitioning into RST state will allow the FW to track less ERR state queues,
 4109          * thus reducing the recv queue flushing time
 4110          */
 4111         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
 4112         if (err != 0) {
 4113                 mlx5_en_err(rq->ifp,
 4114                     "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
 4115         }
 4116 }
 4117 
 4118 static void
 4119 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
 4120 {
 4121         struct mlx5e_rq *rq = &ch->rq;
 4122         struct epoch_tracker et;
 4123         int err;
 4124 
 4125         rq->wq.wqe_ctr = 0;
 4126         mlx5_wq_ll_update_db_record(&rq->wq);
 4127         err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 4128         if (err != 0) {
 4129                 mlx5_en_err(rq->ifp,
 4130                     "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
 4131         }
 4132 
 4133         rq->enabled = 1;
 4134 
 4135         NET_EPOCH_ENTER(et);
 4136         rq->cq.mcq.comp(&rq->cq.mcq, NULL);
 4137         NET_EPOCH_EXIT(et);
 4138 }
 4139 
 4140 void
 4141 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
 4142 {
 4143         int i;
 4144 
 4145         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 4146                 return;
 4147 
 4148         for (i = 0; i < priv->params.num_channels; i++) {
 4149                 if (value)
 4150                         mlx5e_disable_tx_dma(&priv->channel[i]);
 4151                 else
 4152                         mlx5e_enable_tx_dma(&priv->channel[i]);
 4153         }
 4154 }
 4155 
 4156 void
 4157 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
 4158 {
 4159         int i;
 4160 
 4161         if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 4162                 return;
 4163 
 4164         for (i = 0; i < priv->params.num_channels; i++) {
 4165                 if (value)
 4166                         mlx5e_disable_rx_dma(&priv->channel[i]);
 4167                 else
 4168                         mlx5e_enable_rx_dma(&priv->channel[i]);
 4169         }
 4170 }
 4171 
 4172 static void
 4173 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
 4174 {
 4175         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
 4176             OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
 4177             priv, 0, sysctl_firmware, "A", "HCA firmware version");
 4178 
 4179         SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
 4180             OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
 4181             "Board ID");
 4182 }
 4183 
 4184 static int
 4185 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
 4186 {
 4187         struct mlx5e_priv *priv = arg1;
 4188         uint8_t temp[MLX5E_MAX_PRIORITY];
 4189         uint32_t tx_pfc;
 4190         int err;
 4191         int i;
 4192 
 4193         PRIV_LOCK(priv);
 4194 
 4195         tx_pfc = priv->params.tx_priority_flow_control;
 4196 
 4197         for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
 4198                 temp[i] = (tx_pfc >> i) & 1;
 4199 
 4200         err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
 4201         if (err || !req->newptr)
 4202                 goto done;
 4203         err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
 4204         if (err)
 4205                 goto done;
 4206 
 4207         priv->params.tx_priority_flow_control = 0;
 4208 
 4209         /* range check input value */
 4210         for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
 4211                 if (temp[i] > 1) {
 4212                         err = ERANGE;
 4213                         goto done;
 4214                 }
 4215                 priv->params.tx_priority_flow_control |= (temp[i] << i);
 4216         }
 4217 
 4218         /* check if update is required */
 4219         if (tx_pfc != priv->params.tx_priority_flow_control)
 4220                 err = -mlx5e_set_port_pfc(priv);
 4221 done:
 4222         if (err != 0)
 4223                 priv->params.tx_priority_flow_control= tx_pfc;
 4224         PRIV_UNLOCK(priv);
 4225 
 4226         return (err);
 4227 }
 4228 
 4229 static int
 4230 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
 4231 {
 4232         struct mlx5e_priv *priv = arg1;
 4233         uint8_t temp[MLX5E_MAX_PRIORITY];
 4234         uint32_t rx_pfc;
 4235         int err;
 4236         int i;
 4237 
 4238         PRIV_LOCK(priv);
 4239 
 4240         rx_pfc = priv->params.rx_priority_flow_control;
 4241 
 4242         for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
 4243                 temp[i] = (rx_pfc >> i) & 1;
 4244 
 4245         err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
 4246         if (err || !req->newptr)
 4247                 goto done;
 4248         err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
 4249         if (err)
 4250                 goto done;
 4251 
 4252         priv->params.rx_priority_flow_control = 0;
 4253 
 4254         /* range check input value */
 4255         for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
 4256                 if (temp[i] > 1) {
 4257                         err = ERANGE;
 4258                         goto done;
 4259                 }
 4260                 priv->params.rx_priority_flow_control |= (temp[i] << i);
 4261         }
 4262 
 4263         /* check if update is required */
 4264         if (rx_pfc != priv->params.rx_priority_flow_control) {
 4265                 err = -mlx5e_set_port_pfc(priv);
 4266                 if (err == 0 && priv->sw_is_port_buf_owner)
 4267                         err = mlx5e_update_buf_lossy(priv);
 4268         }
 4269 done:
 4270         if (err != 0)
 4271                 priv->params.rx_priority_flow_control= rx_pfc;
 4272         PRIV_UNLOCK(priv);
 4273 
 4274         return (err);
 4275 }
 4276 
 4277 static void
 4278 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
 4279 {
 4280         int error;
 4281 
 4282         /* enable pauseframes by default */
 4283         priv->params.tx_pauseframe_control = 1;
 4284         priv->params.rx_pauseframe_control = 1;
 4285 
 4286         /* disable ports flow control, PFC, by default */
 4287         priv->params.tx_priority_flow_control = 0;
 4288         priv->params.rx_priority_flow_control = 0;
 4289 
 4290         /* register pauseframe SYSCTLs */
 4291         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4292             OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
 4293             &priv->params.tx_pauseframe_control, 0,
 4294             "Set to enable TX pause frames. Clear to disable.");
 4295 
 4296         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4297             OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
 4298             &priv->params.rx_pauseframe_control, 0,
 4299             "Set to enable RX pause frames. Clear to disable.");
 4300 
 4301         /* register priority flow control, PFC, SYSCTLs */
 4302         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4303             OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
 4304             CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU",
 4305             "Set to enable TX ports flow control frames for priorities 0..7. Clear to disable.");
 4306 
 4307         SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4308             OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
 4309             CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU",
 4310             "Set to enable RX ports flow control frames for priorities 0..7. Clear to disable.");
 4311 
 4312         PRIV_LOCK(priv);
 4313 
 4314         /* range check */
 4315         priv->params.tx_pauseframe_control =
 4316             priv->params.tx_pauseframe_control ? 1 : 0;
 4317         priv->params.rx_pauseframe_control =
 4318             priv->params.rx_pauseframe_control ? 1 : 0;
 4319 
 4320         /* update firmware */
 4321         error = mlx5e_set_port_pause_and_pfc(priv);
 4322         if (error == -EINVAL) {
 4323                 mlx5_en_err(priv->ifp,
 4324                     "Global pauseframes must be disabled before enabling PFC.\n");
 4325                 priv->params.rx_priority_flow_control = 0;
 4326                 priv->params.tx_priority_flow_control = 0;
 4327 
 4328                 /* update firmware */
 4329                 (void) mlx5e_set_port_pause_and_pfc(priv);
 4330         }
 4331         PRIV_UNLOCK(priv);
 4332 }
 4333 
 4334 static int
 4335 mlx5e_ul_snd_tag_alloc(struct ifnet *ifp,
 4336     union if_snd_tag_alloc_params *params,
 4337     struct m_snd_tag **ppmt)
 4338 {
 4339         struct mlx5e_priv *priv;
 4340         struct mlx5e_channel *pch;
 4341 
 4342         priv = ifp->if_softc;
 4343 
 4344         if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE)) {
 4345                 return (EOPNOTSUPP);
 4346         } else {
 4347                 /* keep this code synced with mlx5e_select_queue() */
 4348                 u32 ch = priv->params.num_channels;
 4349 #ifdef RSS
 4350                 u32 temp;
 4351 
 4352                 if (rss_hash2bucket(params->hdr.flowid,
 4353                     params->hdr.flowtype, &temp) == 0)
 4354                         ch = temp % ch;
 4355                 else
 4356 #endif
 4357                         ch = (params->hdr.flowid % 128) % ch;
 4358 
 4359                 /*
 4360                  * NOTE: The channels array is only freed at detach
 4361                  * and it safe to return a pointer to the send tag
 4362                  * inside the channels structure as long as we
 4363                  * reference the priv.
 4364                  */
 4365                 pch = priv->channel + ch;
 4366 
 4367                 /* check if send queue is not running */
 4368                 if (unlikely(pch->sq[0].running == 0))
 4369                         return (ENXIO);
 4370                 m_snd_tag_ref(&pch->tag);
 4371                 *ppmt = &pch->tag;
 4372                 return (0);
 4373         }
 4374 }
 4375 
 4376 static int
 4377 mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
 4378 {
 4379         struct mlx5e_channel *pch =
 4380             container_of(pmt, struct mlx5e_channel, tag);
 4381 
 4382         params->unlimited.max_rate = -1ULL;
 4383         params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]);
 4384         return (0);
 4385 }
 4386 
 4387 static void
 4388 mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt)
 4389 {
 4390         struct mlx5e_channel *pch =
 4391             container_of(pmt, struct mlx5e_channel, tag);
 4392 
 4393         complete(&pch->completion);
 4394 }
 4395 
 4396 static int
 4397 mlx5e_snd_tag_alloc(struct ifnet *ifp,
 4398     union if_snd_tag_alloc_params *params,
 4399     struct m_snd_tag **ppmt)
 4400 {
 4401 
 4402         switch (params->hdr.type) {
 4403 #ifdef RATELIMIT
 4404         case IF_SND_TAG_TYPE_RATE_LIMIT:
 4405                 return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt));
 4406 #ifdef KERN_TLS
 4407         case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
 4408                 return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt));
 4409 #endif
 4410 #endif
 4411         case IF_SND_TAG_TYPE_UNLIMITED:
 4412                 return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt));
 4413 #ifdef KERN_TLS
 4414         case IF_SND_TAG_TYPE_TLS:
 4415                 return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt));
 4416         case IF_SND_TAG_TYPE_TLS_RX:
 4417                 return (mlx5e_tls_rx_snd_tag_alloc(ifp, params, ppmt));
 4418 #endif
 4419         default:
 4420                 return (EOPNOTSUPP);
 4421         }
 4422 }
 4423 
 4424 #ifdef RATELIMIT
 4425 #define NUM_HDWR_RATES_MLX 13
 4426 static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = {
 4427         135375,                 /* 1,083,000 */
 4428         180500,                 /* 1,444,000 */
 4429         270750,                 /* 2,166,000 */
 4430         361000,                 /* 2,888,000 */
 4431         541500,                 /* 4,332,000 */
 4432         721875,                 /* 5,775,000 */
 4433         1082875,                /* 8,663,000 */
 4434         1443875,                /* 11,551,000 */
 4435         2165750,                /* 17,326,000 */
 4436         2887750,                /* 23,102,000 */
 4437         4331625,                /* 34,653,000 */
 4438         5775500,                /* 46,204,000 */
 4439         8663125                 /* 69,305,000 */
 4440 };
 4441 
 4442 static void
 4443 mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
 4444 {
 4445         /*
 4446          * This function needs updating by the driver maintainer!
 4447          * For the MLX card there are currently (ConectX-4?) 13 
 4448          * pre-set rates and others i.e. ConnectX-5, 6, 7??
 4449          *
 4450          * This will change based on later adapters
 4451          * and this code should be updated to look at ifp
 4452          * and figure out the specific adapter type
 4453          * settings i.e. how many rates as well
 4454          * as if they are fixed (as is shown here) or
 4455          * if they are dynamic (example chelsio t4). Also if there
 4456          * is a maximum number of flows that the adapter
 4457          * can handle that too needs to be updated in
 4458          * the max_flows field.
 4459          */
 4460         q->rate_table = adapter_rates_mlx;
 4461         q->flags = RT_IS_FIXED_TABLE;
 4462         q->max_flows = 0;       /* mlx has no limit */
 4463         q->number_of_rates = NUM_HDWR_RATES_MLX;
 4464         q->min_segment_burst = 1;
 4465 }
 4466 #endif
 4467 
 4468 static void
 4469 mlx5e_ifm_add(struct mlx5e_priv *priv, int type)
 4470 {
 4471         ifmedia_add(&priv->media, type | IFM_ETHER, 0, NULL);
 4472         ifmedia_add(&priv->media, type | IFM_ETHER |
 4473             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
 4474         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_RXPAUSE, 0, NULL);
 4475         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_TXPAUSE, 0, NULL);
 4476         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX, 0, NULL);
 4477         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX |
 4478             IFM_ETH_RXPAUSE, 0, NULL);
 4479         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX |
 4480             IFM_ETH_TXPAUSE, 0, NULL);
 4481         ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX |
 4482             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
 4483 }
 4484 
 4485 static void *
 4486 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 4487 {
 4488         struct ifnet *ifp;
 4489         struct mlx5e_priv *priv;
 4490         u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
 4491         struct sysctl_oid_list *child;
 4492         int ncv = mdev->priv.eq_table.num_comp_vectors;
 4493         char unit[16];
 4494         struct pfil_head_args pa;
 4495         int err;
 4496         u32 eth_proto_cap;
 4497         u32 out[MLX5_ST_SZ_DW(ptys_reg)];
 4498         bool ext;
 4499         struct media media_entry = {};
 4500 
 4501         if (mlx5e_check_required_hca_cap(mdev)) {
 4502                 mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
 4503                 return (NULL);
 4504         }
 4505 
 4506         /*
 4507          * Try to allocate the priv and make room for worst-case
 4508          * number of channel structures:
 4509          */
 4510         priv = malloc_domainset(sizeof(*priv) +
 4511             (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
 4512             M_MLX5EN, mlx5_dev_domainset(mdev), M_WAITOK | M_ZERO);
 4513 
 4514         ifp = priv->ifp = if_alloc_dev(IFT_ETHER, mdev->pdev->dev.bsddev);
 4515         if (ifp == NULL) {
 4516                 mlx5_core_err(mdev, "if_alloc() failed\n");
 4517                 goto err_free_priv;
 4518         }
 4519         /* setup all static fields */
 4520         if (mlx5e_priv_static_init(priv, mdev, mdev->priv.eq_table.num_comp_vectors)) {
 4521                 mlx5_core_err(mdev, "mlx5e_priv_static_init() failed\n");
 4522                 goto err_free_ifp;
 4523         }
 4524 
 4525         ifp->if_softc = priv;
 4526         if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
 4527         ifp->if_mtu = ETHERMTU;
 4528         ifp->if_init = mlx5e_open;
 4529         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
 4530             IFF_KNOWSEPOCH;
 4531         ifp->if_ioctl = mlx5e_ioctl;
 4532         ifp->if_transmit = mlx5e_xmit;
 4533         ifp->if_qflush = if_qflush;
 4534         ifp->if_get_counter = mlx5e_get_counter;
 4535         ifp->if_snd.ifq_maxlen = ifqmaxlen;
 4536         /*
 4537          * Set driver features
 4538          */
 4539         ifp->if_capabilities |= IFCAP_NV;
 4540         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
 4541         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
 4542         ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
 4543         ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
 4544         ifp->if_capabilities |= IFCAP_LRO;
 4545         ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
 4546         ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
 4547         ifp->if_capabilities |= IFCAP_MEXTPG;
 4548         ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6;
 4549 #ifdef RATELIMIT
 4550         ifp->if_capabilities |= IFCAP_TXRTLMT | IFCAP_TXTLS_RTLMT;
 4551 #endif
 4552         ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
 4553         ifp->if_capabilities2 |= IFCAP2_BIT(IFCAP2_RXTLS4) |
 4554             IFCAP2_BIT(IFCAP2_RXTLS6);
 4555         ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc;
 4556 #ifdef RATELIMIT
 4557         ifp->if_ratelimit_query = mlx5e_ratelimit_query;
 4558 #endif
 4559         /* set TSO limits so that we don't have to drop TX packets */
 4560         ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 4561         ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
 4562         ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
 4563 
 4564         ifp->if_capenable = ifp->if_capabilities;
 4565         ifp->if_capenable2 = ifp->if_capabilities2;
 4566         ifp->if_hwassist = 0;
 4567         if (ifp->if_capenable & IFCAP_TSO)
 4568                 ifp->if_hwassist |= CSUM_TSO;
 4569         if (ifp->if_capenable & IFCAP_TXCSUM)
 4570                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 4571         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
 4572                 ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
 4573         if (ifp->if_capabilities & IFCAP_VXLAN_HWCSUM)
 4574                 ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
 4575                     CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP |
 4576                     CSUM_ENCAP_VXLAN;
 4577         if (ifp->if_capabilities  & IFCAP_VXLAN_HWTSO)
 4578                 ifp->if_hwassist |= CSUM_INNER_IP6_TSO | CSUM_INNER_IP_TSO;
 4579 
 4580         /* ifnet sysctl tree */
 4581         sysctl_ctx_init(&priv->sysctl_ctx);
 4582         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
 4583             OID_AUTO, ifp->if_dname, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 4584             "MLX5 ethernet - interface name");
 4585         if (priv->sysctl_ifnet == NULL) {
 4586                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 4587                 goto err_free_sysctl;
 4588         }
 4589         snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
 4590         priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4591             OID_AUTO, unit, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 4592             "MLX5 ethernet - interface unit");
 4593         if (priv->sysctl_ifnet == NULL) {
 4594                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 4595                 goto err_free_sysctl;
 4596         }
 4597 
 4598         /* HW sysctl tree */
 4599         child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
 4600         priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
 4601             OID_AUTO, "hw", CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 4602             "MLX5 ethernet dev hw");
 4603         if (priv->sysctl_hw == NULL) {
 4604                 mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 4605                 goto err_free_sysctl;
 4606         }
 4607 
 4608         err = mlx5e_build_ifp_priv(mdev, priv, ncv);
 4609         if (err) {
 4610                 mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
 4611                 goto err_free_sysctl;
 4612         }
 4613 
 4614         /* reuse mlx5core's watchdog workqueue */
 4615         priv->wq = mdev->priv.health.wq_watchdog;
 4616 
 4617         err = mlx5_core_alloc_pd(mdev, &priv->pdn, 0);
 4618         if (err) {
 4619                 mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err);
 4620                 goto err_free_wq;
 4621         }
 4622         err = mlx5_alloc_transport_domain(mdev, &priv->tdn, 0);
 4623         if (err) {
 4624                 mlx5_en_err(ifp,
 4625                     "mlx5_alloc_transport_domain failed, %d\n", err);
 4626                 goto err_dealloc_pd;
 4627         }
 4628         err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
 4629         if (err) {
 4630                 mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err);
 4631                 goto err_dealloc_transport_domain;
 4632         }
 4633         mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
 4634 
 4635         /* check if we should generate a random MAC address */
 4636         if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
 4637             is_zero_ether_addr(dev_addr)) {
 4638                 random_ether_addr(dev_addr);
 4639                 mlx5_en_err(ifp, "Assigned random MAC address\n");
 4640         }
 4641 
 4642         err = mlx5e_rl_init(priv);
 4643         if (err) {
 4644                 mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err);
 4645                 goto err_create_mkey;
 4646         }
 4647 
 4648         err = mlx5e_tls_init(priv);
 4649         if (err) {
 4650                 if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__);
 4651                 goto err_rl_init;
 4652         }
 4653 
 4654         err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
 4655         if (err) {
 4656                 if_printf(ifp, "%s: mlx5e_open_drop_rq failed (%d)\n", __func__, err);
 4657                 goto err_tls_init;
 4658         }
 4659 
 4660         err = mlx5e_open_rqts(priv);
 4661         if (err) {
 4662                 if_printf(ifp, "%s: mlx5e_open_rqts failed (%d)\n", __func__, err);
 4663                 goto err_open_drop_rq;
 4664         }
 4665 
 4666         err = mlx5e_open_tirs(priv);
 4667         if (err) {
 4668                 mlx5_en_err(ifp, "mlx5e_open_tirs() failed, %d\n", err);
 4669                 goto err_open_rqts;
 4670         }
 4671 
 4672         err = mlx5e_open_flow_tables(priv);
 4673         if (err) {
 4674                 if_printf(ifp, "%s: mlx5e_open_flow_tables failed (%d)\n", __func__, err);
 4675                 goto err_open_tirs;
 4676         }
 4677 
 4678         err = mlx5e_tls_rx_init(priv);
 4679         if (err) {
 4680                 if_printf(ifp, "%s: mlx5e_tls_rx_init() failed, %d\n", __func__, err);
 4681                 goto err_open_flow_tables;
 4682         }
 4683 
 4684         /* set default MTU */
 4685         mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
 4686 
 4687         /* Set default media status */
 4688         priv->media_status_last = IFM_AVALID;
 4689         priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_FDX;
 4690 
 4691         /* setup default pauseframes configuration */
 4692         mlx5e_setup_pauseframes(priv);
 4693 
 4694         /* Setup supported medias */
 4695         if (!mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) {
 4696                 ext = MLX5_CAP_PCAM_FEATURE(mdev,
 4697                     ptys_extended_ethernet);
 4698                 eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
 4699                     eth_proto_capability);
 4700         } else {
 4701                 ext = false;
 4702                 eth_proto_cap = 0;
 4703                 mlx5_en_err(ifp, "Query port media capability failed, %d\n", err);
 4704         }
 4705 
 4706         ifmedia_init(&priv->media, IFM_IMASK,
 4707             mlx5e_media_change, mlx5e_media_status);
 4708 
 4709         if (ext) {
 4710                 for (unsigned i = 0; i != MLX5E_EXT_LINK_SPEEDS_NUMBER; i++) {
 4711                         /* check if hardware has the right capability */
 4712                         if (MLX5E_PROT_MASK(i) & ~eth_proto_cap)
 4713                                 continue;
 4714                         for (unsigned j = 0; j != MLX5E_CABLE_TYPE_NUMBER; j++) {
 4715                                 media_entry = mlx5e_ext_mode_table[i][j];
 4716                                 if (media_entry.subtype == 0)
 4717                                         continue;
 4718                                 /* check if this subtype was already added */
 4719                                 for (unsigned k = 0; k != i; k++) {
 4720                                         /* check if hardware has the right capability */
 4721                                         if (MLX5E_PROT_MASK(k) & ~eth_proto_cap)
 4722                                                 continue;
 4723                                         for (unsigned m = 0; m != MLX5E_CABLE_TYPE_NUMBER; m++) {
 4724                                                 if (media_entry.subtype == mlx5e_ext_mode_table[k][m].subtype)
 4725                                                         goto skip_ext_media;
 4726                                         }
 4727                                 }
 4728                                 mlx5e_ifm_add(priv, media_entry.subtype);
 4729                         skip_ext_media:;
 4730                         }
 4731                 }
 4732         } else {
 4733                 for (unsigned i = 0; i != MLX5E_LINK_SPEEDS_NUMBER; i++) {
 4734                         media_entry = mlx5e_mode_table[i];
 4735                         if (media_entry.subtype == 0)
 4736                                 continue;
 4737                         if (MLX5E_PROT_MASK(i) & ~eth_proto_cap)
 4738                                 continue;
 4739                         /* check if this subtype was already added */
 4740                         for (unsigned k = 0; k != i; k++) {
 4741                                 if (media_entry.subtype == mlx5e_mode_table[k].subtype)
 4742                                         goto skip_media;
 4743                         }
 4744                         mlx5e_ifm_add(priv, media_entry.subtype);
 4745 
 4746                         /* NOTE: 10G ER and LR shares the same entry */
 4747                         if (media_entry.subtype == IFM_10G_ER)
 4748                                 mlx5e_ifm_add(priv, IFM_10G_LR);
 4749                 skip_media:;
 4750                 }
 4751         }
 4752 
 4753         mlx5e_ifm_add(priv, IFM_AUTO);
 4754 
 4755         /* Set autoselect by default */
 4756         ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
 4757             IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
 4758 
 4759         DEBUGNET_SET(ifp, mlx5_en);
 4760 
 4761         ether_ifattach(ifp, dev_addr);
 4762 
 4763         /* Register for VLAN events */
 4764         priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 4765             mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
 4766         priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 4767             mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
 4768 
 4769         /* Register for VxLAN events */
 4770         priv->vxlan_start = EVENTHANDLER_REGISTER(vxlan_start,
 4771             mlx5e_vxlan_start, priv, EVENTHANDLER_PRI_ANY);
 4772         priv->vxlan_stop = EVENTHANDLER_REGISTER(vxlan_stop,
 4773             mlx5e_vxlan_stop, priv, EVENTHANDLER_PRI_ANY);
 4774 
 4775         /* Link is down by default */
 4776         if_link_state_change(ifp, LINK_STATE_DOWN);
 4777 
 4778         mlx5e_enable_async_events(priv);
 4779 
 4780         mlx5e_add_hw_stats(priv);
 4781 
 4782         mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4783             "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
 4784             priv->stats.vport.arg);
 4785 
 4786         mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4787             "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
 4788             priv->stats.pport.arg);
 4789 
 4790         mlx5e_create_ethtool(priv);
 4791 
 4792         mtx_lock(&priv->async_events_mtx);
 4793         mlx5e_update_stats(priv);
 4794         mtx_unlock(&priv->async_events_mtx);
 4795 
 4796         SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 4797             OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
 4798             &priv->clbr_done, 0,
 4799             "RX timestamps calibration state");
 4800         callout_init(&priv->tstmp_clbr, 1);
 4801         /* Pull out the frequency of the clock in hz */
 4802         priv->cclk = (uint64_t)MLX5_CAP_GEN(mdev, device_frequency_khz) * 1000ULL;
 4803         mlx5e_reset_calibration_callout(priv);
 4804 
 4805         pa.pa_version = PFIL_VERSION;
 4806         pa.pa_flags = PFIL_IN;
 4807         pa.pa_type = PFIL_TYPE_ETHERNET;
 4808         pa.pa_headname = ifp->if_xname;
 4809         priv->pfil = pfil_head_register(&pa);
 4810 
 4811         PRIV_LOCK(priv);
 4812         err = mlx5e_open_flow_rules(priv);
 4813         if (err) {
 4814                 mlx5_en_err(ifp,
 4815                     "mlx5e_open_flow_rules() failed, %d (ignored)\n", err);
 4816         }
 4817         PRIV_UNLOCK(priv);
 4818 
 4819         return (priv);
 4820 
 4821 err_open_flow_tables:
 4822         mlx5e_close_flow_tables(priv);
 4823 
 4824 err_open_tirs:
 4825         mlx5e_close_tirs(priv);
 4826 
 4827 err_open_rqts:
 4828         mlx5e_close_rqts(priv);
 4829 
 4830 err_open_drop_rq:
 4831         mlx5e_close_drop_rq(&priv->drop_rq);
 4832 
 4833 err_tls_init:
 4834         mlx5e_tls_cleanup(priv);
 4835 
 4836 err_rl_init:
 4837         mlx5e_rl_cleanup(priv);
 4838 
 4839 err_create_mkey:
 4840         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 4841 
 4842 err_dealloc_transport_domain:
 4843         mlx5_dealloc_transport_domain(mdev, priv->tdn, 0);
 4844 
 4845 err_dealloc_pd:
 4846         mlx5_core_dealloc_pd(mdev, priv->pdn, 0);
 4847 
 4848 err_free_wq:
 4849         flush_workqueue(priv->wq);
 4850 
 4851 err_free_sysctl:
 4852         sysctl_ctx_free(&priv->sysctl_ctx);
 4853         if (priv->sysctl_debug)
 4854                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
 4855         mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors);
 4856 
 4857 err_free_ifp:
 4858         if_free(ifp);
 4859 
 4860 err_free_priv:
 4861         free(priv, M_MLX5EN);
 4862         return (NULL);
 4863 }
 4864 
 4865 static void
 4866 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
 4867 {
 4868         struct mlx5e_priv *priv = vpriv;
 4869         struct ifnet *ifp = priv->ifp;
 4870 
 4871         /* don't allow more IOCTLs */
 4872         priv->gone = 1;
 4873 
 4874         /* XXX wait a bit to allow IOCTL handlers to complete */
 4875         pause("W", hz);
 4876 
 4877 #ifdef RATELIMIT
 4878         /*
 4879          * The kernel can have reference(s) via the m_snd_tag's into
 4880          * the ratelimit channels, and these must go away before
 4881          * detaching:
 4882          */
 4883         while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
 4884                 mlx5_en_err(priv->ifp,
 4885                     "Waiting for all ratelimit connections to terminate\n");
 4886                 pause("W", hz);
 4887         }
 4888 #endif
 4889 
 4890 #ifdef KERN_TLS
 4891         /* wait for all TLS tags to get freed */
 4892         while (priv->tls.init != 0 &&
 4893             uma_zone_get_cur(priv->tls.zone) != 0)  {
 4894                 mlx5_en_err(priv->ifp,
 4895                     "Waiting for all TLS connections to terminate\n");
 4896                 pause("W", hz);
 4897         }
 4898 
 4899         /* wait for all TLS RX tags to get freed */
 4900         while (priv->tls_rx.init != 0 &&
 4901             uma_zone_get_cur(priv->tls_rx.zone) != 0)  {
 4902                 mlx5_en_err(priv->ifp,
 4903                     "Waiting for all TLS RX connections to terminate\n");
 4904                 pause("W", hz);
 4905         }
 4906 #endif
 4907         /* wait for all unlimited send tags to complete */
 4908         mlx5e_priv_wait_for_completion(priv, mdev->priv.eq_table.num_comp_vectors);
 4909 
 4910         /* stop watchdog timer */
 4911         callout_drain(&priv->watchdog);
 4912 
 4913         callout_drain(&priv->tstmp_clbr);
 4914 
 4915         if (priv->vlan_attach != NULL)
 4916                 EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
 4917         if (priv->vlan_detach != NULL)
 4918                 EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
 4919         if (priv->vxlan_start != NULL)
 4920                 EVENTHANDLER_DEREGISTER(vxlan_start, priv->vxlan_start);
 4921         if (priv->vxlan_stop != NULL)
 4922                 EVENTHANDLER_DEREGISTER(vxlan_stop, priv->vxlan_stop);
 4923 
 4924         /* make sure device gets closed */
 4925         PRIV_LOCK(priv);
 4926         mlx5e_close_locked(ifp);
 4927         mlx5e_close_flow_rules(priv);
 4928         PRIV_UNLOCK(priv);
 4929 
 4930         /* deregister pfil */
 4931         if (priv->pfil != NULL) {
 4932                 pfil_head_unregister(priv->pfil);
 4933                 priv->pfil = NULL;
 4934         }
 4935 
 4936         /* unregister device */
 4937         ifmedia_removeall(&priv->media);
 4938         ether_ifdetach(ifp);
 4939 
 4940         mlx5e_tls_rx_cleanup(priv);
 4941         mlx5e_close_flow_tables(priv);
 4942         mlx5e_close_tirs(priv);
 4943         mlx5e_close_rqts(priv);
 4944         mlx5e_close_drop_rq(&priv->drop_rq);
 4945         mlx5e_tls_cleanup(priv);
 4946         mlx5e_rl_cleanup(priv);
 4947 
 4948         /* destroy all remaining sysctl nodes */
 4949         sysctl_ctx_free(&priv->stats.vport.ctx);
 4950         sysctl_ctx_free(&priv->stats.pport.ctx);
 4951         if (priv->sysctl_debug)
 4952                 sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
 4953         sysctl_ctx_free(&priv->sysctl_ctx);
 4954 
 4955         mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 4956         mlx5_dealloc_transport_domain(priv->mdev, priv->tdn, 0);
 4957         mlx5_core_dealloc_pd(priv->mdev, priv->pdn, 0);
 4958         mlx5e_disable_async_events(priv);
 4959         flush_workqueue(priv->wq);
 4960         mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors);
 4961         if_free(ifp);
 4962         free(priv, M_MLX5EN);
 4963 }
 4964 
 4965 #ifdef DEBUGNET
 4966 static void
 4967 mlx5_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize)
 4968 {
 4969         struct mlx5e_priv *priv = if_getsoftc(dev);
 4970 
 4971         PRIV_LOCK(priv);
 4972         *nrxr = priv->params.num_channels;
 4973         *ncl = DEBUGNET_MAX_IN_FLIGHT;
 4974         *clsize = MLX5E_MAX_RX_BYTES;
 4975         PRIV_UNLOCK(priv);
 4976 }
 4977 
 4978 static void
 4979 mlx5_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event)
 4980 {
 4981 }
 4982 
 4983 static int
 4984 mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m)
 4985 {
 4986         struct mlx5e_priv *priv = if_getsoftc(dev);
 4987         struct mlx5e_sq *sq;
 4988         int err;
 4989 
 4990         if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 4991             IFF_DRV_RUNNING || (priv->media_status_last & IFM_ACTIVE) == 0)
 4992                 return (ENOENT);
 4993 
 4994         sq = &priv->channel[0].sq[0];
 4995 
 4996         if (sq->running == 0) {
 4997                 m_freem(m);
 4998                 return (ENOENT);
 4999         }
 5000 
 5001         if (mlx5e_sq_xmit(sq, &m) != 0) {
 5002                 m_freem(m);
 5003                 err = ENOBUFS;
 5004         } else {
 5005                 err = 0;
 5006         }
 5007 
 5008         mlx5e_tx_notify_hw(sq, true);
 5009 
 5010         return (err);
 5011 }
 5012 
 5013 static int
 5014 mlx5_en_debugnet_poll(struct ifnet *dev, int count)
 5015 {
 5016         struct mlx5e_priv *priv = if_getsoftc(dev);
 5017 
 5018         if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 ||
 5019             (priv->media_status_last & IFM_ACTIVE) == 0)
 5020                 return (ENOENT);
 5021 
 5022         mlx5_poll_interrupts(priv->mdev);
 5023 
 5024         return (0);
 5025 }
 5026 #endif /* DEBUGNET */
 5027 
 5028 static void *
 5029 mlx5e_get_ifp(void *vpriv)
 5030 {
 5031         struct mlx5e_priv *priv = vpriv;
 5032 
 5033         return (priv->ifp);
 5034 }
 5035 
 5036 static struct mlx5_interface mlx5e_interface = {
 5037         .add = mlx5e_create_ifp,
 5038         .remove = mlx5e_destroy_ifp,
 5039         .event = mlx5e_async_event,
 5040         .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
 5041         .get_dev = mlx5e_get_ifp,
 5042 };
 5043 
 5044 void
 5045 mlx5e_init(void)
 5046 {
 5047         mlx5_register_interface(&mlx5e_interface);
 5048 }
 5049 
 5050 void
 5051 mlx5e_cleanup(void)
 5052 {
 5053         mlx5_unregister_interface(&mlx5e_interface);
 5054 }
 5055 
 5056 module_init_order(mlx5e_init, SI_ORDER_SIXTH);
 5057 module_exit_order(mlx5e_cleanup, SI_ORDER_SIXTH);
 5058 
 5059 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
 5060 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
 5061 MODULE_VERSION(mlx5en, 1);

Cache object: 404311be4fad8b5763d6ceb13771ebd4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.