
FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c


    1 /*-
    2  * Copyright (c) 2013-2021, Mellanox Technologies, Ltd.  All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD$
   26  */
   27 
   28 #include "opt_rss.h"
   29 #include "opt_ratelimit.h"
   30 
   31 #include <linux/module.h>
   32 #include <linux/errno.h>
   33 #include <linux/pci.h>
   34 #include <linux/dma-mapping.h>
   35 #include <linux/slab.h>
   36 #if defined(CONFIG_X86)
   37 #include <asm/pat.h>
   38 #endif
   39 #include <linux/sched.h>
   40 #include <linux/delay.h>
   41 #include <linux/fs.h>
   42 #undef inode
   43 #include <rdma/ib_user_verbs.h>
   44 #include <rdma/ib_addr.h>
   45 #include <rdma/ib_cache.h>
   46 #include <dev/mlx5/port.h>
   47 #include <dev/mlx5/vport.h>
   48 #include <linux/list.h>
   49 #include <rdma/ib_smi.h>
   50 #include <rdma/ib_umem.h>
   51 #include <rdma/uverbs_ioctl.h>
   52 #include <linux/in.h>
   53 #include <linux/etherdevice.h>
   54 #include <dev/mlx5/fs.h>
   55 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
   56 
   57 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
   58 MODULE_LICENSE("Dual BSD/GPL");
   59 MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
   60 MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
   61 MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
   62 MODULE_VERSION(mlx5ib, 1);
   63 
   64 enum {
   65         MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
   66 };
   67 
   68 static enum rdma_link_layer
   69 mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
   70 {
   71         switch (port_type_cap) {
   72         case MLX5_CAP_PORT_TYPE_IB:
   73                 return IB_LINK_LAYER_INFINIBAND;
   74         case MLX5_CAP_PORT_TYPE_ETH:
   75                 return IB_LINK_LAYER_ETHERNET;
   76         default:
   77                 return IB_LINK_LAYER_UNSPECIFIED;
   78         }
   79 }
   80 
   81 static enum rdma_link_layer
   82 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
   83 {
   84         struct mlx5_ib_dev *dev = to_mdev(device);
   85         int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
   86 
   87         return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
   88 }
   89 
   90 static bool mlx5_netdev_match(struct ifnet *ndev,
   91                               struct mlx5_core_dev *mdev,
   92                               const char *dname)
   93 {
   94         return ndev->if_type == IFT_ETHER &&
   95           ndev->if_dname != NULL &&
   96           strcmp(ndev->if_dname, dname) == 0 &&
   97           ndev->if_softc != NULL &&
   98           *(struct mlx5_core_dev **)ndev->if_softc == mdev;
   99 }
  100 
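       /*
        * Netdev notifier callback: cache or clear the matching mlx5en ("mce")
        * interface on NETDEV_REGISTER/UNREGISTER, and dispatch an
        * IB_EVENT_PORT_ACTIVE or IB_EVENT_PORT_ERR event when the tracked
        * interface goes up or down while the IB device is active.
        */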
  101 static int mlx5_netdev_event(struct notifier_block *this,
  102                              unsigned long event, void *ptr)
  103 {
  104         struct ifnet *ndev = netdev_notifier_info_to_ifp(ptr);
  105         struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
  106                                                  roce.nb);
  107 
  108         switch (event) {
  109         case NETDEV_REGISTER:
  110         case NETDEV_UNREGISTER:
  111                 write_lock(&ibdev->roce.netdev_lock);
  112                 /* check if network interface belongs to mlx5en */
  113                 if (mlx5_netdev_match(ndev, ibdev->mdev, "mce"))
  114                         ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
  115                                              NULL : ndev;
  116                 write_unlock(&ibdev->roce.netdev_lock);
  117                 break;
  118 
  119         case NETDEV_UP:
  120         case NETDEV_DOWN: {
  121                 struct ifnet *upper = NULL;
  122 
  123                 if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
  124                     && ibdev->ib_active) {
  125                         struct ib_event ibev = {0};
  126 
  127                         ibev.device = &ibdev->ib_dev;
  128                         ibev.event = (event == NETDEV_UP) ?
  129                                      IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
  130                         ibev.element.port_num = 1;
  131                         ib_dispatch_event(&ibev);
  132                 }
  133                 break;
  134         }
  135 
  136         default:
  137                 break;
  138         }
  139 
  140         return NOTIFY_DONE;
  141 }
  142 
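       /*
        * Return the net device currently associated with the RoCE port, with a
        * reference held via if_ref().  The caller is responsible for dropping
        * the reference with if_rele().
        */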
  143 static struct ifnet *mlx5_ib_get_netdev(struct ib_device *device,
  144                                              u8 port_num)
  145 {
  146         struct mlx5_ib_dev *ibdev = to_mdev(device);
  147         struct ifnet *ndev;
  148 
  149         /* Ensure ndev does not disappear before we invoke if_ref()
  150          */
  151         read_lock(&ibdev->roce.netdev_lock);
  152         ndev = ibdev->roce.netdev;
  153         if (ndev)
  154                 if_ref(ndev);
  155         read_unlock(&ibdev->roce.netdev_lock);
  156 
  157         return ndev;
  158 }
  159 
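       /*
        * Translate the PTYS eth_proto_oper bitmask reported by the device into
        * the closest equivalent IB active_speed / active_width pair.
        */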
  160 static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
  161                                     u8 *active_width)
  162 {
  163         switch (eth_proto_oper) {
  164         case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
  165         case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
  166         case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
  167         case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
  168                 *active_width = IB_WIDTH_1X;
  169                 *active_speed = IB_SPEED_SDR;
  170                 break;
  171         case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
  172         case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
  173         case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
  174         case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
  175         case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
  176         case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
  177         case MLX5E_PROT_MASK(MLX5E_10GBASE_ER_LR):
  178                 *active_width = IB_WIDTH_1X;
  179                 *active_speed = IB_SPEED_QDR;
  180                 break;
  181         case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
  182         case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
  183         case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
  184                 *active_width = IB_WIDTH_1X;
  185                 *active_speed = IB_SPEED_EDR;
  186                 break;
  187         case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
  188         case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
  189         case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
  190         case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4_ER4):
  191                 *active_width = IB_WIDTH_4X;
  192                 *active_speed = IB_SPEED_QDR;
  193                 break;
  194         case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
  195         case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
  196         case MLX5E_PROT_MASK(MLX5E_50GBASE_KR4):
  197         case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
  198                 *active_width = IB_WIDTH_1X;
  199                 *active_speed = IB_SPEED_HDR;
  200                 break;
  201         case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
  202                 *active_width = IB_WIDTH_4X;
  203                 *active_speed = IB_SPEED_FDR;
  204                 break;
  205         case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
  206         case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
  207         case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
  208         case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
  209                 *active_width = IB_WIDTH_4X;
  210                 *active_speed = IB_SPEED_EDR;
  211                 break;
  212         default:
  213                 *active_width = IB_WIDTH_4X;
  214                 *active_speed = IB_SPEED_QDR;
  215                 return -EINVAL;
  216         }
  217 
  218         return 0;
  219 }
  220 
  221 static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
  222                                         u8 *active_width)
  223 {
  224         switch (eth_proto_oper) {
  225         case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
  226         case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
  227                 *active_width = IB_WIDTH_1X;
  228                 *active_speed = IB_SPEED_SDR;
  229                 break;
  230         case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
  231                 *active_width = IB_WIDTH_1X;
  232                 *active_speed = IB_SPEED_DDR;
  233                 break;
  234         case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
  235                 *active_width = IB_WIDTH_1X;
  236                 *active_speed = IB_SPEED_QDR;
  237                 break;
  238         case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
  239                 *active_width = IB_WIDTH_4X;
  240                 *active_speed = IB_SPEED_QDR;
  241                 break;
  242         case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
  243                 *active_width = IB_WIDTH_1X;
  244                 *active_speed = IB_SPEED_EDR;
  245                 break;
  246         case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
  247                 *active_width = IB_WIDTH_2X;
  248                 *active_speed = IB_SPEED_EDR;
  249                 break;
  250         case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
  251                 *active_width = IB_WIDTH_1X;
  252                 *active_speed = IB_SPEED_HDR;
  253                 break;
  254         case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4):
  255                 *active_width = IB_WIDTH_4X;
  256                 *active_speed = IB_SPEED_EDR;
  257                 break;
  258         case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
  259                 *active_width = IB_WIDTH_2X;
  260                 *active_speed = IB_SPEED_HDR;
  261                 break;
  262         case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR):
  263                 *active_width = IB_WIDTH_1X;
  264                 *active_speed = IB_SPEED_NDR;
  265                 break;
  266         case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
  267                 *active_width = IB_WIDTH_4X;
  268                 *active_speed = IB_SPEED_HDR;
  269                 break;
  270         case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2):
  271                 *active_width = IB_WIDTH_2X;
  272                 *active_speed = IB_SPEED_NDR;
  273                 break;
  274         case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
  275                 *active_width = IB_WIDTH_4X;
  276                 *active_speed = IB_SPEED_NDR;
  277                 break;
  278         default:
  279                 *active_width = IB_WIDTH_4X;
  280                 *active_speed = IB_SPEED_QDR;
  281                 return -EINVAL;
  282         }
  283 
  284         return 0;
  285 }
  286 
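       /*
        * Query port attributes for an Ethernet (RoCE) port: speed and width are
        * derived from the PTYS register, while link state and MTU come from the
        * associated net device, if one is present.
        */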
  287 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
  288                                 struct ib_port_attr *props)
  289 {
  290         struct mlx5_ib_dev *dev = to_mdev(device);
  291         u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
  292         struct ifnet *ndev;
  293         enum ib_mtu ndev_ib_mtu;
  294         u16 qkey_viol_cntr;
  295         u32 eth_prot_oper;
  296         bool ext;
  297         int err;
  298 
  299         memset(props, 0, sizeof(*props));
  300 
   301         /* Possible bad flows are checked before filling out props, so in
   302          * case of an error props is still returned zeroed out.
  303          */
  304         err = mlx5_query_port_ptys(dev->mdev, out, sizeof(out), MLX5_PTYS_EN,
  305             port_num);
  306         if (err)
  307                 return err;
  308 
  309         ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
  310         eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
  311 
  312         if (ext)
  313                 translate_eth_ext_proto_oper(eth_prot_oper, &props->active_speed,
  314                     &props->active_width);
  315         else
  316                 translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
  317                     &props->active_width);
  318 
  319         props->port_cap_flags  |= IB_PORT_CM_SUP;
  320         props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
  321 
  322         props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
  323                                                 roce_address_table_size);
  324         props->max_mtu          = IB_MTU_4096;
  325         props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
  326         props->pkey_tbl_len     = 1;
  327         props->state            = IB_PORT_DOWN;
  328         props->phys_state       = IB_PORT_PHYS_STATE_DISABLED;
  329 
  330         mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
  331         props->qkey_viol_cntr = qkey_viol_cntr;
  332 
  333         ndev = mlx5_ib_get_netdev(device, port_num);
  334         if (!ndev)
  335                 return 0;
  336 
  337         if (ndev->if_drv_flags & IFF_DRV_RUNNING &&
  338             ndev->if_link_state == LINK_STATE_UP) {
  339                 props->state      = IB_PORT_ACTIVE;
  340                 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
  341         }
  342 
  343         ndev_ib_mtu = iboe_get_mtu(ndev->if_mtu);
  344 
  345         if_rele(ndev);
  346 
  347         props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
  348         return 0;
  349 }
  350 
  351 static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
  352                                      const struct ib_gid_attr *attr,
  353                                      void *mlx5_addr)
  354 {
  355 #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
  356         char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
  357                                                source_l3_address);
  358         void *mlx5_addr_mac     = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
  359                                                source_mac_47_32);
  360         u16 vlan_id;
  361 
  362         if (!gid)
  363                 return;
  364         ether_addr_copy(mlx5_addr_mac, IF_LLADDR(attr->ndev));
  365 
  366         vlan_id = rdma_vlan_dev_vlan_id(attr->ndev);
  367         if (vlan_id != 0xffff) {
  368                 MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
  369                 MLX5_SET_RA(mlx5_addr, vlan_id, vlan_id);
  370         }
  371 
  372         switch (attr->gid_type) {
  373         case IB_GID_TYPE_IB:
  374                 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
  375                 break;
  376         case IB_GID_TYPE_ROCE_UDP_ENCAP:
  377                 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
  378                 break;
  379 
  380         default:
  381                 WARN_ON(true);
  382         }
  383 
  384         if (attr->gid_type != IB_GID_TYPE_IB) {
  385                 if (ipv6_addr_v4mapped((void *)gid))
  386                         MLX5_SET_RA(mlx5_addr, roce_l3_type,
  387                                     MLX5_ROCE_L3_TYPE_IPV4);
  388                 else
  389                         MLX5_SET_RA(mlx5_addr, roce_l3_type,
  390                                     MLX5_ROCE_L3_TYPE_IPV6);
  391         }
  392 
  393         if ((attr->gid_type == IB_GID_TYPE_IB) ||
  394             !ipv6_addr_v4mapped((void *)gid))
  395                 memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
  396         else
  397                 memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
  398 }
  399 
  400 static int set_roce_addr(struct ib_device *device, u8 port_num,
  401                          unsigned int index,
  402                          const union ib_gid *gid,
  403                          const struct ib_gid_attr *attr)
  404 {
  405         struct mlx5_ib_dev *dev = to_mdev(device);
  406         u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
  407         u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
  408         void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
  409         enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
  410 
  411         if (ll != IB_LINK_LAYER_ETHERNET)
  412                 return -EINVAL;
  413 
  414         ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
  415 
  416         MLX5_SET(set_roce_address_in, in, roce_address_index, index);
  417         MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
  418         return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
  419 }
  420 
  421 static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
  422                            unsigned int index, const union ib_gid *gid,
  423                            const struct ib_gid_attr *attr,
  424                            __always_unused void **context)
  425 {
  426         return set_roce_addr(device, port_num, index, gid, attr);
  427 }
  428 
  429 static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
  430                            unsigned int index, __always_unused void **context)
  431 {
  432         return set_roce_addr(device, port_num, index, NULL, NULL);
  433 }
  434 
  435 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
  436                                int index)
  437 {
  438         struct ib_gid_attr attr;
  439         union ib_gid gid;
  440 
  441         if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
  442                 return 0;
  443 
  444         if (!attr.ndev)
  445                 return 0;
  446 
  447         if_rele(attr.ndev);
  448 
  449         if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
  450                 return 0;
  451 
  452         return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
  453 }
  454 
  455 int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
  456                            int index, enum ib_gid_type *gid_type)
  457 {
  458         struct ib_gid_attr attr;
  459         union ib_gid gid;
  460         int ret;
  461 
  462         ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
  463         if (ret)
  464                 return ret;
  465 
  466         if (!attr.ndev)
  467                 return -ENODEV;
  468 
  469         if_rele(attr.ndev);
  470 
  471         *gid_type = attr.gid_type;
  472 
  473         return 0;
  474 }
  475 
  476 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  477 {
  478         if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
  479                 return !MLX5_CAP_GEN(dev->mdev, ib_virt);
  480         return 0;
  481 }
  482 
  483 enum {
  484         MLX5_VPORT_ACCESS_METHOD_MAD,
  485         MLX5_VPORT_ACCESS_METHOD_HCA,
  486         MLX5_VPORT_ACCESS_METHOD_NIC,
  487 };
  488 
  489 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
  490 {
  491         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
  492                 return MLX5_VPORT_ACCESS_METHOD_MAD;
  493 
  494         if (mlx5_ib_port_link_layer(ibdev, 1) ==
  495             IB_LINK_LAYER_ETHERNET)
  496                 return MLX5_VPORT_ACCESS_METHOD_NIC;
  497 
  498         return MLX5_VPORT_ACCESS_METHOD_HCA;
  499 }
  500 
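       /*
        * Report IB_ATOMIC_HCA only when the device supports 8-byte
        * compare-and-swap and fetch-and-add and can respond in host
        * endianness; otherwise report IB_ATOMIC_NONE.
        */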
  501 static void get_atomic_caps(struct mlx5_ib_dev *dev,
  502                             struct ib_device_attr *props)
  503 {
  504         u8 tmp;
  505         u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
  506         u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
  507         u8 atomic_req_8B_endianness_mode =
  508                 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
  509 
   510         /* Check if the HW supports standard 8-byte atomic operations and is
   511          * capable of responding in host endianness
  512          */
  513         tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
  514         if (((atomic_operations & tmp) == tmp) &&
  515             (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
  516             (atomic_req_8B_endianness_mode)) {
  517                 props->atomic_cap = IB_ATOMIC_HCA;
  518         } else {
  519                 props->atomic_cap = IB_ATOMIC_NONE;
  520         }
  521 }
  522 
  523 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
  524                                         __be64 *sys_image_guid)
  525 {
  526         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  527         struct mlx5_core_dev *mdev = dev->mdev;
  528         u64 tmp;
  529         int err;
  530 
  531         switch (mlx5_get_vport_access_method(ibdev)) {
  532         case MLX5_VPORT_ACCESS_METHOD_MAD:
  533                 return mlx5_query_mad_ifc_system_image_guid(ibdev,
  534                                                             sys_image_guid);
  535 
  536         case MLX5_VPORT_ACCESS_METHOD_HCA:
  537                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
  538                 break;
  539 
  540         case MLX5_VPORT_ACCESS_METHOD_NIC:
  541                 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
  542                 break;
  543 
  544         default:
  545                 return -EINVAL;
  546         }
  547 
  548         if (!err)
  549                 *sys_image_guid = cpu_to_be64(tmp);
  550 
  551         return err;
  552 
  553 }
  554 
  555 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
  556                                 u16 *max_pkeys)
  557 {
  558         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  559         struct mlx5_core_dev *mdev = dev->mdev;
  560 
  561         switch (mlx5_get_vport_access_method(ibdev)) {
  562         case MLX5_VPORT_ACCESS_METHOD_MAD:
  563                 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
  564 
  565         case MLX5_VPORT_ACCESS_METHOD_HCA:
  566         case MLX5_VPORT_ACCESS_METHOD_NIC:
  567                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
  568                                                 pkey_table_size));
  569                 return 0;
  570 
  571         default:
  572                 return -EINVAL;
  573         }
  574 }
  575 
  576 static int mlx5_query_vendor_id(struct ib_device *ibdev,
  577                                 u32 *vendor_id)
  578 {
  579         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  580 
  581         switch (mlx5_get_vport_access_method(ibdev)) {
  582         case MLX5_VPORT_ACCESS_METHOD_MAD:
  583                 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
  584 
  585         case MLX5_VPORT_ACCESS_METHOD_HCA:
  586         case MLX5_VPORT_ACCESS_METHOD_NIC:
  587                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
  588 
  589         default:
  590                 return -EINVAL;
  591         }
  592 }
  593 
  594 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
  595                                 __be64 *node_guid)
  596 {
  597         u64 tmp;
  598         int err;
  599 
  600         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
  601         case MLX5_VPORT_ACCESS_METHOD_MAD:
  602                 return mlx5_query_mad_ifc_node_guid(dev, node_guid);
  603 
  604         case MLX5_VPORT_ACCESS_METHOD_HCA:
  605                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
  606                 break;
  607 
  608         case MLX5_VPORT_ACCESS_METHOD_NIC:
  609                 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
  610                 break;
  611 
  612         default:
  613                 return -EINVAL;
  614         }
  615 
  616         if (!err)
  617                 *node_guid = cpu_to_be64(tmp);
  618 
  619         return err;
  620 }
  621 
  622 struct mlx5_reg_node_desc {
  623         u8      desc[IB_DEVICE_NODE_DESC_MAX];
  624 };
  625 
  626 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
  627 {
  628         struct mlx5_reg_node_desc in;
  629 
  630         if (mlx5_use_mad_ifc(dev))
  631                 return mlx5_query_mad_ifc_node_desc(dev, node_desc);
  632 
  633         memset(&in, 0, sizeof(in));
  634 
  635         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
  636                                     sizeof(struct mlx5_reg_node_desc),
  637                                     MLX5_REG_NODE_DESC, 0, 0);
  638 }
  639 
  640 static int mlx5_ib_query_device(struct ib_device *ibdev,
  641                                 struct ib_device_attr *props,
  642                                 struct ib_udata *uhw)
  643 {
  644         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  645         struct mlx5_core_dev *mdev = dev->mdev;
  646         int err = -ENOMEM;
  647         int max_sq_desc;
  648         int max_rq_sg;
  649         int max_sq_sg;
  650         u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
  651         struct mlx5_ib_query_device_resp resp = {};
  652         size_t resp_len;
  653         u64 max_tso;
  654 
  655         resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
  656         if (uhw->outlen && uhw->outlen < resp_len)
  657                 return -EINVAL;
  658         else
  659                 resp.response_length = resp_len;
  660 
  661         if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
  662                 return -EINVAL;
  663 
  664         memset(props, 0, sizeof(*props));
  665         err = mlx5_query_system_image_guid(ibdev,
  666                                            &props->sys_image_guid);
  667         if (err)
  668                 return err;
  669 
  670         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
  671         if (err)
  672                 return err;
  673 
  674         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
  675         if (err)
  676                 return err;
  677 
  678         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
  679                 ((u32)fw_rev_min(dev->mdev) << 16) |
  680                 fw_rev_sub(dev->mdev);
  681         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
  682                 IB_DEVICE_PORT_ACTIVE_EVENT             |
  683                 IB_DEVICE_SYS_IMAGE_GUID                |
  684                 IB_DEVICE_RC_RNR_NAK_GEN;
  685 
  686         if (MLX5_CAP_GEN(mdev, pkv))
  687                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
  688         if (MLX5_CAP_GEN(mdev, qkv))
  689                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
  690         if (MLX5_CAP_GEN(mdev, apm))
  691                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
  692         if (MLX5_CAP_GEN(mdev, xrc))
  693                 props->device_cap_flags |= IB_DEVICE_XRC;
  694         if (MLX5_CAP_GEN(mdev, imaicl)) {
  695                 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
  696                                            IB_DEVICE_MEM_WINDOW_TYPE_2B;
  697                 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
  698                 /* We support 'Gappy' memory registration too */
  699                 props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
  700         }
  701         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
  702         if (MLX5_CAP_GEN(mdev, sho)) {
  703                 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
  704                 /* At this stage no support for signature handover */
  705                 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
  706                                       IB_PROT_T10DIF_TYPE_2 |
  707                                       IB_PROT_T10DIF_TYPE_3;
  708                 props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
  709                                        IB_GUARD_T10DIF_CSUM;
  710         }
  711         if (MLX5_CAP_GEN(mdev, block_lb_mc))
  712                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
  713 
  714         if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
  715                 if (MLX5_CAP_ETH(mdev, csum_cap))
  716                         props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
  717 
  718                 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
  719                         max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
  720                         if (max_tso) {
  721                                 resp.tso_caps.max_tso = 1 << max_tso;
  722                                 resp.tso_caps.supported_qpts |=
  723                                         1 << IB_QPT_RAW_PACKET;
  724                                 resp.response_length += sizeof(resp.tso_caps);
  725                         }
  726                 }
  727 
  728                 if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
  729                         resp.rss_caps.rx_hash_function =
  730                                                 MLX5_RX_HASH_FUNC_TOEPLITZ;
  731                         resp.rss_caps.rx_hash_fields_mask =
  732                                                 MLX5_RX_HASH_SRC_IPV4 |
  733                                                 MLX5_RX_HASH_DST_IPV4 |
  734                                                 MLX5_RX_HASH_SRC_IPV6 |
  735                                                 MLX5_RX_HASH_DST_IPV6 |
  736                                                 MLX5_RX_HASH_SRC_PORT_TCP |
  737                                                 MLX5_RX_HASH_DST_PORT_TCP |
  738                                                 MLX5_RX_HASH_SRC_PORT_UDP |
  739                                                 MLX5_RX_HASH_DST_PORT_UDP;
  740                         resp.response_length += sizeof(resp.rss_caps);
  741                 }
  742         } else {
  743                 if (field_avail(typeof(resp), tso_caps, uhw->outlen))
  744                         resp.response_length += sizeof(resp.tso_caps);
  745                 if (field_avail(typeof(resp), rss_caps, uhw->outlen))
  746                         resp.response_length += sizeof(resp.rss_caps);
  747         }
  748 
  749         if (MLX5_CAP_GEN(mdev, ipoib_ipoib_offloads)) {
  750                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
  751                 props->device_cap_flags |= IB_DEVICE_UD_TSO;
  752         }
  753 
  754         if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
  755             MLX5_CAP_ETH(dev->mdev, scatter_fcs))
  756                 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
  757 
  758         if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
  759                 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
  760 
  761         props->vendor_part_id      = mdev->pdev->device;
  762         props->hw_ver              = mdev->pdev->revision;
  763 
  764         props->max_mr_size         = ~0ull;
  765         props->page_size_cap       = ~(min_page_size - 1);
  766         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
  767         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
  768         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
  769                      sizeof(struct mlx5_wqe_data_seg);
  770         max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
  771         max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
  772                      sizeof(struct mlx5_wqe_raddr_seg)) /
  773                 sizeof(struct mlx5_wqe_data_seg);
  774         props->max_sge = min(max_rq_sg, max_sq_sg);
  775         props->max_sge_rd          = MLX5_MAX_SGE_RD;
  776         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
  777         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
  778         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
  779         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
  780         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
  781         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
  782         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
  783         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
  784         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
  785         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
  786         props->max_srq_sge         = max_rq_sg - 1;
  787         props->max_fast_reg_page_list_len =
  788                 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
  789         get_atomic_caps(dev, props);
  790         props->masked_atomic_cap   = IB_ATOMIC_NONE;
  791         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
  792         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
  793         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
  794                                            props->max_mcast_grp;
  795         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
  796         props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
  797         props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
  798 
  799 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  800         if (MLX5_CAP_GEN(mdev, pg))
  801                 props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
  802         props->odp_caps = dev->odp_caps;
  803 #endif
  804 
  805         if (MLX5_CAP_GEN(mdev, cd))
  806                 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
  807 
  808         if (!mlx5_core_is_pf(mdev))
  809                 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
  810 
  811         if (mlx5_ib_port_link_layer(ibdev, 1) ==
  812             IB_LINK_LAYER_ETHERNET) {
  813                 props->rss_caps.max_rwq_indirection_tables =
  814                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
  815                 props->rss_caps.max_rwq_indirection_table_size =
  816                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
  817                 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
  818                 props->max_wq_type_rq =
  819                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
  820         }
  821 
  822         if (uhw->outlen) {
  823                 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
  824 
  825                 if (err)
  826                         return err;
  827         }
  828 
  829         return 0;
  830 }
  831 
  832 enum mlx5_ib_width {
  833         MLX5_IB_WIDTH_1X        = 1 << 0,
  834         MLX5_IB_WIDTH_2X        = 1 << 1,
  835         MLX5_IB_WIDTH_4X        = 1 << 2,
  836         MLX5_IB_WIDTH_8X        = 1 << 3,
  837         MLX5_IB_WIDTH_12X       = 1 << 4
  838 };
  839 
  840 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
  841                                   u8 *ib_width)
  842 {
  843         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  844         int err = 0;
  845 
  846         if (active_width & MLX5_IB_WIDTH_1X) {
  847                 *ib_width = IB_WIDTH_1X;
  848         } else if (active_width & MLX5_IB_WIDTH_2X) {
  849                 *ib_width = IB_WIDTH_2X;
  850         } else if (active_width & MLX5_IB_WIDTH_4X) {
  851                 *ib_width = IB_WIDTH_4X;
  852         } else if (active_width & MLX5_IB_WIDTH_8X) {
  853                 *ib_width = IB_WIDTH_8X;
  854         } else if (active_width & MLX5_IB_WIDTH_12X) {
  855                 *ib_width = IB_WIDTH_12X;
  856         } else {
  857                 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
  858                             (int)active_width);
  859                 err = -EINVAL;
  860         }
  861 
  862         return err;
  863 }
  864 
  865 enum ib_max_vl_num {
  866         __IB_MAX_VL_0           = 1,
  867         __IB_MAX_VL_0_1         = 2,
  868         __IB_MAX_VL_0_3         = 3,
  869         __IB_MAX_VL_0_7         = 4,
  870         __IB_MAX_VL_0_14        = 5,
  871 };
  872 
  873 enum mlx5_vl_hw_cap {
  874         MLX5_VL_HW_0    = 1,
  875         MLX5_VL_HW_0_1  = 2,
  876         MLX5_VL_HW_0_2  = 3,
  877         MLX5_VL_HW_0_3  = 4,
  878         MLX5_VL_HW_0_4  = 5,
  879         MLX5_VL_HW_0_5  = 6,
  880         MLX5_VL_HW_0_6  = 7,
  881         MLX5_VL_HW_0_7  = 8,
  882         MLX5_VL_HW_0_14 = 15
  883 };
  884 
  885 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
  886                                 u8 *max_vl_num)
  887 {
  888         switch (vl_hw_cap) {
  889         case MLX5_VL_HW_0:
  890                 *max_vl_num = __IB_MAX_VL_0;
  891                 break;
  892         case MLX5_VL_HW_0_1:
  893                 *max_vl_num = __IB_MAX_VL_0_1;
  894                 break;
  895         case MLX5_VL_HW_0_3:
  896                 *max_vl_num = __IB_MAX_VL_0_3;
  897                 break;
  898         case MLX5_VL_HW_0_7:
  899                 *max_vl_num = __IB_MAX_VL_0_7;
  900                 break;
  901         case MLX5_VL_HW_0_14:
  902                 *max_vl_num = __IB_MAX_VL_0_14;
  903                 break;
  904 
  905         default:
  906                 return -EINVAL;
  907         }
  908 
  909         return 0;
  910 }
  911 
  912 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
  913                                struct ib_port_attr *props)
  914 {
  915         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  916         struct mlx5_core_dev *mdev = dev->mdev;
  917         u32 *rep;
  918         int replen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
  919         struct mlx5_ptys_reg *ptys;
  920         struct mlx5_pmtu_reg *pmtu;
  921         struct mlx5_pvlc_reg pvlc;
  922         void *ctx;
  923         int err;
  924 
  925         rep = mlx5_vzalloc(replen);
  926         ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
  927         pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
  928         if (!rep || !ptys || !pmtu) {
  929                 err = -ENOMEM;
  930                 goto out;
  931         }
  932 
  933         memset(props, 0, sizeof(*props));
  934 
  935         err = mlx5_query_hca_vport_context(mdev, port, 0, rep, replen);
  936         if (err)
  937                 goto out;
  938 
  939         ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
  940 
  941         props->lid              = MLX5_GET(hca_vport_context, ctx, lid);
  942         props->lmc              = MLX5_GET(hca_vport_context, ctx, lmc);
  943         props->sm_lid           = MLX5_GET(hca_vport_context, ctx, sm_lid);
  944         props->sm_sl            = MLX5_GET(hca_vport_context, ctx, sm_sl);
  945         props->state            = MLX5_GET(hca_vport_context, ctx, vport_state);
  946         props->phys_state       = MLX5_GET(hca_vport_context, ctx,
  947                                         port_physical_state);
  948         props->port_cap_flags   = MLX5_GET(hca_vport_context, ctx, cap_mask1);
  949         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
  950         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
  951         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
  952         props->bad_pkey_cntr    = MLX5_GET(hca_vport_context, ctx,
  953                                         pkey_violation_counter);
  954         props->qkey_viol_cntr   = MLX5_GET(hca_vport_context, ctx,
  955                                         qkey_violation_counter);
  956         props->subnet_timeout   = MLX5_GET(hca_vport_context, ctx,
  957                                         subnet_timeout);
  958         props->init_type_reply  = MLX5_GET(hca_vport_context, ctx,
  959                                         init_type_reply);
  960         props->grh_required     = MLX5_GET(hca_vport_context, ctx, grh_required);
  961 
  962         ptys->proto_mask |= MLX5_PTYS_IB;
  963         ptys->local_port = port;
  964         err = mlx5_core_access_ptys(mdev, ptys, 0);
  965         if (err)
  966                 goto out;
  967 
  968         err = translate_active_width(ibdev, ptys->ib_link_width_oper,
  969                                      &props->active_width);
  970         if (err)
  971                 goto out;
  972 
  973         props->active_speed     = (u8)ptys->ib_proto_oper;
  974 
  975         pmtu->local_port = port;
  976         err = mlx5_core_access_pmtu(mdev, pmtu, 0);
  977         if (err)
  978                 goto out;
  979 
  980         props->max_mtu          = pmtu->max_mtu;
  981         props->active_mtu       = pmtu->oper_mtu;
  982 
  983         memset(&pvlc, 0, sizeof(pvlc));
  984         pvlc.local_port = port;
  985         err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
  986         if (err)
  987                 goto out;
  988 
  989         err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
  990                                    &props->max_vl_num);
  991 out:
  992         kvfree(rep);
  993         kfree(ptys);
  994         kfree(pmtu);
  995         return err;
  996 }
  997 
  998 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
  999                        struct ib_port_attr *props)
 1000 {
 1001         switch (mlx5_get_vport_access_method(ibdev)) {
 1002         case MLX5_VPORT_ACCESS_METHOD_MAD:
 1003                 return mlx5_query_mad_ifc_port(ibdev, port, props);
 1004 
 1005         case MLX5_VPORT_ACCESS_METHOD_HCA:
 1006                 return mlx5_query_hca_port(ibdev, port, props);
 1007 
 1008         case MLX5_VPORT_ACCESS_METHOD_NIC:
 1009                 return mlx5_query_port_roce(ibdev, port, props);
 1010 
 1011         default:
 1012                 return -EINVAL;
 1013         }
 1014 }
 1015 
 1016 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 1017                              union ib_gid *gid)
 1018 {
 1019         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 1020         struct mlx5_core_dev *mdev = dev->mdev;
 1021 
 1022         switch (mlx5_get_vport_access_method(ibdev)) {
 1023         case MLX5_VPORT_ACCESS_METHOD_MAD:
 1024                 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
 1025 
 1026         case MLX5_VPORT_ACCESS_METHOD_HCA:
 1027                 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
 1028 
 1029         default:
 1030                 return -EINVAL;
 1031         }
 1032 
 1033 }
 1034 
 1035 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 1036                               u16 *pkey)
 1037 {
 1038         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 1039         struct mlx5_core_dev *mdev = dev->mdev;
 1040 
 1041         switch (mlx5_get_vport_access_method(ibdev)) {
 1042         case MLX5_VPORT_ACCESS_METHOD_MAD:
 1043                 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
 1044 
 1045         case MLX5_VPORT_ACCESS_METHOD_HCA:
 1046         case MLX5_VPORT_ACCESS_METHOD_NIC:
 1047                 return mlx5_query_hca_vport_pkey(mdev, 0, port,  0, index,
 1048                                                  pkey);
 1049         default:
 1050                 return -EINVAL;
 1051         }
 1052 }
 1053 
 1054 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
 1055                                  struct ib_device_modify *props)
 1056 {
 1057         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 1058         struct mlx5_reg_node_desc in;
 1059         struct mlx5_reg_node_desc out;
 1060         int err;
 1061 
 1062         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
 1063                 return -EOPNOTSUPP;
 1064 
 1065         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
 1066                 return 0;
 1067 
 1068         /*
 1069          * If possible, pass node desc to FW, so it can generate
 1070          * a 144 trap.  If cmd fails, just ignore.
 1071          */
 1072         memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
 1073         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
 1074                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
 1075         if (err)
 1076                 return err;
 1077 
 1078         memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
 1079 
 1080         return err;
 1081 }
 1082 
 1083 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 1084                                struct ib_port_modify *props)
 1085 {
 1086         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 1087         struct ib_port_attr attr;
 1088         u32 tmp;
 1089         int err;
 1090 
 1091         /*
 1092          * CM layer calls ib_modify_port() regardless of the link
 1093          * layer. For Ethernet ports, qkey violation and Port
 1094          * capabilities are meaningless.
 1095          */
 1096         if (mlx5_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_ETHERNET)
 1097                 return 0;
 1098 
 1099         mutex_lock(&dev->cap_mask_mutex);
 1100 
 1101         err = mlx5_ib_query_port(ibdev, port, &attr);
 1102         if (err)
 1103                 goto out;
 1104 
 1105         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
 1106                 ~props->clr_port_cap_mask;
 1107 
 1108         err = mlx5_set_port_caps(dev->mdev, port, tmp);
 1109 
 1110 out:
 1111         mutex_unlock(&dev->cap_mask_mutex);
 1112         return err;
 1113 }
 1114 
 1115 static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
 1116 {
 1117         mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
 1118                     caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
 1119 }
 1120 
 1121 static u16 calc_dynamic_bfregs(int uars_per_sys_page)
 1122 {
 1123         /* Large page with non 4k uar support might limit the dynamic size */
 1124         if (uars_per_sys_page == 1  && PAGE_SIZE > 4096)
 1125                 return MLX5_MIN_DYN_BFREGS;
 1126 
 1127         return MLX5_MAX_DYN_BFREGS;
 1128 }
 1129 
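       /*
        * Validate the user-requested number of blue-flame registers, round it
        * up to a whole number of system pages, and compute the static and
        * dynamic bfreg/page counts recorded in bfregi.
        */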
 1130 static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
 1131                              struct mlx5_ib_alloc_ucontext_req_v2 *req,
 1132                              struct mlx5_bfreg_info *bfregi)
 1133 {
 1134         int uars_per_sys_page;
 1135         int bfregs_per_sys_page;
 1136         int ref_bfregs = req->total_num_bfregs;
 1137 
 1138         if (req->total_num_bfregs == 0)
 1139                 return -EINVAL;
 1140 
 1141         BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
 1142         BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
 1143 
 1144         if (req->total_num_bfregs > MLX5_MAX_BFREGS)
 1145                 return -ENOMEM;
 1146 
 1147         uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
 1148         bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
  1149         /* This holds the required static allocation requested by the user */
 1150         req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
 1151         if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
 1152                 return -EINVAL;
 1153 
 1154         bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
 1155         bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
 1156         bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
 1157         bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
 1158 
 1159         mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
 1160                     MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
 1161                     lib_uar_4k ? "yes" : "no", ref_bfregs,
 1162                     req->total_num_bfregs, bfregi->total_num_bfregs,
 1163                     bfregi->num_sys_pages);
 1164 
 1165         return 0;
 1166 }
 1167 
 1168 static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
 1169 {
 1170         struct mlx5_bfreg_info *bfregi;
 1171         int err;
 1172         int i;
 1173 
 1174         bfregi = &context->bfregi;
 1175         for (i = 0; i < bfregi->num_static_sys_pages; i++) {
 1176                 err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
 1177                 if (err)
 1178                         goto error;
 1179 
 1180                 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
 1181         }
 1182 
 1183         for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
 1184                 bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
 1185 
 1186         return 0;
 1187 
 1188 error:
 1189         for (--i; i >= 0; i--)
 1190                 if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
 1191                         mlx5_ib_warn(dev, "failed to free uar %d\n", i);
 1192 
 1193         return err;
 1194 }
 1195 
 1196 static void deallocate_uars(struct mlx5_ib_dev *dev,
 1197                             struct mlx5_ib_ucontext *context)
 1198 {
 1199         struct mlx5_bfreg_info *bfregi;
 1200         int i;
 1201 
 1202         bfregi = &context->bfregi;
 1203         for (i = 0; i < bfregi->num_sys_pages; i++)
 1204                 if (i < bfregi->num_static_sys_pages ||
 1205                     bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
 1206                         mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
 1207 }
 1208 
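       /*
        * Allocate a transport domain for a user context.  On Ethernet ports
        * that can disable local loopback, loopback is enabled again once a
        * second user transport domain is in use.
        */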
 1209 static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
 1210                                           u16 uid)
 1211 {
 1212         int err;
 1213 
 1214         if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 1215                 return 0;
 1216 
 1217         err = mlx5_alloc_transport_domain(dev->mdev, tdn, uid);
 1218         if (err)
 1219                 return err;
 1220 
 1221         if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
 1222             (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
 1223              !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 1224                 return 0;
 1225 
 1226         mutex_lock(&dev->lb_mutex);
 1227         dev->user_td++;
 1228 
 1229         if (dev->user_td == 2)
 1230                 err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
 1231 
 1232         mutex_unlock(&dev->lb_mutex);
 1233 
 1234         if (err != 0)
 1235                 mlx5_dealloc_transport_domain(dev->mdev, *tdn, uid);
 1236         return err;
 1237 }
 1238 
 1239 static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
 1240                                              u16 uid)
 1241 {
 1242         if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 1243                 return;
 1244 
 1245         mlx5_dealloc_transport_domain(dev->mdev, tdn, uid);
 1246 
 1247         if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
 1248             (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
 1249              !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 1250                 return;
 1251 
 1252         mutex_lock(&dev->lb_mutex);
 1253         dev->user_td--;
 1254 
 1255         if (dev->user_td < 2)
 1256                 mlx5_nic_vport_update_local_lb(dev->mdev, false);
 1257 
 1258         mutex_unlock(&dev->lb_mutex);
 1259 }
 1260 
 1261 static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 1262                                   struct ib_udata *udata)
 1263 {
 1264         struct ib_device *ibdev = uctx->device;
 1265         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 1266         struct mlx5_ib_alloc_ucontext_req_v2 req = {};
 1267         struct mlx5_ib_alloc_ucontext_resp resp = {};
 1268         struct mlx5_ib_ucontext *context = to_mucontext(uctx);
 1269         struct mlx5_bfreg_info *bfregi;
 1270         int ver;
 1271         int err;
 1272         size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
 1273                                      max_cqe_version);
 1274         bool lib_uar_4k;
 1275         bool lib_uar_dyn;
 1276 
 1277         if (!dev->ib_active)
 1278                 return -EAGAIN;
 1279 
 1280         if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
 1281                 ver = 0;
 1282         else if (udata->inlen >= min_req_v2)
 1283                 ver = 2;
 1284         else
 1285                 return -EINVAL;
 1286 
 1287         err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
 1288         if (err)
 1289                 return err;
 1290 
 1291         if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
 1292                 return -EOPNOTSUPP;
 1293 
 1294         if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
 1295                 return -EOPNOTSUPP;
 1296 
 1297         req.total_num_bfregs = ALIGN(req.total_num_bfregs,
 1298                                     MLX5_NON_FP_BFREGS_PER_UAR);
 1299         if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
 1300                 return -EINVAL;
 1301 
 1302         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
 1303         if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
 1304                 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
 1305         resp.cache_line_size = cache_line_size();
 1306         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
 1307         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
 1308         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
 1309         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
 1310         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
 1311         resp.cqe_version = min_t(__u8,
 1312                                  (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
 1313                                  req.max_cqe_version);
 1314         resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
 1315                                 MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
 1316         resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
 1317                                         MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
 1318         resp.response_length = min(offsetof(typeof(resp), response_length) +
 1319                                    sizeof(resp.response_length), udata->outlen);
 1320 
 1321         lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
 1322         lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
 1323         bfregi = &context->bfregi;
 1324 
 1325         if (lib_uar_dyn) {
 1326                 bfregi->lib_uar_dyn = lib_uar_dyn;
 1327                 goto uar_done;
 1328         }
 1329 
 1330         /* updates req->total_num_bfregs */
 1331         err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
 1332         if (err)
 1333                 goto out_ctx;
 1334 
 1335         mutex_init(&bfregi->lock);
 1336         bfregi->lib_uar_4k = lib_uar_4k;
 1337         bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
 1338                                 GFP_KERNEL);
 1339         if (!bfregi->count) {
 1340                 err = -ENOMEM;
 1341                 goto out_ctx;
 1342         }
 1343 
 1344         bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
 1345                                     sizeof(*bfregi->sys_pages),
 1346                                     GFP_KERNEL);
 1347         if (!bfregi->sys_pages) {
 1348                 err = -ENOMEM;
 1349                 goto out_count;
 1350         }
 1351 
 1352         err = allocate_uars(dev, context);
 1353         if (err)
 1354                 goto out_sys_pages;
 1355 
 1356 uar_done:
 1357         if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 1358                 err = mlx5_ib_devx_create(dev, true);
 1359                 if (err < 0)
 1360                         goto out_uars;
 1361                 context->devx_uid = err;
 1362         }
 1363 
 1364         err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
 1365                                              context->devx_uid);
 1366         if (err)
 1367                 goto out_devx;
 1368 
 1369         INIT_LIST_HEAD(&context->db_page_list);
 1370         mutex_init(&context->db_page_mutex);
 1371 
 1372         resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
 1373         resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
 1374 
 1375         if (field_avail(typeof(resp), cqe_version, udata->outlen))
 1376                 resp.response_length += sizeof(resp.cqe_version);
 1377 
 1378         if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
 1379                 resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
 1380                                       MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
 1381                 resp.response_length += sizeof(resp.cmds_supp_uhw);
 1382         }
 1383 
 1384         /*
 1385          * We don't want to expose information from the PCI bar that is located
 1386          * after 4096 bytes, so if the arch only supports larger pages, let's
 1387          * pretend we don't support reading the HCA's core clock. This is also
 1388          * enforced by the mmap function.
 1389          */
 1390         if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
 1391                 if (PAGE_SIZE <= 4096) {
 1392                         resp.comp_mask |=
 1393                                 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
 1394                         resp.hca_core_clock_offset =
 1395                                 offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
 1396                 }
 1397                 resp.response_length += sizeof(resp.hca_core_clock_offset);
 1398         }
 1399 
 1400         if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
 1401                 resp.response_length += sizeof(resp.log_uar_size);
 1402 
 1403         if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
 1404                 resp.response_length += sizeof(resp.num_uars_per_page);
 1405 
 1406         if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
 1407                 resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
 1408                 resp.response_length += sizeof(resp.num_dyn_bfregs);
 1409         }
 1410 
 1411         err = ib_copy_to_udata(udata, &resp, resp.response_length);
 1412         if (err)
 1413                 goto out_mdev;
 1414 
 1415         bfregi->ver = ver;
 1416         bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
 1417         context->cqe_version = resp.cqe_version;
 1418         context->lib_caps = req.lib_caps;
 1419         print_lib_caps(dev, context->lib_caps);
 1420 
 1421         return 0;
 1422 
 1423 out_mdev:
 1424         mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 1425 out_devx:
 1426         if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
 1427                 mlx5_ib_devx_destroy(dev, context->devx_uid);
 1428 
 1429 out_uars:
 1430         deallocate_uars(dev, context);
 1431 
 1432 out_sys_pages:
 1433         kfree(bfregi->sys_pages);
 1434 
 1435 out_count:
 1436         kfree(bfregi->count);
 1437 
 1438 out_ctx:
 1439         return err;
 1440 }
 1441 
 1442 static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 1443 {
 1444         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 1445         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 1446         struct mlx5_bfreg_info *bfregi;
 1447 
 1448         bfregi = &context->bfregi;
 1449         mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 1450 
 1451         if (context->devx_uid)
 1452                 mlx5_ib_devx_destroy(dev, context->devx_uid);
 1453 
 1454         deallocate_uars(dev, context);
 1455         kfree(bfregi->sys_pages);
 1456         kfree(bfregi->count);
 1457 }
 1458 
 1459 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
 1460                                  int uar_idx)
 1461 {
 1462         int fw_uars_per_page;
 1463 
 1464         fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
 1465 
 1466         return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
 1467 }
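      /*
       * uar_index2pfn() above: UARs live in BAR 0 of the device.  When the
       * uar_4k capability is set, UARs are laid out at 4K granularity, so
       * several of them can share one system page; the index is therefore
       * scaled by fw_uars_per_page before being added to the BAR's base
       * page frame number.
       */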
 1468 
 1469 static int get_command(unsigned long offset)
 1470 {
 1471         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
 1472 }
 1473 
 1474 static int get_arg(unsigned long offset)
 1475 {
 1476         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
 1477 }
 1478 
 1479 static int get_index(unsigned long offset)
 1480 {
 1481         return get_arg(offset);
 1482 }
 1483 
 1484 /* The index uses an extra byte to allow values larger than 255 */
 1485 static int get_extended_index(unsigned long offset)
 1486 {
 1487         return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
 1488 }
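      /*
       * Layout of the mmap offset (vma->vm_pgoff) decoded by the helpers
       * above: the low MLX5_IB_MMAP_CMD_SHIFT bits carry the argument
       * (typically a UAR/page index) and the bits above them carry the
       * command.  get_extended_index() additionally folds the byte at bits
       * 16..23 into the index.  Assuming a command shift of 8:
       *
       *     pgoff = (idx_hi << 16) | (cmd << 8) | idx_lo
       *     index = (idx_hi << 8)  | idx_lo
       */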
 1489 
 1490 
 1491 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 1492 {
 1493 }
 1494 
 1495 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
 1496 {
 1497         switch (cmd) {
 1498         case MLX5_IB_MMAP_WC_PAGE:
 1499                 return "WC";
 1500         case MLX5_IB_MMAP_REGULAR_PAGE:
 1501                 return "best effort WC";
 1502         case MLX5_IB_MMAP_NC_PAGE:
 1503                 return "NC";
 1504         default:
 1505                 return NULL;
 1506         }
 1507 }
 1508 
 1509 static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 1510                                         struct vm_area_struct *vma,
 1511                                         struct mlx5_ib_ucontext *context)
 1512 {
 1513         if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
 1514             !(vma->vm_flags & VM_SHARED))
 1515                 return -EINVAL;
 1516 
 1517         if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
 1518                 return -EOPNOTSUPP;
 1519 
 1520         if (vma->vm_flags & (VM_WRITE | VM_EXEC))
 1521                 return -EPERM;
 1522 
 1523         return -EOPNOTSUPP;
 1524 }
 1525 
 1526 static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
 1527 {
 1528         struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
 1529         struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
 1530 
 1531         switch (mentry->mmap_flag) {
 1532         case MLX5_IB_MMAP_TYPE_UAR_WC:
 1533         case MLX5_IB_MMAP_TYPE_UAR_NC:
 1534                 mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
 1535                 kfree(mentry);
 1536                 break;
 1537         default:
 1538                 WARN_ON(true);
 1539         }
 1540 }
 1541 
 1542 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 1543                     struct vm_area_struct *vma,
 1544                     struct mlx5_ib_ucontext *context)
 1545 {
 1546         struct mlx5_bfreg_info *bfregi = &context->bfregi;
 1547         int err;
 1548         unsigned long idx;
 1549         phys_addr_t pfn;
 1550         pgprot_t prot;
 1551         u32 bfreg_dyn_idx = 0;
 1552         u32 uar_index;
 1553         int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
 1554         int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
 1555                                 bfregi->num_static_sys_pages;
 1556 
 1557         if (bfregi->lib_uar_dyn)
 1558                 return -EINVAL;
 1559 
 1560         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 1561                 return -EINVAL;
 1562 
 1563         if (dyn_uar)
 1564                 idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
 1565         else
 1566                 idx = get_index(vma->vm_pgoff);
 1567 
 1568         if (idx >= max_valid_idx) {
 1569                 mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
 1570                              idx, max_valid_idx);
 1571                 return -EINVAL;
 1572         }
 1573 
 1574         switch (cmd) {
 1575         case MLX5_IB_MMAP_WC_PAGE:
 1576         case MLX5_IB_MMAP_ALLOC_WC:
 1577         case MLX5_IB_MMAP_REGULAR_PAGE:
 1578                 /* For MLX5_IB_MMAP_REGULAR_PAGE, make a best effort to get WC */
 1579                 prot = pgprot_writecombine(vma->vm_page_prot);
 1580                 break;
 1581         case MLX5_IB_MMAP_NC_PAGE:
 1582                 prot = pgprot_noncached(vma->vm_page_prot);
 1583                 break;
 1584         default:
 1585                 return -EINVAL;
 1586         }
 1587 
 1588         if (dyn_uar) {
 1589                 int uars_per_page;
 1590 
 1591                 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
 1592                 bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
 1593                 if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
 1594                         mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
 1595                                      bfreg_dyn_idx, bfregi->total_num_bfregs);
 1596                         return -EINVAL;
 1597                 }
 1598 
 1599                 mutex_lock(&bfregi->lock);
 1600                 /* Fail if the UAR is already allocated; the first bfreg
 1601                  * index of each page holds its count.
 1602                  */
 1603                 if (bfregi->count[bfreg_dyn_idx]) {
 1604                         mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
 1605                         mutex_unlock(&bfregi->lock);
 1606                         return -EINVAL;
 1607                 }
 1608 
 1609                 bfregi->count[bfreg_dyn_idx]++;
 1610                 mutex_unlock(&bfregi->lock);
 1611 
 1612                 err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
 1613                 if (err) {
 1614                         mlx5_ib_warn(dev, "UAR alloc failed\n");
 1615                         goto free_bfreg;
 1616                 }
 1617         } else {
 1618                 uar_index = bfregi->sys_pages[idx];
 1619         }
 1620 
 1621         pfn = uar_index2pfn(dev, uar_index);
 1622         mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
 1623 
 1624         err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
 1625                                 prot, NULL);
 1626         if (err) {
 1627                 mlx5_ib_err(dev,
 1628                             "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
 1629                             err, mmap_cmd2str(cmd));
 1630                 goto err;
 1631         }
 1632 
 1633         if (dyn_uar)
 1634                 bfregi->sys_pages[idx] = uar_index;
 1635         return 0;
 1636 
 1637 err:
 1638         if (!dyn_uar)
 1639                 return err;
 1640 
 1641         mlx5_cmd_free_uar(dev->mdev, idx);
 1642 
 1643 free_bfreg:
 1644         mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
 1645 
 1646         return err;
 1647 }
 1648 
 1649 static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
 1650 {
 1651         unsigned long idx;
 1652         u8 command;
 1653 
 1654         command = get_command(vma->vm_pgoff);
 1655         idx = get_extended_index(vma->vm_pgoff);
 1656 
 1657         return (command << 16 | idx);
 1658 }
 1659 
 1660 static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
 1661                                struct vm_area_struct *vma,
 1662                                struct ib_ucontext *ucontext)
 1663 {
 1664         struct mlx5_user_mmap_entry *mentry;
 1665         struct rdma_user_mmap_entry *entry;
 1666         unsigned long pgoff;
 1667         pgprot_t prot;
 1668         phys_addr_t pfn;
 1669         int ret;
 1670 
 1671         pgoff = mlx5_vma_to_pgoff(vma);
 1672         entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
 1673         if (!entry)
 1674                 return -EINVAL;
 1675 
 1676         mentry = to_mmmap(entry);
 1677         pfn = (mentry->address >> PAGE_SHIFT);
 1678         if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
 1679             mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
 1680                 prot = pgprot_noncached(vma->vm_page_prot);
 1681         else
 1682                 prot = pgprot_writecombine(vma->vm_page_prot);
 1683         ret = rdma_user_mmap_io(ucontext, vma, pfn,
 1684                                 entry->npages * PAGE_SIZE,
 1685                                 prot,
 1686                                 entry);
 1687         rdma_user_mmap_entry_put(&mentry->rdma_entry);
 1688         return ret;
 1689 }
 1690 
 1691 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 1692 {
 1693         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 1694         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 1695         unsigned long command;
 1696         phys_addr_t pfn;
 1697 
 1698         command = get_command(vma->vm_pgoff);
 1699         switch (command) {
 1700         case MLX5_IB_MMAP_WC_PAGE:
 1701         case MLX5_IB_MMAP_ALLOC_WC:
 1702                 if (!dev->wc_support)
 1703                         return -EPERM;
 1704                 /* FALLTHROUGH */
 1705         case MLX5_IB_MMAP_NC_PAGE:
 1706         case MLX5_IB_MMAP_REGULAR_PAGE:
 1707                 return uar_mmap(dev, command, vma, context);
 1708 
 1709         case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
 1710                 return -ENOSYS;
 1711 
 1712         case MLX5_IB_MMAP_CORE_CLOCK:
 1713                 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 1714                         return -EINVAL;
 1715 
 1716                 if (vma->vm_flags & VM_WRITE)
 1717                         return -EPERM;
 1718 
 1719                 /* Don't expose information to user-space that it shouldn't have */
 1720                 if (PAGE_SIZE > 4096)
 1721                         return -EOPNOTSUPP;
 1722 
 1723                 pfn = (dev->mdev->iseg_base +
 1724                        offsetof(struct mlx5_init_seg, internal_timer_h)) >>
 1725                         PAGE_SHIFT;
 1726                 return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
 1727                                          PAGE_SIZE,
 1728                                          pgprot_noncached(vma->vm_page_prot),
 1729                                          NULL);
 1730         case MLX5_IB_MMAP_CLOCK_INFO:
 1731                 return mlx5_ib_mmap_clock_info_page(dev, vma, context);
 1732 
 1733         default:
 1734                 return mlx5_ib_mmap_offset(dev, vma, ibcontext);
 1735         }
 1736 
 1737         return 0;
 1738 }
 1739 
 1740 static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 1741 {
 1742         struct mlx5_ib_pd *pd = to_mpd(ibpd);
 1743         struct ib_device *ibdev = ibpd->device;
 1744         struct mlx5_ib_alloc_pd_resp resp;
 1745         int err;
 1746         struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
 1747                 udata, struct mlx5_ib_ucontext, ibucontext);
 1748         u16 uid = context ? context->devx_uid : 0;
 1749 
 1750         err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn, uid);
 1751         if (err)
 1752                 return (err);
 1753 
 1754         pd->uid = uid;
 1755         if (udata) {
 1756                 resp.pdn = pd->pdn;
 1757                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
 1758                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
 1759                         return -EFAULT;
 1760                 }
 1761         }
 1762 
 1763         return 0;
 1764 }
 1765 
 1766 static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 1767 {
 1768         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
 1769         struct mlx5_ib_pd *mpd = to_mpd(pd);
 1770 
 1771         mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
 1772 }
 1773 
 1774 enum {
 1775         MATCH_CRITERIA_ENABLE_OUTER_BIT,
 1776         MATCH_CRITERIA_ENABLE_MISC_BIT,
 1777         MATCH_CRITERIA_ENABLE_INNER_BIT
 1778 };
 1779 
 1780 #define HEADER_IS_ZERO(match_criteria, headers)                            \
 1781         !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
 1782                     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \
 1783 
 1784 static u8 get_match_criteria_enable(u32 *match_criteria)
 1785 {
 1786         u8 match_criteria_enable;
 1787 
 1788         match_criteria_enable =
 1789                 (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
 1790                 MATCH_CRITERIA_ENABLE_OUTER_BIT;
 1791         match_criteria_enable |=
 1792                 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
 1793                 MATCH_CRITERIA_ENABLE_MISC_BIT;
 1794         match_criteria_enable |=
 1795                 (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
 1796                 MATCH_CRITERIA_ENABLE_INNER_BIT;
 1797 
 1798         return match_criteria_enable;
 1799 }
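      /*
       * get_match_criteria_enable() builds a bitmask telling the device
       * which sections of the match criteria actually carry a non-zero
       * mask: outer headers, misc parameters and/or inner headers.
       * HEADER_IS_ZERO() relies on memchr_inv() returning NULL only when
       * the whole section is zero.
       */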
 1800 
 1801 static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
 1802 {
 1803         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
 1804         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 1805 }
 1806 
 1807 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 1808 {
 1809         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
 1810         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
 1811         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
 1812         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 1813 }
 1814 
 1815 #define LAST_ETH_FIELD vlan_tag
 1816 #define LAST_IB_FIELD sl
 1817 #define LAST_IPV4_FIELD tos
 1818 #define LAST_IPV6_FIELD traffic_class
 1819 #define LAST_TCP_UDP_FIELD src_port
 1820 
 1821 /* Field is the last supported field */
 1822 #define FIELDS_NOT_SUPPORTED(filter, field)\
 1823         memchr_inv((void *)&filter.field  +\
 1824                    sizeof(filter.field), 0,\
 1825                    sizeof(filter) -\
 1826                    offsetof(typeof(filter), field) -\
 1827                    sizeof(filter.field))
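      /*
       * FIELDS_NOT_SUPPORTED() is non-zero when any byte of the filter
       * after the named field is set, i.e. when user space asked to match
       * on a field this driver does not handle.  For example,
       * FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD) rejects an
       * Ethernet spec whose mask sets anything beyond vlan_tag.
       */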
 1828 
 1829 static int parse_flow_attr(u32 *match_c, u32 *match_v,
 1830                            const union ib_flow_spec *ib_spec)
 1831 {
 1832         void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
 1833                                              outer_headers);
 1834         void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
 1835                                              outer_headers);
 1836         void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 1837                                            misc_parameters);
 1838         void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 1839                                            misc_parameters);
 1840 
 1841         switch (ib_spec->type) {
 1842         case IB_FLOW_SPEC_ETH:
 1843                 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 1844                         return -ENOTSUPP;
 1845 
 1846                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1847                                              dmac_47_16),
 1848                                 ib_spec->eth.mask.dst_mac);
 1849                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1850                                              dmac_47_16),
 1851                                 ib_spec->eth.val.dst_mac);
 1852 
 1853                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1854                                              smac_47_16),
 1855                                 ib_spec->eth.mask.src_mac);
 1856                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1857                                              smac_47_16),
 1858                                 ib_spec->eth.val.src_mac);
 1859 
 1860                 if (ib_spec->eth.mask.vlan_tag) {
 1861                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1862                                  cvlan_tag, 1);
 1863                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1864                                  cvlan_tag, 1);
 1865 
 1866                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1867                                  first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
 1868                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1869                                  first_vid, ntohs(ib_spec->eth.val.vlan_tag));
 1870 
 1871                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1872                                  first_cfi,
 1873                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
 1874                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1875                                  first_cfi,
 1876                                  ntohs(ib_spec->eth.val.vlan_tag) >> 12);
 1877 
 1878                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1879                                  first_prio,
 1880                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
 1881                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1882                                  first_prio,
 1883                                  ntohs(ib_spec->eth.val.vlan_tag) >> 13);
 1884                 }
 1885                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1886                          ethertype, ntohs(ib_spec->eth.mask.ether_type));
 1887                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1888                          ethertype, ntohs(ib_spec->eth.val.ether_type));
 1889                 break;
 1890         case IB_FLOW_SPEC_IPV4:
 1891                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 1892                         return -ENOTSUPP;
 1893 
 1894                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1895                          ethertype, 0xffff);
 1896                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1897                          ethertype, ETH_P_IP);
 1898 
 1899                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1900                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
 1901                        &ib_spec->ipv4.mask.src_ip,
 1902                        sizeof(ib_spec->ipv4.mask.src_ip));
 1903                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1904                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
 1905                        &ib_spec->ipv4.val.src_ip,
 1906                        sizeof(ib_spec->ipv4.val.src_ip));
 1907                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1908                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 1909                        &ib_spec->ipv4.mask.dst_ip,
 1910                        sizeof(ib_spec->ipv4.mask.dst_ip));
 1911                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1912                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 1913                        &ib_spec->ipv4.val.dst_ip,
 1914                        sizeof(ib_spec->ipv4.val.dst_ip));
 1915 
 1916                 set_tos(outer_headers_c, outer_headers_v,
 1917                         ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
 1918 
 1919                 set_proto(outer_headers_c, outer_headers_v,
 1920                           ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
 1921                 break;
 1922         case IB_FLOW_SPEC_IPV6:
 1923                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 1924                         return -ENOTSUPP;
 1925 
 1926                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 1927                          ethertype, 0xffff);
 1928                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
 1929                          ethertype, ETH_P_IPV6);
 1930 
 1931                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1932                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
 1933                        &ib_spec->ipv6.mask.src_ip,
 1934                        sizeof(ib_spec->ipv6.mask.src_ip));
 1935                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1936                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
 1937                        &ib_spec->ipv6.val.src_ip,
 1938                        sizeof(ib_spec->ipv6.val.src_ip));
 1939                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 1940                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 1941                        &ib_spec->ipv6.mask.dst_ip,
 1942                        sizeof(ib_spec->ipv6.mask.dst_ip));
 1943                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 1944                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 1945                        &ib_spec->ipv6.val.dst_ip,
 1946                        sizeof(ib_spec->ipv6.val.dst_ip));
 1947 
 1948                 set_tos(outer_headers_c, outer_headers_v,
 1949                         ib_spec->ipv6.mask.traffic_class,
 1950                         ib_spec->ipv6.val.traffic_class);
 1951 
 1952                 set_proto(outer_headers_c, outer_headers_v,
 1953                           ib_spec->ipv6.mask.next_hdr,
 1954                           ib_spec->ipv6.val.next_hdr);
 1955 
 1956                 MLX5_SET(fte_match_set_misc, misc_params_c,
 1957                          outer_ipv6_flow_label,
 1958                          ntohl(ib_spec->ipv6.mask.flow_label));
 1959                 MLX5_SET(fte_match_set_misc, misc_params_v,
 1960                          outer_ipv6_flow_label,
 1961                          ntohl(ib_spec->ipv6.val.flow_label));
 1962                 break;
 1963         case IB_FLOW_SPEC_TCP:
 1964                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 1965                                          LAST_TCP_UDP_FIELD))
 1966                         return -ENOTSUPP;
 1967 
 1968                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
 1969                          0xff);
 1970                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
 1971                          IPPROTO_TCP);
 1972 
 1973                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
 1974                          ntohs(ib_spec->tcp_udp.mask.src_port));
 1975                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
 1976                          ntohs(ib_spec->tcp_udp.val.src_port));
 1977 
 1978                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
 1979                          ntohs(ib_spec->tcp_udp.mask.dst_port));
 1980                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
 1981                          ntohs(ib_spec->tcp_udp.val.dst_port));
 1982                 break;
 1983         case IB_FLOW_SPEC_UDP:
 1984                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 1985                                          LAST_TCP_UDP_FIELD))
 1986                         return -ENOTSUPP;
 1987 
 1988                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
 1989                          0xff);
 1990                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
 1991                          IPPROTO_UDP);
 1992 
 1993                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
 1994                          ntohs(ib_spec->tcp_udp.mask.src_port));
 1995                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
 1996                          ntohs(ib_spec->tcp_udp.val.src_port));
 1997 
 1998                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
 1999                          ntohs(ib_spec->tcp_udp.mask.dst_port));
 2000                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
 2001                          ntohs(ib_spec->tcp_udp.val.dst_port));
 2002                 break;
 2003         default:
 2004                 return -EINVAL;
 2005         }
 2006 
 2007         return 0;
 2008 }
 2009 
 2010 /* A flow that could catch both multicast and unicast packets is not
 2011  * placed in the multicast flow steering table, since there it could
 2012  * steal multicast packets belonging to other rules.
 2013  */
 2014 static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
 2015 {
 2016         struct ib_flow_spec_eth *eth_spec;
 2017 
 2018         if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
 2019             ib_attr->size < sizeof(struct ib_flow_attr) +
 2020             sizeof(struct ib_flow_spec_eth) ||
 2021             ib_attr->num_of_specs < 1)
 2022                 return false;
 2023 
 2024         eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
 2025         if (eth_spec->type != IB_FLOW_SPEC_ETH ||
 2026             eth_spec->size != sizeof(*eth_spec))
 2027                 return false;
 2028 
 2029         return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
 2030                is_multicast_ether_addr(eth_spec->val.dst_mac);
 2031 }
 2032 
 2033 static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
 2034 {
 2035         union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
 2036         bool has_ipv4_spec = false;
 2037         bool eth_type_ipv4 = true;
 2038         unsigned int spec_index;
 2039 
 2040         /* Validate that ethertype is correct */
 2041         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 2042                 if (ib_spec->type == IB_FLOW_SPEC_ETH &&
 2043                     ib_spec->eth.mask.ether_type) {
 2044                         if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
 2045                               ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
 2046                                 eth_type_ipv4 = false;
 2047                 } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
 2048                         has_ipv4_spec = true;
 2049                 }
 2050                 ib_spec = (void *)ib_spec + ib_spec->size;
 2051         }
 2052         return !has_ipv4_spec || eth_type_ipv4;
 2053 }
 2054 
 2055 static void put_flow_table(struct mlx5_ib_dev *dev,
 2056                            struct mlx5_ib_flow_prio *prio, bool ft_added)
 2057 {
 2058         prio->refcount -= !!ft_added;
 2059         if (!prio->refcount) {
 2060                 mlx5_destroy_flow_table(prio->flow_table);
 2061                 prio->flow_table = NULL;
 2062         }
 2063 }
 2064 
 2065 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 2066 {
 2067         struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
 2068         struct mlx5_ib_flow_handler *handler = container_of(flow_id,
 2069                                                           struct mlx5_ib_flow_handler,
 2070                                                           ibflow);
 2071         struct mlx5_ib_flow_handler *iter, *tmp;
 2072 
 2073         mutex_lock(&dev->flow_db.lock);
 2074 
 2075         list_for_each_entry_safe(iter, tmp, &handler->list, list) {
 2076                 mlx5_del_flow_rule(iter->rule);
 2077                 put_flow_table(dev, iter->prio, true);
 2078                 list_del(&iter->list);
 2079                 kfree(iter);
 2080         }
 2081 
 2082         mlx5_del_flow_rule(handler->rule);
 2083         put_flow_table(dev, handler->prio, true);
 2084         mutex_unlock(&dev->flow_db.lock);
 2085 
 2086         kfree(handler);
 2087 
 2088         return 0;
 2089 }
 2090 
 2091 static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
 2092 {
 2093         priority *= 2;
 2094         if (!dont_trap)
 2095                 priority++;
 2096         return priority;
 2097 }
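      /*
       * Each IB flow priority maps to two consecutive core priorities:
       * 2 * prio for "don't trap" rules and 2 * prio + 1 for normal rules,
       * so a don't-trap rule sorts ahead of (i.e. gets a lower core
       * priority than) the normal rule at the same user priority.
       */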
 2098 
 2099 enum flow_table_type {
 2100         MLX5_IB_FT_RX,
 2101         MLX5_IB_FT_TX
 2102 };
 2103 
 2104 #define MLX5_FS_MAX_TYPES        10
 2105 #define MLX5_FS_MAX_ENTRIES      32000UL
 2106 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 2107                                                 struct ib_flow_attr *flow_attr,
 2108                                                 enum flow_table_type ft_type)
 2109 {
 2110         bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
 2111         struct mlx5_flow_namespace *ns = NULL;
 2112         struct mlx5_ib_flow_prio *prio;
 2113         struct mlx5_flow_table *ft;
 2114         int num_entries;
 2115         int num_groups;
 2116         int priority;
 2117         int err = 0;
 2118 
 2119         if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 2120                 if (flow_is_multicast_only(flow_attr) &&
 2121                     !dont_trap)
 2122                         priority = MLX5_IB_FLOW_MCAST_PRIO;
 2123                 else
 2124                         priority = ib_prio_to_core_prio(flow_attr->priority,
 2125                                                         dont_trap);
 2126                 ns = mlx5_get_flow_namespace(dev->mdev,
 2127                                              MLX5_FLOW_NAMESPACE_BYPASS);
 2128                 num_entries = MLX5_FS_MAX_ENTRIES;
 2129                 num_groups = MLX5_FS_MAX_TYPES;
 2130                 prio = &dev->flow_db.prios[priority];
 2131         } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 2132                    flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
 2133                 ns = mlx5_get_flow_namespace(dev->mdev,
 2134                                              MLX5_FLOW_NAMESPACE_LEFTOVERS);
 2135                 build_leftovers_ft_param("bypass", &priority,
 2136                                          &num_entries,
 2137                                          &num_groups);
 2138                 prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
 2139         } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
 2140                 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 2141                                         allow_sniffer_and_nic_rx_shared_tir))
 2142                         return ERR_PTR(-ENOTSUPP);
 2143 
 2144                 ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
 2145                                              MLX5_FLOW_NAMESPACE_SNIFFER_RX :
 2146                                              MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 2147 
 2148                 prio = &dev->flow_db.sniffer[ft_type];
 2149                 priority = 0;
 2150                 num_entries = 1;
 2151                 num_groups = 1;
 2152         }
 2153 
 2154         if (!ns)
 2155                 return ERR_PTR(-ENOTSUPP);
 2156 
 2157         ft = prio->flow_table;
 2158         if (!ft) {
 2159                 ft = mlx5_create_auto_grouped_flow_table(ns, priority, "bypass",
 2160                                                          num_entries,
 2161                                                          num_groups);
 2162 
 2163                 if (!IS_ERR(ft)) {
 2164                         prio->refcount = 0;
 2165                         prio->flow_table = ft;
 2166                 } else {
 2167                         err = PTR_ERR(ft);
 2168                 }
 2169         }
 2170 
 2171         return err ? ERR_PTR(err) : prio;
 2172 }
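      /*
       * get_flow_table() above selects the flow-steering namespace and
       * priority slot for the given attribute type: NORMAL rules use the
       * bypass namespace (multicast-only rules get the dedicated MCAST
       * priority), ALL/MC_DEFAULT rules use the leftovers namespace, and
       * SNIFFER rules get a per-direction sniffer table.  The table itself
       * is created lazily on first use and tracked via prio->refcount.
       */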
 2173 
 2174 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 2175                                                      struct mlx5_ib_flow_prio *ft_prio,
 2176                                                      const struct ib_flow_attr *flow_attr,
 2177                                                      struct mlx5_flow_destination *dst)
 2178 {
 2179         struct mlx5_flow_table  *ft = ft_prio->flow_table;
 2180         struct mlx5_ib_flow_handler *handler;
 2181         struct mlx5_flow_spec *spec;
 2182         const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 2183         unsigned int spec_index;
 2184         u32 action;
 2185         int err = 0;
 2186 
 2187         if (!is_valid_attr(flow_attr))
 2188                 return ERR_PTR(-EINVAL);
 2189 
 2190         spec = mlx5_vzalloc(sizeof(*spec));
 2191         handler = kzalloc(sizeof(*handler), GFP_KERNEL);
 2192         if (!handler || !spec) {
 2193                 err = -ENOMEM;
 2194                 goto free;
 2195         }
 2196 
 2197         INIT_LIST_HEAD(&handler->list);
 2198 
 2199         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 2200                 err = parse_flow_attr(spec->match_criteria,
 2201                                       spec->match_value, ib_flow);
 2202                 if (err < 0)
 2203                         goto free;
 2204 
 2205                 ib_flow += ((union ib_flow_spec *)ib_flow)->size;
 2206         }
 2207 
 2208         spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
 2209         action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 2210                 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 2211         handler->rule = mlx5_add_flow_rule(ft, spec->match_criteria_enable,
 2212                                            spec->match_criteria,
 2213                                            spec->match_value,
 2214                                            action,
 2215                                            MLX5_FS_DEFAULT_FLOW_TAG,
 2216                                            dst);
 2217 
 2218         if (IS_ERR(handler->rule)) {
 2219                 err = PTR_ERR(handler->rule);
 2220                 goto free;
 2221         }
 2222 
 2223         ft_prio->refcount++;
 2224         handler->prio = ft_prio;
 2225 
 2226         ft_prio->flow_table = ft;
 2227 free:
 2228         if (err)
 2229                 kfree(handler);
 2230         kvfree(spec);
 2231         return err ? ERR_PTR(err) : handler;
 2232 }
 2233 
 2234 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
 2235                                                           struct mlx5_ib_flow_prio *ft_prio,
 2236                                                           struct ib_flow_attr *flow_attr,
 2237                                                           struct mlx5_flow_destination *dst)
 2238 {
 2239         struct mlx5_ib_flow_handler *handler_dst = NULL;
 2240         struct mlx5_ib_flow_handler *handler = NULL;
 2241 
 2242         handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
 2243         if (!IS_ERR(handler)) {
 2244                 handler_dst = create_flow_rule(dev, ft_prio,
 2245                                                flow_attr, dst);
 2246                 if (IS_ERR(handler_dst)) {
 2247                         mlx5_del_flow_rule(handler->rule);
 2248                         ft_prio->refcount--;
 2249                         kfree(handler);
 2250                         handler = handler_dst;
 2251                 } else {
 2252                         list_add(&handler_dst->list, &handler->list);
 2253                 }
 2254         }
 2255 
 2256         return handler;
 2257 }
 2258 enum {
 2259         LEFTOVERS_MC,
 2260         LEFTOVERS_UC,
 2261 };
 2262 
 2263 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
 2264                                                           struct mlx5_ib_flow_prio *ft_prio,
 2265                                                           struct ib_flow_attr *flow_attr,
 2266                                                           struct mlx5_flow_destination *dst)
 2267 {
 2268         struct mlx5_ib_flow_handler *handler_ucast = NULL;
 2269         struct mlx5_ib_flow_handler *handler = NULL;
 2270 
 2271         static struct {
 2272                 struct ib_flow_attr     flow_attr;
 2273                 struct ib_flow_spec_eth eth_flow;
 2274         } leftovers_specs[] = {
 2275                 [LEFTOVERS_MC] = {
 2276                         .flow_attr = {
 2277                                 .num_of_specs = 1,
 2278                                 .size = sizeof(leftovers_specs[0])
 2279                         },
 2280                         .eth_flow = {
 2281                                 .type = IB_FLOW_SPEC_ETH,
 2282                                 .size = sizeof(struct ib_flow_spec_eth),
 2283                                 .mask = {.dst_mac = {0x1} },
 2284                                 .val =  {.dst_mac = {0x1} }
 2285                         }
 2286                 },
 2287                 [LEFTOVERS_UC] = {
 2288                         .flow_attr = {
 2289                                 .num_of_specs = 1,
 2290                                 .size = sizeof(leftovers_specs[0])
 2291                         },
 2292                         .eth_flow = {
 2293                                 .type = IB_FLOW_SPEC_ETH,
 2294                                 .size = sizeof(struct ib_flow_spec_eth),
 2295                                 .mask = {.dst_mac = {0x1} },
 2296                                 .val = {.dst_mac = {} }
 2297                         }
 2298                 }
 2299         };
 2300 
 2301         handler = create_flow_rule(dev, ft_prio,
 2302                                    &leftovers_specs[LEFTOVERS_MC].flow_attr,
 2303                                    dst);
 2304         if (!IS_ERR(handler) &&
 2305             flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
 2306                 handler_ucast = create_flow_rule(dev, ft_prio,
 2307                                                  &leftovers_specs[LEFTOVERS_UC].flow_attr,
 2308                                                  dst);
 2309                 if (IS_ERR(handler_ucast)) {
 2310                         mlx5_del_flow_rule(handler->rule);
 2311                         ft_prio->refcount--;
 2312                         kfree(handler);
 2313                         handler = handler_ucast;
 2314                 } else {
 2315                         list_add(&handler_ucast->list, &handler->list);
 2316                 }
 2317         }
 2318 
 2319         return handler;
 2320 }
 2321 
 2322 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
 2323                                                         struct mlx5_ib_flow_prio *ft_rx,
 2324                                                         struct mlx5_ib_flow_prio *ft_tx,
 2325                                                         struct mlx5_flow_destination *dst)
 2326 {
 2327         struct mlx5_ib_flow_handler *handler_rx;
 2328         struct mlx5_ib_flow_handler *handler_tx;
 2329         int err;
 2330         static const struct ib_flow_attr flow_attr  = {
 2331                 .num_of_specs = 0,
 2332                 .type = IB_FLOW_ATTR_SNIFFER,
 2333                 .size = sizeof(flow_attr)
 2334         };
 2335 
 2336         handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
 2337         if (IS_ERR(handler_rx)) {
 2338                 err = PTR_ERR(handler_rx);
 2339                 goto err;
 2340         }
 2341 
 2342         handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
 2343         if (IS_ERR(handler_tx)) {
 2344                 err = PTR_ERR(handler_tx);
 2345                 goto err_tx;
 2346         }
 2347 
 2348         list_add(&handler_tx->list, &handler_rx->list);
 2349 
 2350         return handler_rx;
 2351 
 2352 err_tx:
 2353         mlx5_del_flow_rule(handler_rx->rule);
 2354         ft_rx->refcount--;
 2355         kfree(handler_rx);
 2356 err:
 2357         return ERR_PTR(err);
 2358 }
 2359 
 2360 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 2361                                            struct ib_flow_attr *flow_attr,
 2362                                            int domain,
 2363                                            struct ib_udata *udata)
 2364 {
 2365         struct mlx5_ib_dev *dev = to_mdev(qp->device);
 2366         struct mlx5_ib_qp *mqp = to_mqp(qp);
 2367         struct mlx5_ib_flow_handler *handler = NULL;
 2368         struct mlx5_flow_destination *dst = NULL;
 2369         struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
 2370         struct mlx5_ib_flow_prio *ft_prio;
 2371         struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
 2372         size_t min_ucmd_sz, required_ucmd_sz;
 2373         int err;
 2374 
 2375         if (udata && udata->inlen) {
 2376                 min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
 2377                 if (udata->inlen < min_ucmd_sz)
 2378                         return ERR_PTR(-EOPNOTSUPP);
 2379 
 2380                 err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
 2381                 if (err)
 2382                         return ERR_PTR(err);
 2383 
 2384                 /* currently only one counters data block is supported */
 2385                 if (ucmd_hdr.ncounters_data > 1)
 2386                         return ERR_PTR(-EINVAL);
 2387 
 2388                 required_ucmd_sz = min_ucmd_sz +
 2389                         sizeof(struct mlx5_ib_flow_counters_data) *
 2390                         ucmd_hdr.ncounters_data;
 2391                 if (udata->inlen > required_ucmd_sz &&
 2392                     !ib_is_udata_cleared(udata, required_ucmd_sz,
 2393                                          udata->inlen - required_ucmd_sz))
 2394                         return ERR_PTR(-EOPNOTSUPP);
 2395 
 2396                 ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
 2397                 if (!ucmd)
 2398                         return ERR_PTR(-ENOMEM);
 2399 
 2400                 err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
 2401                 if (err)
 2402                         goto free_ucmd;
 2403         }
 2404 
 2405         if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
 2406                 err = -ENOMEM;
 2407                 goto free_ucmd;
 2408         }
 2409 
 2410         if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
 2411                 err = -EINVAL;
 2412                 goto free_ucmd;
 2413         }
 2414 
 2415         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 2416         if (!dst) {
 2417                 err = -ENOMEM;
 2418                 goto free_ucmd;
 2419         }
 2420 
 2421         mutex_lock(&dev->flow_db.lock);
 2422 
 2423         ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
 2424         if (IS_ERR(ft_prio)) {
 2425                 err = PTR_ERR(ft_prio);
 2426                 goto unlock;
 2427         }
 2428         if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
 2429                 ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
 2430                 if (IS_ERR(ft_prio_tx)) {
 2431                         err = PTR_ERR(ft_prio_tx);
 2432                         ft_prio_tx = NULL;
 2433                         goto destroy_ft;
 2434                 }
 2435         }
 2436 
 2437         dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 2438         if (mqp->flags & MLX5_IB_QP_RSS)
 2439                 dst->tir_num = mqp->rss_qp.tirn;
 2440         else
 2441                 dst->tir_num = mqp->raw_packet_qp.rq.tirn;
 2442 
 2443         switch (flow_attr->type) {
 2444         case IB_FLOW_ATTR_NORMAL:
 2445                 if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) {
 2446                         err = -EOPNOTSUPP;
 2447                         goto destroy_ft;
 2448                 }
 2449                 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
 2450                         handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst);
 2451                 } else {
 2452                         handler = create_flow_rule(dev, ft_prio, flow_attr, dst);
 2453                 }
 2454                 break;
 2455         case IB_FLOW_ATTR_ALL_DEFAULT:
 2456         case IB_FLOW_ATTR_MC_DEFAULT:
 2457                 handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
 2458                 break;
 2459         case IB_FLOW_ATTR_SNIFFER:
 2460                 handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
 2461                 break;
 2462         default:
 2463                 err = -EINVAL;
 2464                 goto destroy_ft;
 2465         }
 2466 
 2467         if (IS_ERR(handler)) {
 2468                 err = PTR_ERR(handler);
 2469                 handler = NULL;
 2470                 goto destroy_ft;
 2471         }
 2472 
 2473         mutex_unlock(&dev->flow_db.lock);
 2474         kfree(dst);
 2475         kfree(ucmd);
 2476 
 2477         return &handler->ibflow;
 2478 
 2479 destroy_ft:
 2480         put_flow_table(dev, ft_prio, false);
 2481         if (ft_prio_tx)
 2482                 put_flow_table(dev, ft_prio_tx, false);
 2483 unlock:
 2484         mutex_unlock(&dev->flow_db.lock);
 2485         kfree(dst);
 2486 free_ucmd:
 2487         kfree(ucmd);
 2488         return ERR_PTR(err);
 2489 }
 2490 
 2491 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 2492 {
 2493         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 2494         int err;
 2495 
 2496         err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
 2497         if (err)
 2498                 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
 2499                              ibqp->qp_num, gid->raw);
 2500 
 2501         return err;
 2502 }
 2503 
 2504 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 2505 {
 2506         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 2507         int err;
 2508 
 2509         err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
 2510         if (err)
 2511                 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
 2512                              ibqp->qp_num, gid->raw);
 2513 
 2514         return err;
 2515 }
 2516 
 2517 static int init_node_data(struct mlx5_ib_dev *dev)
 2518 {
 2519         int err;
 2520 
 2521         err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
 2522         if (err)
 2523                 return err;
 2524 
 2525         return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
 2526 }
 2527 
 2528 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
 2529                              char *buf)
 2530 {
 2531         struct mlx5_ib_dev *dev =
 2532                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 2533 
 2534         return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
 2535 }
 2536 
 2537 static ssize_t show_reg_pages(struct device *device,
 2538                               struct device_attribute *attr, char *buf)
 2539 {
 2540         struct mlx5_ib_dev *dev =
 2541                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 2542 
 2543         return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 2544 }
 2545 
 2546 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 2547                         char *buf)
 2548 {
 2549         struct mlx5_ib_dev *dev =
 2550                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 2551         return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
 2552 }
 2553 
 2554 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 2555                         char *buf)
 2556 {
 2557         struct mlx5_ib_dev *dev =
 2558                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 2559         return sprintf(buf, "%x\n", dev->mdev->pdev->revision);
 2560 }
 2561 
 2562 static ssize_t show_board(struct device *device, struct device_attribute *attr,
 2563                           char *buf)
 2564 {
 2565         struct mlx5_ib_dev *dev =
 2566                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 2567         return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
 2568                        dev->mdev->board_id);
 2569 }
 2570 
 2571 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
 2572 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
 2573 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
 2574 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
 2575 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
 2576 
 2577 static struct device_attribute *mlx5_class_attributes[] = {
 2578         &dev_attr_hw_rev,
 2579         &dev_attr_hca_type,
 2580         &dev_attr_board_id,
 2581         &dev_attr_fw_pages,
 2582         &dev_attr_reg_pages,
 2583 };
 2584 
 2585 static void pkey_change_handler(struct work_struct *work)
 2586 {
 2587         struct mlx5_ib_port_resources *ports =
 2588                 container_of(work, struct mlx5_ib_port_resources,
 2589                              pkey_change_work);
 2590 
 2591         mutex_lock(&ports->devr->mutex);
 2592         mlx5_ib_gsi_pkey_change(ports->gsi);
 2593         mutex_unlock(&ports->devr->mutex);
 2594 }
 2595 
 2596 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
 2597 {
 2598         struct mlx5_ib_qp *mqp;
 2599         struct mlx5_ib_cq *send_mcq, *recv_mcq;
 2600         struct mlx5_core_cq *mcq;
 2601         struct list_head cq_armed_list;
 2602         unsigned long flags_qp;
 2603         unsigned long flags_cq;
 2604         unsigned long flags;
 2605 
 2606         INIT_LIST_HEAD(&cq_armed_list);
 2607 
 2608         /* Walk the qp list residing on this ibdev, synchronized with qp create/destroy. */
 2609         spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
 2610         list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
 2611                 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
 2612                 if (mqp->sq.tail != mqp->sq.head) {
 2613                         send_mcq = to_mcq(mqp->ibqp.send_cq);
 2614                         spin_lock_irqsave(&send_mcq->lock, flags_cq);
 2615                         if (send_mcq->mcq.comp &&
 2616                             mqp->ibqp.send_cq->comp_handler) {
 2617                                 if (!send_mcq->mcq.reset_notify_added) {
 2618                                         send_mcq->mcq.reset_notify_added = 1;
 2619                                         list_add_tail(&send_mcq->mcq.reset_notify,
 2620                                                       &cq_armed_list);
 2621                                 }
 2622                         }
 2623                         spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
 2624                 }
 2625                 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
 2626                 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
 2627                 /* no handling is needed for SRQ */
 2628                 if (!mqp->ibqp.srq) {
 2629                         if (mqp->rq.tail != mqp->rq.head) {
 2630                                 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
 2631                                 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
 2632                                 if (recv_mcq->mcq.comp &&
 2633                                     mqp->ibqp.recv_cq->comp_handler) {
 2634                                         if (!recv_mcq->mcq.reset_notify_added) {
 2635                                                 recv_mcq->mcq.reset_notify_added = 1;
 2636                                                 list_add_tail(&recv_mcq->mcq.reset_notify,
 2637                                                               &cq_armed_list);
 2638                                         }
 2639                                 }
 2640                                 spin_unlock_irqrestore(&recv_mcq->lock,
 2641                                                        flags_cq);
 2642                         }
 2643                 }
 2644                 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
 2645         }
 2646         /* At this point all in-flight post-sends have been put to execution
 2647          * because we locked/unlocked the locks above.  Now arm all involved CQs.
 2648          */
 2649         list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
 2650                 mcq->comp(mcq, NULL);
 2651         }
 2652         spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
 2653 }
 2654 
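/*
 * Core-device event callback: translate mlx5 core events into IB events
 * and dispatch them to registered clients.  Port up/down/initialized is
 * ignored for Ethernet ports (handled by the netdev notifier instead),
 * and a fatal system error marks the device inactive after dispatch.
 */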
 2655 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 2656                           enum mlx5_dev_event event, unsigned long param)
 2657 {
 2658         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
 2659         struct ib_event ibev;
 2660         bool fatal = false;
 2661         u8 port = (u8)param;
 2662 
 2663         switch (event) {
 2664         case MLX5_DEV_EVENT_SYS_ERROR:
 2665                 ibev.event = IB_EVENT_DEVICE_FATAL;
 2666                 mlx5_ib_handle_internal_error(ibdev);
 2667                 fatal = true;
 2668                 break;
 2669 
 2670         case MLX5_DEV_EVENT_PORT_UP:
 2671         case MLX5_DEV_EVENT_PORT_DOWN:
 2672         case MLX5_DEV_EVENT_PORT_INITIALIZED:
 2673                 /* In RoCE, port up/down events are handled in
 2674                  * mlx5_netdev_event().
 2675                  */
 2676                 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
 2677                         IB_LINK_LAYER_ETHERNET)
 2678                         return;
 2679 
 2680                 ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
 2681                              IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
 2682                 break;
 2683 
 2684         case MLX5_DEV_EVENT_LID_CHANGE:
 2685                 ibev.event = IB_EVENT_LID_CHANGE;
 2686                 break;
 2687 
 2688         case MLX5_DEV_EVENT_PKEY_CHANGE:
 2689                 ibev.event = IB_EVENT_PKEY_CHANGE;
 2690 
 2691                 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
 2692                 break;
 2693 
 2694         case MLX5_DEV_EVENT_GUID_CHANGE:
 2695                 ibev.event = IB_EVENT_GID_CHANGE;
 2696                 break;
 2697 
 2698         case MLX5_DEV_EVENT_CLIENT_REREG:
 2699                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
 2700                 break;
 2701 
 2702         default:
 2703                 /* unsupported event */
 2704                 return;
 2705         }
 2706 
 2707         ibev.device           = &ibdev->ib_dev;
 2708         ibev.element.port_num = port;
 2709 
 2710         if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
 2711                 mlx5_ib_warn(ibdev, "warning: event(%d) on port %d\n", event, port);
 2712                 return;
 2713         }
 2714 
 2715         if (ibdev->ib_active)
 2716                 ib_dispatch_event(&ibev);
 2717 
 2718         if (fatal)
 2719                 ibdev->ib_active = false;
 2720 }
 2721 
 2722 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
 2723 {
 2724         int port;
 2725 
 2726         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
 2727                 mlx5_query_ext_port_caps(dev, port);
 2728 }
 2729 
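/*
 * Query device and per-port attributes once at probe time and cache the
 * P_Key and GID table sizes in the core device's port_caps array.
 */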
 2730 static int get_port_caps(struct mlx5_ib_dev *dev)
 2731 {
 2732         struct ib_device_attr *dprops = NULL;
 2733         struct ib_port_attr *pprops = NULL;
 2734         int err = -ENOMEM;
 2735         int port;
 2736         struct ib_udata uhw = {.inlen = 0, .outlen = 0};
 2737 
 2738         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
 2739         if (!pprops)
 2740                 goto out;
 2741 
 2742         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
 2743         if (!dprops)
 2744                 goto out;
 2745 
 2746         err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
 2747         if (err) {
 2748                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
 2749                 goto out;
 2750         }
 2751 
 2752         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
 2753                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
 2754                 if (err) {
 2755                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
 2756                                      port, err);
 2757                         break;
 2758                 }
 2759                 dev->mdev->port_caps[port - 1].pkey_table_len =
 2760                                                 dprops->max_pkeys;
 2761                 dev->mdev->port_caps[port - 1].gid_table_len =
 2762                                                 pprops->gid_tbl_len;
 2763                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
 2764                             dprops->max_pkeys, pprops->gid_tbl_len);
 2765         }
 2766 
 2767 out:
 2768         kfree(pprops);
 2769         kfree(dprops);
 2770 
 2771         return err;
 2772 }
 2773 
 2774 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
 2775 {
 2776         int err;
 2777 
 2778         err = mlx5_mr_cache_cleanup(dev);
 2779         if (err)
 2780                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
 2781 
 2782         if (dev->umrc.qp)
 2783                 mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
 2784         if (dev->umrc.cq)
 2785                 ib_free_cq(dev->umrc.cq);
 2786         if (dev->umrc.pd)
 2787                 ib_dealloc_pd(dev->umrc.pd);
 2788 }
 2789 
 2790 enum {
 2791         MAX_UMR_WR = 128,
 2792 };
 2793 
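/*
 * Create the resources backing UMR (user memory registration) work: a
 * PD, a CQ and a dedicated MLX5_IB_QPT_REG_UMR QP that is moved through
 * INIT -> RTR -> RTS before the MR cache is initialized.  The semaphore
 * limits outstanding UMR work requests to MAX_UMR_WR.
 */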
 2794 static int create_umr_res(struct mlx5_ib_dev *dev)
 2795 {
 2796         struct ib_qp_init_attr *init_attr = NULL;
 2797         struct ib_qp_attr *attr = NULL;
 2798         struct ib_pd *pd;
 2799         struct ib_cq *cq;
 2800         struct ib_qp *qp;
 2801         int ret;
 2802 
 2803         attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 2804         init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
 2805         if (!attr || !init_attr) {
 2806                 ret = -ENOMEM;
 2807                 goto error_0;
 2808         }
 2809 
 2810         pd = ib_alloc_pd(&dev->ib_dev, 0);
 2811         if (IS_ERR(pd)) {
 2812                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
 2813                 ret = PTR_ERR(pd);
 2814                 goto error_0;
 2815         }
 2816 
 2817         cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
 2818         if (IS_ERR(cq)) {
 2819                 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
 2820                 ret = PTR_ERR(cq);
 2821                 goto error_2;
 2822         }
 2823 
 2824         init_attr->send_cq = cq;
 2825         init_attr->recv_cq = cq;
 2826         init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 2827         init_attr->cap.max_send_wr = MAX_UMR_WR;
 2828         init_attr->cap.max_send_sge = 1;
 2829         init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
 2830         init_attr->port_num = 1;
 2831         qp = mlx5_ib_create_qp(pd, init_attr, NULL);
 2832         if (IS_ERR(qp)) {
 2833                 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
 2834                 ret = PTR_ERR(qp);
 2835                 goto error_3;
 2836         }
 2837         qp->device     = &dev->ib_dev;
 2838         qp->real_qp    = qp;
 2839         qp->uobject    = NULL;
 2840         qp->qp_type    = MLX5_IB_QPT_REG_UMR;
 2841 
 2842         attr->qp_state = IB_QPS_INIT;
 2843         attr->port_num = 1;
 2844         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 2845                                 IB_QP_PORT, NULL);
 2846         if (ret) {
 2847                 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
 2848                 goto error_4;
 2849         }
 2850 
 2851         memset(attr, 0, sizeof(*attr));
 2852         attr->qp_state = IB_QPS_RTR;
 2853         attr->path_mtu = IB_MTU_256;
 2854 
 2855         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
 2856         if (ret) {
 2857                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
 2858                 goto error_4;
 2859         }
 2860 
 2861         memset(attr, 0, sizeof(*attr));
 2862         attr->qp_state = IB_QPS_RTS;
 2863         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
 2864         if (ret) {
 2865                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
 2866                 goto error_4;
 2867         }
 2868 
 2869         dev->umrc.qp = qp;
 2870         dev->umrc.cq = cq;
 2871         dev->umrc.pd = pd;
 2872 
 2873         sema_init(&dev->umrc.sem, MAX_UMR_WR);
 2874         ret = mlx5_mr_cache_init(dev);
 2875         if (ret) {
 2876                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
 2877                 goto error_4;
 2878         }
 2879 
 2880         kfree(attr);
 2881         kfree(init_attr);
 2882 
 2883         return 0;
 2884 
 2885 error_4:
 2886         mlx5_ib_destroy_qp(qp, NULL);
 2887         dev->umrc.qp = NULL;
 2888 
 2889 error_3:
 2890         ib_free_cq(cq);
 2891         dev->umrc.cq = NULL;
 2892 
 2893 error_2:
 2894         ib_dealloc_pd(pd);
 2895         dev->umrc.pd = NULL;
 2896 
 2897 error_0:
 2898         kfree(attr);
 2899         kfree(init_attr);
 2900         return ret;
 2901 }
 2902 
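/*
 * Allocate the driver-internal verbs resources: protection domain p0,
 * completion queue c0, XRC domains x0/x1, an XRC SRQ s0 and a basic SRQ
 * s1, plus the per-port P_Key change work entries.  Failures unwind the
 * already-created objects via the error labels below.
 */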
 2903 static int create_dev_resources(struct mlx5_ib_resources *devr)
 2904 {
 2905         struct ib_srq_init_attr attr;
 2906         struct mlx5_ib_dev *dev;
 2907         struct ib_device *ibdev;
 2908         struct ib_cq_init_attr cq_attr = {.cqe = 1};
 2909         int port;
 2910         int ret = 0;
 2911 
 2912         dev = container_of(devr, struct mlx5_ib_dev, devr);
 2913         ibdev = &dev->ib_dev;
 2914 
 2915         mutex_init(&devr->mutex);
 2916 
 2917         devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
 2918         if (!devr->p0)
 2919                 return -ENOMEM;
 2920 
 2921         devr->p0->device  = ibdev;
 2922         devr->p0->uobject = NULL;
 2923         atomic_set(&devr->p0->usecnt, 0);
 2924 
 2925         ret = mlx5_ib_alloc_pd(devr->p0, NULL);
 2926         if (ret)
 2927                 goto error0;
 2928 
 2929         devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
 2930         if (!devr->c0) {
 2931                 ret = -ENOMEM;
 2932                 goto error1;
 2933         }
 2934 
 2935         devr->c0->device = &dev->ib_dev;
 2936         atomic_set(&devr->c0->usecnt, 0);
 2937 
 2938         ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
 2939         if (ret)
 2940                 goto err_create_cq;
 2941 
 2942         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
 2943         if (IS_ERR(devr->x0)) {
 2944                 ret = PTR_ERR(devr->x0);
 2945                 goto error2;
 2946         }
 2947         devr->x0->device = &dev->ib_dev;
 2948         devr->x0->inode = NULL;
 2949         atomic_set(&devr->x0->usecnt, 0);
 2950         mutex_init(&devr->x0->tgt_qp_mutex);
 2951         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
 2952 
 2953         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
 2954         if (IS_ERR(devr->x1)) {
 2955                 ret = PTR_ERR(devr->x1);
 2956                 goto error3;
 2957         }
 2958         devr->x1->device = &dev->ib_dev;
 2959         devr->x1->inode = NULL;
 2960         atomic_set(&devr->x1->usecnt, 0);
 2961         mutex_init(&devr->x1->tgt_qp_mutex);
 2962         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
 2963 
 2964         memset(&attr, 0, sizeof(attr));
 2965         attr.attr.max_sge = 1;
 2966         attr.attr.max_wr = 1;
 2967         attr.srq_type = IB_SRQT_XRC;
 2968         attr.ext.cq = devr->c0;
 2969         attr.ext.xrc.xrcd = devr->x0;
 2970 
 2971         devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
 2972         if (!devr->s0) {
 2973                 ret = -ENOMEM;
 2974                 goto error4;
 2975         }
 2976 
 2977         devr->s0->device        = &dev->ib_dev;
 2978         devr->s0->pd            = devr->p0;
 2979         devr->s0->srq_type      = IB_SRQT_XRC;
 2980         devr->s0->ext.xrc.xrcd  = devr->x0;
 2981         devr->s0->ext.cq        = devr->c0;
 2982         ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
 2983         if (ret)
 2984                 goto err_create;
 2985 
 2986         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
 2987         atomic_inc(&devr->s0->ext.cq->usecnt);
 2988         atomic_inc(&devr->p0->usecnt);
 2989         atomic_set(&devr->s0->usecnt, 0);
 2990 
 2991         memset(&attr, 0, sizeof(attr));
 2992         attr.attr.max_sge = 1;
 2993         attr.attr.max_wr = 1;
 2994         attr.srq_type = IB_SRQT_BASIC;
 2995         devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
 2996         if (!devr->s1) {
 2997                 ret = -ENOMEM;
 2998                 goto error5;
 2999         }
 3000 
 3001         devr->s1->device        = &dev->ib_dev;
 3002         devr->s1->pd            = devr->p0;
 3003         devr->s1->srq_type      = IB_SRQT_BASIC;
 3004         devr->s1->ext.cq        = devr->c0;
 3005 
 3006         ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
 3007         if (ret)
 3008                 goto error6;
 3009 
 3010         atomic_inc(&devr->p0->usecnt);
 3011         atomic_set(&devr->s1->usecnt, 0);
 3012 
 3013         for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
 3014                 INIT_WORK(&devr->ports[port].pkey_change_work,
 3015                           pkey_change_handler);
 3016                 devr->ports[port].devr = devr;
 3017         }
 3018 
 3019         return 0;
 3020 
 3021 error6:
 3022         kfree(devr->s1);
 3023 error5:
 3024         mlx5_ib_destroy_srq(devr->s0, NULL);
 3025 err_create:
 3026         kfree(devr->s0);
 3027 error4:
 3028         mlx5_ib_dealloc_xrcd(devr->x1, NULL);
 3029 error3:
 3030         mlx5_ib_dealloc_xrcd(devr->x0, NULL);
 3031 error2:
 3032         mlx5_ib_destroy_cq(devr->c0, NULL);
 3033 err_create_cq:
 3034         kfree(devr->c0);
 3035 error1:
 3036         mlx5_ib_dealloc_pd(devr->p0, NULL);
 3037 error0:
 3038         kfree(devr->p0);
 3039         return ret;
 3040 }
 3041 
 3042 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
 3043 {
 3044         int port;
 3045 
 3046         mlx5_ib_destroy_srq(devr->s1, NULL);
 3047         kfree(devr->s1);
 3048         mlx5_ib_destroy_srq(devr->s0, NULL);
 3049         kfree(devr->s0);
 3050         mlx5_ib_dealloc_xrcd(devr->x0, NULL);
 3051         mlx5_ib_dealloc_xrcd(devr->x1, NULL);
 3052         mlx5_ib_destroy_cq(devr->c0, NULL);
 3053         kfree(devr->c0);
 3054         mlx5_ib_dealloc_pd(devr->p0, NULL);
 3055         kfree(devr->p0);
 3056 
 3057         /* Make sure no P_Key change work items are still executing */
 3058         for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
 3059                 cancel_work_sync(&devr->ports[port].pkey_change_work);
 3060 }
 3061 
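/*
 * Compute the RDMA core capability flags for port 1: plain IB for an
 * InfiniBand link layer; for Ethernet, RoCE v1/v2 flags are reported
 * only when the device advertises both IPv4 and IPv6 L3 support.
 */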
 3062 static u32 get_core_cap_flags(struct ib_device *ibdev)
 3063 {
 3064         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 3065         enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
 3066         u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
 3067         u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
 3068         u32 ret = 0;
 3069 
 3070         if (ll == IB_LINK_LAYER_INFINIBAND)
 3071                 return RDMA_CORE_PORT_IBA_IB;
 3072 
 3073         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
 3074                 return 0;
 3075 
 3076         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
 3077                 return 0;
 3078 
 3079         if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
 3080                 ret |= RDMA_CORE_PORT_IBA_ROCE;
 3081 
 3082         if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
 3083                 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 3084 
 3085         return ret;
 3086 }
 3087 
 3088 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 3089                                struct ib_port_immutable *immutable)
 3090 {
 3091         struct ib_port_attr attr;
 3092         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 3093         enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
 3094         int err;
 3095 
 3096         err = mlx5_ib_query_port(ibdev, port_num, &attr);
 3097         if (err)
 3098                 return err;
 3099 
 3100         immutable->pkey_tbl_len = attr.pkey_tbl_len;
 3101         immutable->gid_tbl_len = attr.gid_tbl_len;
 3102         immutable->core_cap_flags = get_core_cap_flags(ibdev);
 3103         if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
 3104                 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 3105 
 3106         return 0;
 3107 }
 3108 
 3109 static void get_dev_fw_str(struct ib_device *ibdev, char *str,
 3110                            size_t str_len)
 3111 {
 3112         struct mlx5_ib_dev *dev =
 3113                 container_of(ibdev, struct mlx5_ib_dev, ib_dev);
 3114         snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
 3115                        fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 3116 }
 3117 
 3118 static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
 3119 {
 3120         return 0;
 3121 }
 3122 
 3123 static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
 3124 {
 3125 }
 3126 
 3127 static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
 3128 {
 3129         if (dev->roce.nb.notifier_call) {
 3130                 unregister_netdevice_notifier(&dev->roce.nb);
 3131                 dev->roce.nb.notifier_call = NULL;
 3132         }
 3133 }
 3134 
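/*
 * RoCE bring-up: scan all VNETs for the mlx5en ("mce") interface that
 * belongs to this core device and cache it as the RoCE netdev, register
 * the netdevice notifier, and enable RoCE on the NIC vport when the
 * capability is present.
 */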
 3135 static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
 3136 {
 3137         VNET_ITERATOR_DECL(vnet_iter);
 3138         struct ifnet *idev;
 3139         int err;
 3140 
 3141         /* Check if mlx5en net device already exists */
 3142         VNET_LIST_RLOCK();
 3143         VNET_FOREACH(vnet_iter) {
 3144                 IFNET_RLOCK();
 3145                 CURVNET_SET_QUIET(vnet_iter);
 3146                 CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
 3147                         /* check if network interface belongs to mlx5en */
 3148                         if (!mlx5_netdev_match(idev, dev->mdev, "mce"))
 3149                                 continue;
 3150                         write_lock(&dev->roce.netdev_lock);
 3151                         dev->roce.netdev = idev;
 3152                         write_unlock(&dev->roce.netdev_lock);
 3153                 }
 3154                 CURVNET_RESTORE();
 3155                 IFNET_RUNLOCK();
 3156         }
 3157         VNET_LIST_RUNLOCK();
 3158 
 3159         dev->roce.nb.notifier_call = mlx5_netdev_event;
 3160         err = register_netdevice_notifier(&dev->roce.nb);
 3161         if (err) {
 3162                 dev->roce.nb.notifier_call = NULL;
 3163                 return err;
 3164         }
 3165 
 3166         if (MLX5_CAP_GEN(dev->mdev, roce)) {
 3167                 err = mlx5_nic_vport_enable_roce(dev->mdev);
 3168                 if (err)
 3169                         goto err_unregister_netdevice_notifier;
 3170         }
 3171 
 3172         err = mlx5_roce_lag_init(dev);
 3173         if (err)
 3174                 goto err_disable_roce;
 3175 
 3176         return 0;
 3177 
 3178 err_disable_roce:
 3179         if (MLX5_CAP_GEN(dev->mdev, roce))
 3180                 mlx5_nic_vport_disable_roce(dev->mdev);
 3181 
 3182 err_unregister_netdevice_notifier:
 3183         mlx5_remove_roce_notifier(dev);
 3184         return err;
 3185 }
 3186 
 3187 static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
 3188 {
 3189         mlx5_roce_lag_cleanup(dev);
 3190         if (MLX5_CAP_GEN(dev->mdev, roce))
 3191                 mlx5_nic_vport_disable_roce(dev->mdev);
 3192 }
 3193 
 3194 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
 3195 {
 3196         mlx5_vport_dealloc_q_counter(dev->mdev,
 3197                                      MLX5_INTERFACE_PROTOCOL_IB,
 3198                                      dev->port[port_num].q_cnt_id);
 3199         dev->port[port_num].q_cnt_id = 0;
 3200 }
 3201 
 3202 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
 3203 {
 3204         unsigned int i;
 3205 
 3206         for (i = 0; i < dev->num_ports; i++)
 3207                 mlx5_ib_dealloc_q_port_counter(dev, i);
 3208 }
 3209 
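/*
 * Allocate one IB-protocol queue counter per port; on failure, release
 * any counters that were already allocated.
 */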
 3210 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 3211 {
 3212         int i;
 3213         int ret;
 3214 
 3215         for (i = 0; i < dev->num_ports; i++) {
 3216                 ret = mlx5_vport_alloc_q_counter(dev->mdev,
 3217                                                  MLX5_INTERFACE_PROTOCOL_IB,
 3218                                                  &dev->port[i].q_cnt_id);
 3219                 if (ret) {
 3220                         mlx5_ib_warn(dev,
 3221                                      "couldn't allocate queue counter for port %d, err %d\n",
 3222                                      i + 1, ret);
 3223                         goto dealloc_counters;
 3224                 }
 3225         }
 3226 
 3227         return 0;
 3228 
 3229 dealloc_counters:
 3230         while (--i >= 0)
 3231                 mlx5_ib_dealloc_q_port_counter(dev, i);
 3232 
 3233         return ret;
 3234 }
 3235 
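/*
 * Hardware counter names exposed through the rdma_hw_stats interface;
 * names[] and stats_offsets[] must stay index-aligned with each other
 * and with the query_q_counter_out layout.
 */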
 3236 static const char * const names[] = {
 3237         "rx_write_requests",
 3238         "rx_read_requests",
 3239         "rx_atomic_requests",
 3240         "out_of_buffer",
 3241         "out_of_sequence",
 3242         "duplicate_request",
 3243         "rnr_nak_retry_err",
 3244         "packet_seq_err",
 3245         "implied_nak_seq_err",
 3246         "local_ack_timeout_err",
 3247 };
 3248 
 3249 static const size_t stats_offsets[] = {
 3250         MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
 3251         MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
 3252         MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
 3253         MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
 3254         MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
 3255         MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
 3256         MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
 3257         MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
 3258         MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
 3259         MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
 3260 };
 3261 
 3262 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
 3263                                                     u8 port_num)
 3264 {
 3265         BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
 3266 
 3267         /* We support only per-port stats */
 3268         if (port_num == 0)
 3269                 return NULL;
 3270 
 3271         return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
 3272                                           RDMA_HW_STATS_DEFAULT_LIFESPAN);
 3273 }
 3274 
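/*
 * Read the queue counter allocated for the given port and copy the
 * 32-bit big-endian counter values into the stats array in names[]
 * order.
 */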
 3275 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 3276                                 struct rdma_hw_stats *stats,
 3277                                 u8 port, int index)
 3278 {
 3279         struct mlx5_ib_dev *dev = to_mdev(ibdev);
 3280         int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
 3281         void *out;
 3282         __be32 val;
 3283         int ret;
 3284         int i;
 3285 
 3286         if (!port || !stats)
 3287                 return -ENOSYS;
 3288 
 3289         out = mlx5_vzalloc(outlen);
 3290         if (!out)
 3291                 return -ENOMEM;
 3292 
 3293         ret = mlx5_vport_query_q_counter(dev->mdev,
 3294                                         dev->port[port - 1].q_cnt_id, 0,
 3295                                         out, outlen);
 3296         if (ret)
 3297                 goto free;
 3298 
 3299         for (i = 0; i < ARRAY_SIZE(names); i++) {
 3300                 val = *(__be32 *)(out + stats_offsets[i]);
 3301                 stats->value[i] = (u64)be32_to_cpu(val);
 3302         }
 3303 free:
 3304         kvfree(out);
 3305         return ARRAY_SIZE(names);
 3306 }
 3307 
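/*
 * Allocate the blue-flame registers used by the driver (regular,
 * fast-path and write-combining), releasing earlier allocations if a
 * later one fails.
 */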
 3308 static int mlx5_ib_stage_bfreg_init(struct mlx5_ib_dev *dev)
 3309 {
 3310         int err;
 3311 
 3312         err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
 3313         if (err)
 3314                 return err;
 3315 
 3316         err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
 3317         if (err) {
 3318                 mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 3319                 return err;
 3320         }
 3321 
 3322         err = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
 3323         if (err) {
 3324                 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 3325                 mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 3326         }
 3327 
 3328         return err;
 3329 }
 3330 
 3331 static void mlx5_ib_stage_bfreg_cleanup(struct mlx5_ib_dev *dev)
 3332 {
 3333         mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
 3334         mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 3335         mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 3336 }
 3337 
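/*
 * mlx5 core "add" callback: allocate the ib_device, wire up the verbs
 * entry points according to device capabilities, then bring up RoCE
 * (Ethernet link layer only), device resources, ODP, queue counters and
 * blue-flame registers, register the device, create the UMR resources,
 * sysfs attributes and congestion control, and finally mark the device
 * active.
 */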
 3338 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 3339 {
 3340         struct mlx5_ib_dev *dev;
 3341         enum rdma_link_layer ll;
 3342         int port_type_cap;
 3343         int err;
 3344         int i;
 3345 
 3346         port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 3347         ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 3348 
 3349         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
 3350         if (!dev)
 3351                 return NULL;
 3352 
 3353         dev->mdev = mdev;
 3354 
 3355         dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
 3356                             GFP_KERNEL);
 3357         if (!dev->port)
 3358                 goto err_dealloc;
 3359 
 3360         rwlock_init(&dev->roce.netdev_lock);
 3361         err = get_port_caps(dev);
 3362         if (err)
 3363                 goto err_free_port;
 3364 
 3365         if (mlx5_use_mad_ifc(dev))
 3366                 get_ext_port_caps(dev);
 3367 
 3368         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
 3369 
 3370         mutex_init(&dev->lb_mutex);
 3371 
 3372         INIT_IB_DEVICE_OPS(&dev->ib_dev.ops, mlx5, MLX5);
 3373         snprintf(dev->ib_dev.name, IB_DEVICE_NAME_MAX, "mlx5_%d", device_get_unit(mdev->pdev->dev.bsddev));
 3374         dev->ib_dev.owner               = THIS_MODULE;
 3375         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
 3376         dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
 3377         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
 3378         dev->ib_dev.phys_port_cnt     = dev->num_ports;
 3379         dev->ib_dev.num_comp_vectors    =
 3380                 dev->mdev->priv.eq_table.num_comp_vectors;
 3381         dev->ib_dev.dma_device  = &mdev->pdev->dev;
 3382 
 3383         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
 3384         dev->ib_dev.uverbs_cmd_mask     =
 3385                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 3386                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 3387                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 3388                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 3389                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 3390                 (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
 3391                 (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
 3392                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 3393                 (1ull << IB_USER_VERBS_CMD_REREG_MR)            |
 3394                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 3395                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 3396                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 3397                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 3398                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 3399                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 3400                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 3401                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 3402                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 3403                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
 3404                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
 3405                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 3406                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 3407                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 3408                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
 3409                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
 3410                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
 3411         dev->ib_dev.uverbs_ex_cmd_mask =
 3412                 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)     |
 3413                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)        |
 3414                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 3415 
 3416         dev->ib_dev.query_device        = mlx5_ib_query_device;
 3417         dev->ib_dev.query_port          = mlx5_ib_query_port;
 3418         dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
 3419         if (ll == IB_LINK_LAYER_ETHERNET)
 3420                 dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
 3421         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
 3422         dev->ib_dev.add_gid             = mlx5_ib_add_gid;
 3423         dev->ib_dev.del_gid             = mlx5_ib_del_gid;
 3424         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
 3425         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
 3426         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
 3427         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
 3428         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
 3429         dev->ib_dev.mmap                = mlx5_ib_mmap;
 3430         dev->ib_dev.mmap_free           = mlx5_ib_mmap_free;
 3431         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
 3432         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
 3433         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
 3434         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
 3435         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
 3436         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
 3437         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
 3438         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
 3439         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
 3440         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
 3441         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
 3442         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
 3443         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
 3444         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
 3445         dev->ib_dev.post_send           = mlx5_ib_post_send;
 3446         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
 3447         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
 3448         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
 3449         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
 3450         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
 3451         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
 3452         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
 3453         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
 3454         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
 3455         dev->ib_dev.rereg_user_mr       = mlx5_ib_rereg_user_mr;
 3456         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
 3457         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
 3458         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
 3459         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
 3460         dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
 3461         dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
 3462         dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
 3463         dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 3464         dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
 3465         if (mlx5_core_is_pf(mdev)) {
 3466                 dev->ib_dev.get_vf_config       = mlx5_ib_get_vf_config;
 3467                 dev->ib_dev.set_vf_link_state   = mlx5_ib_set_vf_link_state;
 3468                 dev->ib_dev.get_vf_stats        = mlx5_ib_get_vf_stats;
 3469                 dev->ib_dev.set_vf_guid         = mlx5_ib_set_vf_guid;
 3470         }
 3471 
 3472         dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
 3473 
 3474         mlx5_ib_internal_fill_odp_caps(dev);
 3475 
 3476         if (MLX5_CAP_GEN(mdev, imaicl)) {
 3477                 dev->ib_dev.alloc_mw            = mlx5_ib_alloc_mw;
 3478                 dev->ib_dev.dealloc_mw          = mlx5_ib_dealloc_mw;
 3479                 dev->ib_dev.uverbs_cmd_mask |=
 3480                         (1ull << IB_USER_VERBS_CMD_ALLOC_MW)    |
 3481                         (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
 3482         }
 3483 
 3484         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
 3485             MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 3486                 dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
 3487                 dev->ib_dev.alloc_hw_stats      = mlx5_ib_alloc_hw_stats;
 3488         }
 3489 
 3490         if (MLX5_CAP_GEN(mdev, xrc)) {
 3491                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
 3492                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
 3493                 dev->ib_dev.uverbs_cmd_mask |=
 3494                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
 3495                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
 3496         }
 3497 
 3498         if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
 3499             IB_LINK_LAYER_ETHERNET) {
 3500                 dev->ib_dev.create_flow = mlx5_ib_create_flow;
 3501                 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
 3502                 dev->ib_dev.create_wq    = mlx5_ib_create_wq;
 3503                 dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
 3504                 dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
 3505                 dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
 3506                 dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
 3507                 dev->ib_dev.uverbs_ex_cmd_mask |=
 3508                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
 3509                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
 3510                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
 3511                         (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
 3512                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
 3513                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
 3514                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
 3515         }
 3516         err = init_node_data(dev);
 3517         if (err)
 3518                 goto err_free_port;
 3519 
 3520         mutex_init(&dev->flow_db.lock);
 3521         mutex_init(&dev->cap_mask_mutex);
 3522         INIT_LIST_HEAD(&dev->qp_list);
 3523         spin_lock_init(&dev->reset_flow_resource_lock);
 3524 
 3525         if (ll == IB_LINK_LAYER_ETHERNET) {
 3526                 err = mlx5_enable_roce(dev);
 3527                 if (err)
 3528                         goto err_free_port;
 3529         }
 3530 
 3531         err = create_dev_resources(&dev->devr);
 3532         if (err)
 3533                 goto err_disable_roce;
 3534 
 3535         err = mlx5_ib_odp_init_one(dev);
 3536         if (err)
 3537                 goto err_rsrc;
 3538 
 3539         err = mlx5_ib_alloc_q_counters(dev);
 3540         if (err)
 3541                 goto err_odp;
 3542 
 3543         err = mlx5_ib_stage_bfreg_init(dev);
 3544         if (err)
 3545                 goto err_q_cnt;
 3546 
 3547         err = ib_register_device(&dev->ib_dev, NULL);
 3548         if (err)
 3549                 goto err_bfreg;
 3550 
 3551         err = create_umr_res(dev);
 3552         if (err)
 3553                 goto err_dev;
 3554 
 3555         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
 3556                 err = device_create_file(&dev->ib_dev.dev,
 3557                                          mlx5_class_attributes[i]);
 3558                 if (err)
 3559                         goto err_umrc;
 3560         }
 3561 
 3562         err = mlx5_ib_init_congestion(dev);
 3563         if (err)
 3564                 goto err_umrc;
 3565 
 3566         dev->ib_active = true;
 3567 
 3568         return dev;
 3569 
 3570 err_umrc:
 3571         destroy_umrc_res(dev);
 3572 
 3573 err_dev:
 3574         ib_unregister_device(&dev->ib_dev);
 3575 
 3576 err_bfreg:
 3577         mlx5_ib_stage_bfreg_cleanup(dev);
 3578 
 3579 err_q_cnt:
 3580         mlx5_ib_dealloc_q_counters(dev);
 3581 
 3582 err_odp:
 3583         mlx5_ib_odp_remove_one(dev);
 3584 
 3585 err_rsrc:
 3586         destroy_dev_resources(&dev->devr);
 3587 
 3588 err_disable_roce:
 3589         if (ll == IB_LINK_LAYER_ETHERNET) {
 3590                 mlx5_disable_roce(dev);
 3591                 mlx5_remove_roce_notifier(dev);
 3592         }
 3593 
 3594 err_free_port:
 3595         kfree(dev->port);
 3596 
 3597 err_dealloc:
 3598         ib_dealloc_device((struct ib_device *)dev);
 3599 
 3600         return NULL;
 3601 }
 3602 
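/*
 * mlx5 core "remove" callback: tear down everything set up by
 * mlx5_ib_add(), roughly in reverse order.
 */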
 3603 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 3604 {
 3605         struct mlx5_ib_dev *dev = context;
 3606         enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 3607 
 3608         mlx5_ib_cleanup_congestion(dev);
 3609         mlx5_remove_roce_notifier(dev);
 3610         ib_unregister_device(&dev->ib_dev);
 3611         mlx5_ib_stage_bfreg_cleanup(dev);
 3612         mlx5_ib_dealloc_q_counters(dev);
 3613         destroy_umrc_res(dev);
 3614         mlx5_ib_odp_remove_one(dev);
 3615         destroy_dev_resources(&dev->devr);
 3616         if (ll == IB_LINK_LAYER_ETHERNET)
 3617                 mlx5_disable_roce(dev);
 3618         kfree(dev->port);
 3619         ib_dealloc_device(&dev->ib_dev);
 3620 }
 3621 
 3622 static struct mlx5_interface mlx5_ib_interface = {
 3623         .add            = mlx5_ib_add,
 3624         .remove         = mlx5_ib_remove,
 3625         .event          = mlx5_ib_event,
 3626         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
 3627 };
 3628 
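/*
 * Module entry points: initialize ODP support, then register with the
 * mlx5 core so that mlx5_ib_add()/mlx5_ib_remove() are invoked for each
 * core device instance.
 */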
 3629 static int __init mlx5_ib_init(void)
 3630 {
 3631         int err;
 3632 
 3633         err = mlx5_ib_odp_init();
 3634         if (err)
 3635                 return err;
 3636 
 3637         err = mlx5_register_interface(&mlx5_ib_interface);
 3638         if (err)
 3639                 goto clean_odp;
 3640 
 3641         return err;
 3642 
 3643 clean_odp:
 3644         mlx5_ib_odp_cleanup();
 3645         return err;
 3646 }
 3647 
 3648 static void __exit mlx5_ib_cleanup(void)
 3649 {
 3650         mlx5_unregister_interface(&mlx5_ib_interface);
 3651         mlx5_ib_odp_cleanup();
 3652 }
 3653 
 3654 module_init_order(mlx5_ib_init, SI_ORDER_SEVENTH);
 3655 module_exit_order(mlx5_ib_cleanup, SI_ORDER_SEVENTH);



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.