FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c


    1 /*
    2  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
    3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
    4  *
    5  * This software is available to you under a choice of one of two
    6  * licenses.  You may choose to be licensed under the terms of the GNU
    7  * General Public License (GPL) Version 2, available from the file
    8  * COPYING in the main directory of this source tree, or the
    9  * OpenIB.org BSD license below:
   10  *
   11  *     Redistribution and use in source and binary forms, with or
   12  *     without modification, are permitted provided that the following
   13  *     conditions are met:
   14  *
   15  *      - Redistributions of source code must retain the above
   16  *        copyright notice, this list of conditions and the following
   17  *        disclaimer.
   18  *
   19  *      - Redistributions in binary form must reproduce the above
   20  *        copyright notice, this list of conditions and the following
   21  *        disclaimer in the documentation and/or other materials
   22  *        provided with the distribution.
   23  *
   24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   31  * SOFTWARE.
   32  */
   33 
   34 #define LINUXKPI_PARAM_PREFIX mlx4_
   35 
   36 #include <linux/module.h>
   37 #include <linux/slab.h>
   38 #include <linux/errno.h>
   39 #include <linux/etherdevice.h>
   40 #include <linux/netdevice.h>
   41 #include <linux/if_vlan.h>
   42 #include <linux/fs.h>
   43 #include <linux/rcupdate.h>
   44 #include <linux/notifier.h>
   45 #include <linux/delay.h>
   46 
   47 #include <net/ipv6.h>
   48 
   49 #include <rdma/ib_smi.h>
   50 #include <rdma/ib_user_verbs.h>
   51 #include <rdma/ib_addr.h>
   52 #include <rdma/ib_cache.h>
   53 
   54 #include <dev/mlx4/driver.h>
   55 #include <dev/mlx4/cmd.h>
   56 #include <dev/mlx4/qp.h>
   57 #include <linux/sched.h>
   58 #include <linux/page.h>
   59 #include <linux/printk.h>
   60 #include "mlx4_ib.h"
   61 #include <rdma/mlx4-abi.h>
   62 #include "wc.h"
   63 
   64 #define DRV_NAME        MLX4_IB_DRV_NAME
   65 #ifndef DRV_VERSION
   66 #define DRV_VERSION     "3.7.1"
   67 #endif
   68 #define DRV_RELDATE     "November 2021"
   69 
   70 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
   71 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
   72 #define MLX4_IB_CARD_REV_A0   0xA0
   73 
   74 MODULE_AUTHOR("Roland Dreier");
   75 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
   76 MODULE_LICENSE("Dual BSD/GPL");
   77 
   78 int mlx4_ib_sm_guid_assign = 0;
   79 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
   80 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
   81 
   82 static const char mlx4_ib_version[] =
   83         DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
   84         DRV_VERSION " (" DRV_RELDATE ")\n";
   85 
   86 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
   87 
   88 static struct workqueue_struct *wq;
   89 
   90 static void init_query_mad(struct ib_smp *mad)
   91 {
   92         mad->base_version  = 1;
   93         mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
   94         mad->class_version = 1;
   95         mad->method        = IB_MGMT_METHOD_GET;
   96 }
   97 
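       /*
        * Device-managed flow steering (DMFS) is reported only when the
        * device is in MLX4_STEERING_MODE_DEVICE_MANAGED and the FS_EN /
        * DMFS_IPOIB capability bits cover the port types actually present;
        * it is also disabled for IB ports in a multi-function environment.
        */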
   98 static int check_flow_steering_support(struct mlx4_dev *dev)
   99 {
  100         int eth_num_ports = 0;
  101         int ib_num_ports = 0;
  102 
  103         int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
  104 
  105         if (dmfs) {
  106                 int i;
  107                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
  108                         eth_num_ports++;
  109                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
  110                         ib_num_ports++;
  111                 dmfs &= (!ib_num_ports ||
  112                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
  113                         (!eth_num_ports ||
  114                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
  115                 if (ib_num_ports && mlx4_is_mfunc(dev)) {
  116                         pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
  117                         dmfs = 0;
  118                 }
  119         }
  120         return dmfs;
  121 }
  122 
  123 static int num_ib_ports(struct mlx4_dev *dev)
  124 {
  125         int ib_ports = 0;
  126         int i;
  127 
  128         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
  129                 ib_ports++;
  130 
  131         return ib_ports;
  132 }
  133 
  134 static struct ifnet *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
  135 {
  136         struct mlx4_ib_dev *ibdev = to_mdev(device);
  137         struct ifnet *dev;
  138 
  139         rcu_read_lock();
  140         dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
  141 
  142 #if 0
  143         if (dev) {
  144                 if (mlx4_is_bonded(ibdev->dev)) {
  145                         struct ifnet *upper = NULL;
  146 
  147                         upper = netdev_master_upper_dev_get_rcu(dev);
  148                         if (upper) {
  149                                 struct ifnet *active;
  150 
  151                                 active = bond_option_active_slave_get_rcu(mlx4_netdev_priv(upper));
  152                                 if (active)
  153                                         dev = active;
  154                         }
  155                 }
  156         }
  157 #endif
  158         if (dev)
  159                 if_ref(dev);
  160 
  161         rcu_read_unlock();
  162         return dev;
  163 }
  164 
  165 static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
  166                                   struct mlx4_ib_dev *ibdev,
  167                                   u8 port_num)
  168 {
  169         struct mlx4_cmd_mailbox *mailbox;
  170         int err;
  171         struct mlx4_dev *dev = ibdev->dev;
  172         int i;
  173         union ib_gid *gid_tbl;
  174 
  175         mailbox = mlx4_alloc_cmd_mailbox(dev);
  176         if (IS_ERR(mailbox))
  177                 return -ENOMEM;
  178 
  179         gid_tbl = mailbox->buf;
  180 
  181         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
  182                 memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
  183 
  184         err = mlx4_cmd(dev, mailbox->dma,
  185                        MLX4_SET_PORT_GID_TABLE << 8 | port_num,
  186                        1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
  187                        MLX4_CMD_WRAPPED);
  188         if (mlx4_is_bonded(dev))
  189                 err += mlx4_cmd(dev, mailbox->dma,
  190                                 MLX4_SET_PORT_GID_TABLE << 8 | 2,
  191                                 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
  192                                 MLX4_CMD_WRAPPED);
  193 
  194         mlx4_free_cmd_mailbox(dev, mailbox);
  195         return err;
  196 }
  197 
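       /*
        * Push the port GID table to firmware in the RoCE v1/v2 address
        * table layout: RoCE v2 (UDP encap) entries are marked version 2;
        * those that are not IPv4-mapped additionally get type 1, while
        * IPv4-mapped GIDs keep only the low four address bytes.
        */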
  198 static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
  199                                      struct mlx4_ib_dev *ibdev,
  200                                      u8 port_num)
  201 {
  202         struct mlx4_cmd_mailbox *mailbox;
  203         int err;
  204         struct mlx4_dev *dev = ibdev->dev;
  205         int i;
  206         struct {
  207                 union ib_gid    gid;
  208                 __be32          rsrvd1[2];
  209                 __be16          rsrvd2;
  210                 u8              type;
  211                 u8              version;
  212                 __be32          rsrvd3;
  213         } *gid_tbl;
  214 
  215         mailbox = mlx4_alloc_cmd_mailbox(dev);
  216         if (IS_ERR(mailbox))
  217                 return -ENOMEM;
  218 
  219         gid_tbl = mailbox->buf;
  220         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
  221                 memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
  222                 if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
  223                         gid_tbl[i].version = 2;
  224                         if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
  225                                 gid_tbl[i].type = 1;
  226                         else
  227                                 memset(&gid_tbl[i].gid, 0, 12);
  228                 }
  229         }
  230 
  231         err = mlx4_cmd(dev, mailbox->dma,
  232                        MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
  233                        1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
  234                        MLX4_CMD_WRAPPED);
  235         if (mlx4_is_bonded(dev))
  236                 err += mlx4_cmd(dev, mailbox->dma,
  237                                 MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
  238                                 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
  239                                 MLX4_CMD_WRAPPED);
  240 
  241         mlx4_free_cmd_mailbox(dev, mailbox);
  242         return err;
  243 }
  244 
  245 static int mlx4_ib_update_gids(struct gid_entry *gids,
  246                                struct mlx4_ib_dev *ibdev,
  247                                u8 port_num)
  248 {
  249         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
  250                 return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
  251 
  252         return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
  253 }
  254 
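       /*
        * Add a RoCE GID to the per-port software cache.  A matching
        * entry (same GID and type) is only reference counted; a new
        * entry takes a free slot and triggers a rewrite of the whole
        * hardware GID table for the port.
        */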
  255 static int mlx4_ib_add_gid(struct ib_device *device,
  256                            u8 port_num,
  257                            unsigned int index,
  258                            const union ib_gid *gid,
  259                            const struct ib_gid_attr *attr,
  260                            void **context)
  261 {
  262         struct mlx4_ib_dev *ibdev = to_mdev(device);
  263         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
  264         struct mlx4_port_gid_table   *port_gid_table;
  265         int free = -1, found = -1;
  266         int ret = 0;
  267         int hw_update = 0;
  268         int i;
  269         struct gid_entry *gids = NULL;
  270 
  271         if (!rdma_cap_roce_gid_table(device, port_num))
  272                 return -EINVAL;
  273 
  274         if (port_num > MLX4_MAX_PORTS)
  275                 return -EINVAL;
  276 
  277         if (!context)
  278                 return -EINVAL;
  279 
  280         port_gid_table = &iboe->gids[port_num - 1];
  281         spin_lock_bh(&iboe->lock);
  282         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
  283                 if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
  284                     (port_gid_table->gids[i].gid_type == attr->gid_type))  {
  285                         found = i;
  286                         break;
  287                 }
  288                 if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
  289                         free = i; /* HW has space */
  290         }
  291 
  292         if (found < 0) {
  293                 if (free < 0) {
  294                         ret = -ENOSPC;
  295                 } else {
  296                         port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
  297                         if (!port_gid_table->gids[free].ctx) {
  298                                 ret = -ENOMEM;
  299                         } else {
  300                                 *context = port_gid_table->gids[free].ctx;
  301                                 memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
  302                                 port_gid_table->gids[free].gid_type = attr->gid_type;
  303                                 port_gid_table->gids[free].ctx->real_index = free;
  304                                 port_gid_table->gids[free].ctx->refcount = 1;
  305                                 hw_update = 1;
  306                         }
  307                 }
  308         } else {
  309                 struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
  310                 *context = ctx;
  311                 ctx->refcount++;
  312         }
  313         if (!ret && hw_update) {
  314                 gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
  315                 if (!gids) {
  316                         ret = -ENOMEM;
  317                 } else {
  318                         for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
  319                                 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
  320                                 gids[i].gid_type = port_gid_table->gids[i].gid_type;
  321                         }
  322                 }
  323         }
  324         spin_unlock_bh(&iboe->lock);
  325 
  326         if (!ret && hw_update) {
  327                 ret = mlx4_ib_update_gids(gids, ibdev, port_num);
  328                 kfree(gids);
  329         }
  330 
  331         return ret;
  332 }
  333 
  334 static int mlx4_ib_del_gid(struct ib_device *device,
  335                            u8 port_num,
  336                            unsigned int index,
  337                            void **context)
  338 {
  339         struct gid_cache_context *ctx = *context;
  340         struct mlx4_ib_dev *ibdev = to_mdev(device);
  341         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
  342         struct mlx4_port_gid_table   *port_gid_table;
  343         int ret = 0;
  344         int hw_update = 0;
  345         struct gid_entry *gids = NULL;
  346 
  347         if (!rdma_cap_roce_gid_table(device, port_num))
  348                 return -EINVAL;
  349 
  350         if (port_num > MLX4_MAX_PORTS)
  351                 return -EINVAL;
  352 
  353         port_gid_table = &iboe->gids[port_num - 1];
  354         spin_lock_bh(&iboe->lock);
  355         if (ctx) {
  356                 ctx->refcount--;
  357                 if (!ctx->refcount) {
  358                         unsigned int real_index = ctx->real_index;
  359 
  360                         memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
  361                         kfree(port_gid_table->gids[real_index].ctx);
  362                         port_gid_table->gids[real_index].ctx = NULL;
  363                         hw_update = 1;
  364                 }
  365         }
  366         if (!ret && hw_update) {
  367                 int i;
  368 
  369                 gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
  370                 if (!gids) {
  371                         ret = -ENOMEM;
  372                 } else {
  373                         for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
  374                                 memcpy(&gids[i].gid,
  375                                        &port_gid_table->gids[i].gid,
  376                                        sizeof(union ib_gid));
  377                                 gids[i].gid_type =
  378                                     port_gid_table->gids[i].gid_type;
  379                         }
  380                 }
  381         }
  382         spin_unlock_bh(&iboe->lock);
  383 
  384         if (!ret && hw_update) {
  385                 ret = mlx4_ib_update_gids(gids, ibdev, port_num);
  386                 kfree(gids);
  387         }
  388         return ret;
  389 }
  390 
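       /*
        * Translate a GID index from the IB core GID cache into the index
        * actually programmed in the hardware GID table.  For ports
        * without a RoCE GID table the two indices are identical.
        */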
  391 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
  392                                     u8 port_num, int index)
  393 {
  394         struct mlx4_ib_iboe *iboe = &ibdev->iboe;
  395         struct gid_cache_context *ctx = NULL;
  396         union ib_gid gid;
  397         struct mlx4_port_gid_table   *port_gid_table;
  398         int real_index = -EINVAL;
  399         int i;
  400         int ret;
  401         unsigned long flags;
  402         struct ib_gid_attr attr;
  403 
  404         if (port_num > MLX4_MAX_PORTS)
  405                 return -EINVAL;
  406 
  407         if (mlx4_is_bonded(ibdev->dev))
  408                 port_num = 1;
  409 
  410         if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
  411                 return index;
  412 
  413         ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
  414         if (ret)
  415                 return ret;
  416 
  417         if (attr.ndev)
  418                 if_rele(attr.ndev);
  419 
  420         if (!memcmp(&gid, &zgid, sizeof(gid)))
  421                 return -EINVAL;
  422 
  423         spin_lock_irqsave(&iboe->lock, flags);
  424         port_gid_table = &iboe->gids[port_num - 1];
  425 
  426         for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
  427                 if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
  428                     attr.gid_type == port_gid_table->gids[i].gid_type) {
  429                         ctx = port_gid_table->gids[i].ctx;
  430                         break;
  431                 }
  432         if (ctx)
  433                 real_index = ctx->real_index;
  434         spin_unlock_irqrestore(&iboe->lock, flags);
  435         return real_index;
  436 }
  437 
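       /*
        * Fill struct ib_device_attr from a NODE_INFO MAD combined with
        * the device capabilities, and report the HCA core clock offset
        * to user space when the caller's response buffer has room for it.
        */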
  438 static int mlx4_ib_query_device(struct ib_device *ibdev,
  439                                 struct ib_device_attr *props,
  440                                 struct ib_udata *uhw)
  441 {
  442         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  443         struct ib_smp *in_mad  = NULL;
  444         struct ib_smp *out_mad = NULL;
  445         int err = -ENOMEM;
  446         int have_ib_ports;
  447         struct mlx4_uverbs_ex_query_device cmd;
  448         struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
  449         struct mlx4_clock_params clock_params;
  450 
  451         if (uhw->inlen) {
  452                 if (uhw->inlen < sizeof(cmd))
  453                         return -EINVAL;
  454 
  455                 err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
  456                 if (err)
  457                         return err;
  458 
  459                 if (cmd.comp_mask)
  460                         return -EINVAL;
  461 
  462                 if (cmd.reserved)
  463                         return -EINVAL;
  464         }
  465 
  466         resp.response_length = offsetof(typeof(resp), response_length) +
  467                 sizeof(resp.response_length);
  468         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
  469         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
  470         if (!in_mad || !out_mad)
  471                 goto out;
  472 
  473         init_query_mad(in_mad);
  474         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
  475 
  476         err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
  477                            1, NULL, NULL, in_mad, out_mad);
  478         if (err)
  479                 goto out;
  480 
  481         memset(props, 0, sizeof *props);
  482 
  483         have_ib_ports = num_ib_ports(dev->dev);
  484 
  485         props->fw_ver = dev->dev->caps.fw_ver;
  486         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
  487                 IB_DEVICE_PORT_ACTIVE_EVENT             |
  488                 IB_DEVICE_SYS_IMAGE_GUID                |
  489                 IB_DEVICE_RC_RNR_NAK_GEN                |
  490                 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
  491         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
  492                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
  493         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
  494                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
  495         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
  496                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
  497         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
  498                 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
  499         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
  500                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
  501         if (dev->dev->caps.max_gso_sz &&
  502             (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
  503             (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
  504                 props->device_cap_flags |= IB_DEVICE_UD_TSO;
  505         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
  506                 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
  507         if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
  508             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
  509             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
  510                 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
  511         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
  512                 props->device_cap_flags |= IB_DEVICE_XRC;
  513         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
  514                 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
  515         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
  516                 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
  517                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
  518                 else
  519                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
  520         }
  521         if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
  522                 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
  523 
  524         props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
  525 
  526         props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
  527                 0xffffff;
  528         props->vendor_part_id      = dev->dev->persist->pdev->device;
  529         props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
  530         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
  531 
  532         props->max_mr_size         = ~0ull;
  533         props->page_size_cap       = dev->dev->caps.page_size_cap;
  534         props->max_qp              = dev->dev->quotas.qp;
  535         props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
  536         props->max_sge             = min(dev->dev->caps.max_sq_sg,
  537                                          dev->dev->caps.max_rq_sg);
  538         props->max_sge_rd          = MLX4_MAX_SGE_RD;
  539         props->max_cq              = dev->dev->quotas.cq;
  540         props->max_cqe             = dev->dev->caps.max_cqes;
  541         props->max_mr              = dev->dev->quotas.mpt;
  542         props->max_pd              = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
  543         props->max_qp_rd_atom      = dev->dev->caps.max_qp_dest_rdma;
  544         props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
  545         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
  546         props->max_srq             = dev->dev->quotas.srq;
  547         props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
  548         props->max_srq_sge         = dev->dev->caps.max_srq_sge;
  549         props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
  550         props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
  551         props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
  552                 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
  553         props->masked_atomic_cap   = props->atomic_cap;
  554         props->max_pkeys           = dev->dev->caps.pkey_table_len[1];
  555         props->max_mcast_grp       = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
  556         props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
  557         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
  558                                            props->max_mcast_grp;
  559         props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
  560         props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
  561         props->timestamp_mask = 0xFFFFFFFFFFFFULL;
  562 
  563         if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
  564                 resp.response_length += sizeof(resp.hca_core_clock_offset);
  565                 if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
  566                         resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
  567                         resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
  568                 }
  569         }
  570 
  571         if (uhw->outlen) {
  572                 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
  573                 if (err)
  574                         goto out;
  575         }
  576 out:
  577         kfree(in_mad);
  578         kfree(out_mad);
  579 
  580         return err;
  581 }
  582 
  583 static enum rdma_link_layer
  584 mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
  585 {
  586         struct mlx4_dev *dev = to_mdev(device)->dev;
  587 
  588         return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
  589                 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
  590 }
  591 
  592 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
  593                               struct ib_port_attr *props, int netw_view)
  594 {
  595         struct ib_smp *in_mad  = NULL;
  596         struct ib_smp *out_mad = NULL;
  597         int ext_active_speed;
  598         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
  599         int err = -ENOMEM;
  600 
  601         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
  602         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
  603         if (!in_mad || !out_mad)
  604                 goto out;
  605 
  606         init_query_mad(in_mad);
  607         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
  608         in_mad->attr_mod = cpu_to_be32(port);
  609 
  610         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
  611                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
  612 
  613         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
  614                                 in_mad, out_mad);
  615         if (err)
  616                 goto out;
  617 
  618 
  619         props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
  620         props->lmc              = out_mad->data[34] & 0x7;
  621         props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
  622         props->sm_sl            = out_mad->data[36] & 0xf;
  623         props->state            = out_mad->data[32] & 0xf;
  624         props->phys_state       = out_mad->data[33] >> 4;
  625         props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
  626         if (netw_view)
  627                 props->gid_tbl_len = out_mad->data[50];
  628         else
  629                 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
  630         props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
  631         props->pkey_tbl_len     = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
  632         props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
  633         props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
  634         props->active_width     = out_mad->data[31] & 0xf;
  635         props->active_speed     = out_mad->data[35] >> 4;
  636         props->max_mtu          = out_mad->data[41] & 0xf;
  637         props->active_mtu       = out_mad->data[36] >> 4;
  638         props->subnet_timeout   = out_mad->data[51] & 0x1f;
  639         props->max_vl_num       = out_mad->data[37] >> 4;
  640         props->init_type_reply  = out_mad->data[41] >> 4;
  641 
  642         /* Check if extended speeds (EDR/FDR/...) are supported */
  643         if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
  644                 ext_active_speed = out_mad->data[62] >> 4;
  645 
  646                 switch (ext_active_speed) {
  647                 case 1:
  648                         props->active_speed = IB_SPEED_FDR;
  649                         break;
  650                 case 2:
  651                         props->active_speed = IB_SPEED_EDR;
  652                         break;
  653                 }
  654         }
  655 
   656         /* If the reported active speed is QDR, check whether it is FDR-10 */
  657         if (props->active_speed == IB_SPEED_QDR) {
  658                 init_query_mad(in_mad);
  659                 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
  660                 in_mad->attr_mod = cpu_to_be32(port);
  661 
  662                 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
  663                                    NULL, NULL, in_mad, out_mad);
  664                 if (err)
  665                         goto out;
  666 
  667                 /* Checking LinkSpeedActive for FDR-10 */
  668                 if (out_mad->data[15] & 0x1)
  669                         props->active_speed = IB_SPEED_FDR10;
  670         }
  671 
  672         /* Avoid wrong speed value returned by FW if the IB link is down. */
  673         if (props->state == IB_PORT_DOWN)
  674                  props->active_speed = IB_SPEED_SDR;
  675 
  676 out:
  677         kfree(in_mad);
  678         kfree(out_mad);
  679         return err;
  680 }
  681 
  682 static u8 state_to_phys_state(enum ib_port_state state)
  683 {
  684         return state == IB_PORT_ACTIVE ?
  685                 IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
  686 }
  687 
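       /*
        * Port attributes for Ethernet (RoCE) ports are synthesized from a
        * QUERY_PORT command plus the state of the underlying net device:
        * the port is reported ACTIVE only while the interface is running
        * with link up.
        */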
  688 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
  689                                struct ib_port_attr *props, int netw_view)
  690 {
  691 
  692         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
  693         struct mlx4_ib_iboe *iboe = &mdev->iboe;
  694         struct ifnet *ndev;
  695         enum ib_mtu tmp;
  696         struct mlx4_cmd_mailbox *mailbox;
  697         int err = 0;
  698         int is_bonded = mlx4_is_bonded(mdev->dev);
  699 
  700         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
  701         if (IS_ERR(mailbox))
  702                 return PTR_ERR(mailbox);
  703 
  704         err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
  705                            MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
  706                            MLX4_CMD_WRAPPED);
  707         if (err)
  708                 goto out;
  709 
  710         props->active_width     =  (((u8 *)mailbox->buf)[5] == 0x40) ?
  711                                                 IB_WIDTH_4X : IB_WIDTH_1X;
  712         props->active_speed     = IB_SPEED_QDR;
  713         props->port_cap_flags   = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
  714         props->gid_tbl_len      = mdev->dev->caps.gid_table_len[port];
  715         props->max_msg_sz       = mdev->dev->caps.max_msg_sz;
  716         props->pkey_tbl_len     = 1;
  717         props->max_mtu          = IB_MTU_4096;
  718         props->max_vl_num       = 2;
  719         props->state            = IB_PORT_DOWN;
  720         props->phys_state       = state_to_phys_state(props->state);
  721         props->active_mtu       = IB_MTU_256;
  722         spin_lock_bh(&iboe->lock);
  723         ndev = iboe->netdevs[port - 1];
  724         if (ndev && is_bonded) {
  725 #if 0
  726                 rcu_read_lock(); /* required to get upper dev */
  727                 ndev = netdev_master_upper_dev_get_rcu(ndev);
  728                 rcu_read_unlock();
  729 #endif
  730         }
  731         if (!ndev)
  732                 goto out_unlock;
  733 
  734         tmp = iboe_get_mtu(ndev->if_mtu);
  735         props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
  736 
  737         props->state            = ((ndev->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
  738                                    ndev->if_link_state == LINK_STATE_UP) ?
  739                                         IB_PORT_ACTIVE : IB_PORT_DOWN;
  740         props->phys_state       = state_to_phys_state(props->state);
  741 out_unlock:
  742         spin_unlock_bh(&iboe->lock);
  743 out:
  744         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
  745         return err;
  746 }
  747 
  748 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
  749                          struct ib_port_attr *props, int netw_view)
  750 {
  751         int err;
  752 
  753         memset(props, 0, sizeof *props);
  754 
  755         err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
  756                 ib_link_query_port(ibdev, port, props, netw_view) :
  757                                 eth_link_query_port(ibdev, port, props, netw_view);
  758 
  759         return err;
  760 }
  761 
  762 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
  763                               struct ib_port_attr *props)
  764 {
  765         /* returns host view */
  766         return __mlx4_ib_query_port(ibdev, port, props, 0);
  767 }
  768 
  769 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
  770                         union ib_gid *gid, int netw_view)
  771 {
  772         struct ib_smp *in_mad  = NULL;
  773         struct ib_smp *out_mad = NULL;
  774         int err = -ENOMEM;
  775         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  776         int clear = 0;
  777         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
  778 
  779         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
  780         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
  781         if (!in_mad || !out_mad)
  782                 goto out;
  783 
  784         init_query_mad(in_mad);
  785         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
  786         in_mad->attr_mod = cpu_to_be32(port);
  787 
  788         if (mlx4_is_mfunc(dev->dev) && netw_view)
  789                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
  790 
  791         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
  792         if (err)
  793                 goto out;
  794 
  795         memcpy(gid->raw, out_mad->data + 8, 8);
  796 
  797         if (mlx4_is_mfunc(dev->dev) && !netw_view) {
  798                 if (index) {
  799                         /* For any index > 0, return the null guid */
  800                         err = 0;
  801                         clear = 1;
  802                         goto out;
  803                 }
  804         }
  805 
  806         init_query_mad(in_mad);
  807         in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
  808         in_mad->attr_mod = cpu_to_be32(index / 8);
  809 
  810         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
  811                            NULL, NULL, in_mad, out_mad);
  812         if (err)
  813                 goto out;
  814 
  815         memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
  816 
  817 out:
  818         if (clear)
  819                 memset(gid->raw + 8, 0, 8);
  820         kfree(in_mad);
  821         kfree(out_mad);
  822         return err;
  823 }
  824 
  825 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
  826                              union ib_gid *gid)
  827 {
  828         int ret;
  829 
  830         if (rdma_protocol_ib(ibdev, port))
  831                 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
  832 
  833         if (!rdma_protocol_roce(ibdev, port))
  834                 return -ENODEV;
  835 
  836         if (!rdma_cap_roce_gid_table(ibdev, port))
  837                 return -ENODEV;
  838 
  839         ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
  840         if (ret == -EAGAIN) {
  841                 memcpy(gid, &zgid, sizeof(*gid));
  842                 return 0;
  843         }
  844 
  845         return ret;
  846 }
  847 
  848 static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
  849 {
  850         union sl2vl_tbl_to_u64 sl2vl64;
  851         struct ib_smp *in_mad  = NULL;
  852         struct ib_smp *out_mad = NULL;
  853         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
  854         int err = -ENOMEM;
  855         int jj;
  856 
  857         if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
  858                 *sl2vl_tbl = 0;
  859                 return 0;
  860         }
  861 
  862         in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
  863         out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
  864         if (!in_mad || !out_mad)
  865                 goto out;
  866 
  867         init_query_mad(in_mad);
  868         in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
  869         in_mad->attr_mod = 0;
  870 
  871         if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
  872                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
  873 
  874         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
  875                            in_mad, out_mad);
  876         if (err)
  877                 goto out;
  878 
  879         for (jj = 0; jj < 8; jj++)
  880                 sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
  881         *sl2vl_tbl = sl2vl64.sl64;
  882 
  883 out:
  884         kfree(in_mad);
  885         kfree(out_mad);
  886         return err;
  887 }
  888 
  889 static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
  890 {
  891         u64 sl2vl;
  892         int i;
  893         int err;
  894 
  895         for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
  896                 if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
  897                         continue;
  898                 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
  899                 if (err) {
  900                         pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
  901                                i, err);
  902                         sl2vl = 0;
  903                 }
  904                 atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
  905         }
  906 }
  907 
  908 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
  909                          u16 *pkey, int netw_view)
  910 {
  911         struct ib_smp *in_mad  = NULL;
  912         struct ib_smp *out_mad = NULL;
  913         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
  914         int err = -ENOMEM;
  915 
  916         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
  917         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
  918         if (!in_mad || !out_mad)
  919                 goto out;
  920 
  921         init_query_mad(in_mad);
  922         in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
  923         in_mad->attr_mod = cpu_to_be32(index / 32);
  924 
  925         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
  926                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
  927 
  928         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
  929                            in_mad, out_mad);
  930         if (err)
  931                 goto out;
  932 
  933         *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
  934 
  935 out:
  936         kfree(in_mad);
  937         kfree(out_mad);
  938         return err;
  939 }
  940 
  941 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
  942 {
  943         return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
  944 }
  945 
  946 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
  947                                  struct ib_device_modify *props)
  948 {
  949         struct mlx4_cmd_mailbox *mailbox;
  950         unsigned long flags;
  951 
  952         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
  953                 return -EOPNOTSUPP;
  954 
  955         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
  956                 return 0;
  957 
  958         if (mlx4_is_slave(to_mdev(ibdev)->dev))
  959                 return -EOPNOTSUPP;
  960 
  961         spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
  962         memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
  963         spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
  964 
  965         /*
  966          * If possible, pass node desc to FW, so it can generate
  967          * a 144 trap.  If cmd fails, just ignore.
  968          */
  969         mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
  970         if (IS_ERR(mailbox))
  971                 return 0;
  972 
  973         memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
  974         mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
  975                  MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
  976 
  977         mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
  978 
  979         return 0;
  980 }
  981 
  982 static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
  983                             u32 cap_mask)
  984 {
  985         struct mlx4_cmd_mailbox *mailbox;
  986         int err;
  987 
  988         mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
  989         if (IS_ERR(mailbox))
  990                 return PTR_ERR(mailbox);
  991 
  992         if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
  993                 *(u8 *) mailbox->buf         = !!reset_qkey_viols << 6;
  994                 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
  995         } else {
  996                 ((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
  997                 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
  998         }
  999 
 1000         err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
 1001                        MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
 1002                        MLX4_CMD_WRAPPED);
 1003 
 1004         mlx4_free_cmd_mailbox(dev->dev, mailbox);
 1005         return err;
 1006 }
 1007 
 1008 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 1009                                struct ib_port_modify *props)
 1010 {
 1011         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 1012         u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 1013         struct ib_port_attr attr;
 1014         u32 cap_mask;
 1015         int err;
 1016 
 1017         /* return OK if this is RoCE. CM calls ib_modify_port() regardless
 1018          * of whether port link layer is ETH or IB. For ETH ports, qkey
 1019          * violations and port capabilities are not meaningful.
 1020          */
 1021         if (is_eth)
 1022                 return 0;
 1023 
 1024         mutex_lock(&mdev->cap_mask_mutex);
 1025 
 1026         err = mlx4_ib_query_port(ibdev, port, &attr);
 1027         if (err)
 1028                 goto out;
 1029 
 1030         cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
 1031                 ~props->clr_port_cap_mask;
 1032 
 1033         err = mlx4_ib_SET_PORT(mdev, port,
 1034                                !!(mask & IB_PORT_RESET_QKEY_CNTR),
 1035                                cap_mask);
 1036 
 1037 out:
 1038         mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
 1039         return err;
 1040 }
 1041 
 1042 static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
 1043                                   struct ib_udata *udata)
 1044 {
 1045         struct ib_device *ibdev = uctx->device;
 1046         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 1047         struct mlx4_ib_ucontext *context = to_mucontext(uctx);
 1048         struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 1049         struct mlx4_ib_alloc_ucontext_resp resp;
 1050         int err;
 1051 
 1052         if (!dev->ib_active)
 1053                 return -EAGAIN;
 1054 
 1055         if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
 1056                 resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
 1057                 resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
 1058                 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
 1059         } else {
 1060                 resp.dev_caps         = dev->dev->caps.userspace_caps;
 1061                 resp.qp_tab_size      = dev->dev->caps.num_qps;
 1062                 resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
 1063                 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
 1064                 resp.cqe_size         = dev->dev->caps.cqe_size;
 1065         }
 1066 
 1067         err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
 1068         if (err)
 1069                 return err;
 1070 
 1071         INIT_LIST_HEAD(&context->db_page_list);
 1072         mutex_init(&context->db_page_mutex);
 1073 
 1074         if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
 1075                 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
 1076         else
 1077                 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
 1078 
 1079         if (err) {
 1080                 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 1081                 return -EFAULT;
 1082         }
 1083 
 1084         return err;
 1085 }
 1086 
 1087 static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 1088 {
 1089         struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
 1090 
 1091         mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
 1092 }
 1093 
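       /*
        * User mmap offsets: page 0 maps the UAR doorbell page, page 1 the
        * blueflame register page (only when bf_reg_size is nonzero), and
        * page 3 the internal HCA core clock.
        */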
 1094 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 1095 {
 1096         struct mlx4_ib_dev *dev = to_mdev(context->device);
 1097 
 1098         switch (vma->vm_pgoff) {
 1099         case 0:
 1100                 return rdma_user_mmap_io(context, vma,
 1101                                          to_mucontext(context)->uar.pfn,
 1102                                          PAGE_SIZE,
 1103                                          pgprot_noncached(vma->vm_page_prot),
 1104                                          NULL);
 1105 
 1106         case 1:
 1107                 if (dev->dev->caps.bf_reg_size == 0)
 1108                         return -EINVAL;
 1109                 return rdma_user_mmap_io(
 1110                         context, vma,
 1111                         to_mucontext(context)->uar.pfn +
 1112                                 dev->dev->caps.num_uars,
 1113                         PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
 1114                         NULL);
 1115 
 1116         case 3: {
 1117                 struct mlx4_clock_params params;
 1118                 int ret;
 1119 
 1120                 ret = mlx4_get_internal_clock_params(dev->dev, &params);
 1121                 if (ret)
 1122                         return ret;
 1123 
 1124                 return rdma_user_mmap_io(
 1125                         context, vma,
 1126                         (pci_resource_start(dev->dev->persist->pdev,
 1127                                             params.bar) +
 1128                          params.offset) >>
 1129                                 PAGE_SHIFT,
 1130                         PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
 1131                         NULL);
 1132         }
 1133 
 1134         default:
 1135                 return -EINVAL;
 1136         }
 1137 }
 1138 
 1139 static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 1140 {
 1141         struct mlx4_ib_pd *pd = to_mpd(ibpd);
 1142         struct ib_device *ibdev = ibpd->device;
 1143         int err;
 1144 
 1145         err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
 1146         if (err)
 1147                 return err;
 1148 
 1149         if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
 1150                 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
 1151                 return -EFAULT;
 1152         }
 1153         return 0;
 1154 }
 1155 
 1156 static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 1157 {
 1158         mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
 1159 }
 1160 
 1161 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
 1162                                           struct ib_udata *udata)
 1163 {
 1164         struct mlx4_ib_xrcd *xrcd;
 1165         struct ib_cq_init_attr cq_attr = {};
 1166         int err;
 1167 
 1168         if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
 1169                 return ERR_PTR(-ENOSYS);
 1170 
 1171         xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
 1172         if (!xrcd)
 1173                 return ERR_PTR(-ENOMEM);
 1174 
 1175         err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
 1176         if (err)
 1177                 goto err1;
 1178 
 1179         xrcd->pd = ib_alloc_pd(ibdev, 0);
 1180         if (IS_ERR(xrcd->pd)) {
 1181                 err = PTR_ERR(xrcd->pd);
 1182                 goto err2;
 1183         }
 1184 
 1185         cq_attr.cqe = 1;
 1186         xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
 1187         if (IS_ERR(xrcd->cq)) {
 1188                 err = PTR_ERR(xrcd->cq);
 1189                 goto err3;
 1190         }
 1191 
 1192         return &xrcd->ibxrcd;
 1193 
 1194 err3:
 1195         ib_dealloc_pd(xrcd->pd);
 1196 err2:
 1197         mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
 1198 err1:
 1199         kfree(xrcd);
 1200         return ERR_PTR(err);
 1201 }
 1202 
 1203 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
 1204 {
 1205         ib_destroy_cq(to_mxrcd(xrcd)->cq);
 1206         ib_dealloc_pd(to_mxrcd(xrcd)->pd);
 1207         mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
 1208         kfree(xrcd);
 1209 
 1210         return 0;
 1211 }
 1212 
 1213 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
 1214 {
 1215         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 1216         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 1217         struct mlx4_ib_gid_entry *ge;
 1218 
 1219         ge = kzalloc(sizeof *ge, GFP_KERNEL);
 1220         if (!ge)
 1221                 return -ENOMEM;
 1222 
 1223         ge->gid = *gid;
 1224         if (mlx4_ib_add_mc(mdev, mqp, gid)) {
 1225                 ge->port = mqp->port;
 1226                 ge->added = 1;
 1227         }
 1228 
 1229         mutex_lock(&mqp->mutex);
 1230         list_add_tail(&ge->list, &mqp->gid_list);
 1231         mutex_unlock(&mqp->mutex);
 1232 
 1233         return 0;
 1234 }
 1235 
 1236 static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
 1237                                           struct mlx4_ib_counters *ctr_table)
 1238 {
 1239         struct counter_index *counter, *tmp_count;
 1240 
 1241         mutex_lock(&ctr_table->mutex);
 1242         list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
 1243                                  list) {
 1244                 if (counter->allocated)
 1245                         mlx4_counter_free(ibdev->dev, counter->index);
 1246                 list_del(&counter->list);
 1247                 kfree(counter);
 1248         }
 1249         mutex_unlock(&ctr_table->mutex);
 1250 }
 1251 
 1252 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 1253                    union ib_gid *gid)
 1254 {
 1255         struct ifnet *ndev;
 1256         int ret = 0;
 1257 
 1258         if (!mqp->port)
 1259                 return 0;
 1260 
 1261         spin_lock_bh(&mdev->iboe.lock);
 1262         ndev = mdev->iboe.netdevs[mqp->port - 1];
 1263         if (ndev)
 1264                 if_ref(ndev);
 1265         spin_unlock_bh(&mdev->iboe.lock);
 1266 
 1267         if (ndev) {
 1268                 ret = 1;
 1269                 if_rele(ndev);
 1270         }
 1271 
 1272         return ret;
 1273 }
 1274 
 1275 struct mlx4_ib_steering {
 1276         struct list_head list;
 1277         struct mlx4_flow_reg_id reg_id;
 1278         union ib_gid gid;
 1279 };
 1280 
 1281 #define LAST_ETH_FIELD vlan_tag
 1282 #define LAST_IB_FIELD sl
 1283 #define LAST_IPV4_FIELD dst_ip
 1284 #define LAST_TCP_UDP_FIELD src_port
 1285 
 1286 /* Field is the last supported field */
 1287 #define FIELDS_NOT_SUPPORTED(filter, field)\
 1288         memchr_inv((void *)&filter.field  +\
 1289                    sizeof(filter.field), 0,\
 1290                    sizeof(filter) -\
 1291                    offsetof(typeof(filter), field) -\
 1292                    sizeof(filter.field))
 1293 
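       /*
        * Convert a single ib_flow_spec into the hardware _rule_hw layout.
        * Returns the hardware rule size in bytes on success, -ENOTSUPP
        * when the spec masks fields the hardware cannot match on, or
        * -EINVAL for unknown or unsupported spec types.
        */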
 1294 static int parse_flow_attr(struct mlx4_dev *dev,
 1295                            u32 qp_num,
 1296                            union ib_flow_spec *ib_spec,
 1297                            struct _rule_hw *mlx4_spec)
 1298 {
 1299         enum mlx4_net_trans_rule_id type;
 1300 
 1301         switch (ib_spec->type) {
 1302         case IB_FLOW_SPEC_ETH:
 1303                 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 1304                         return -ENOTSUPP;
 1305 
 1306                 type = MLX4_NET_TRANS_RULE_ID_ETH;
 1307                 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
 1308                        ETH_ALEN);
 1309                 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
 1310                        ETH_ALEN);
 1311                 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
 1312                 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
 1313                 break;
 1314         case IB_FLOW_SPEC_IB:
 1315                 if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
 1316                         return -ENOTSUPP;
 1317 
 1318                 type = MLX4_NET_TRANS_RULE_ID_IB;
 1319                 mlx4_spec->ib.l3_qpn =
 1320                         cpu_to_be32(qp_num);
 1321                 mlx4_spec->ib.qpn_mask =
 1322                         cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
 1323                 break;
 1324 
 1325 
 1326         case IB_FLOW_SPEC_IPV4:
 1327                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 1328                         return -ENOTSUPP;
 1329 
 1330                 type = MLX4_NET_TRANS_RULE_ID_IPV4;
 1331                 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
 1332                 mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
 1333                 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
 1334                 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
 1335                 break;
 1336 
 1337         case IB_FLOW_SPEC_TCP:
 1338         case IB_FLOW_SPEC_UDP:
 1339                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
 1340                         return -ENOTSUPP;
 1341 
 1342                 type = ib_spec->type == IB_FLOW_SPEC_TCP ?
 1343                                         MLX4_NET_TRANS_RULE_ID_TCP :
 1344                                         MLX4_NET_TRANS_RULE_ID_UDP;
 1345                 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
 1346                 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
 1347                 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
 1348                 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
 1349                 break;
 1350 
 1351         default:
 1352                 return -EINVAL;
 1353         }
 1354         if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
 1355             mlx4_hw_rule_sz(dev, type) < 0)
 1356                 return -EINVAL;
 1357         mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
 1358         mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
 1359         return mlx4_hw_rule_sz(dev, type);
 1360 }
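
/*
 * On success parse_flow_attr() returns the hardware rule size in bytes,
 * while mlx4_spec->size holds the same size in 4-byte units (the ">> 2").
 * A caller is expected to advance by the return value, roughly:
 *
 *     ret = parse_flow_attr(dev, qp_num, ib_spec, mlx4_spec);
 *     if (ret < 0)
 *             return ret;
 *     mlx4_spec = (void *)mlx4_spec + ret;
 *     size += ret;
 *
 * which is how __mlx4_ib_create_default_rules() and __mlx4_ib_create_flow()
 * below build up the command mailbox before passing size >> 2 (dwords) to
 * the firmware.
 */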
 1361 
 1362 struct default_rules {
 1363         __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
 1364         __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
 1365         __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
 1366         __u8  link_layer;
 1367 };
 1368 static const struct default_rules default_table[] = {
 1369         {
 1370                 .mandatory_fields = {IB_FLOW_SPEC_IPV4},
 1371                 .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
 1372                 .rules_create_list = {IB_FLOW_SPEC_IB},
 1373                 .link_layer = IB_LINK_LAYER_INFINIBAND
 1374         }
 1375 };
 1376 
 1377 static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
 1378                                          struct ib_flow_attr *flow_attr)
 1379 {
 1380         int i, j, k;
 1381         void *ib_flow;
 1382         const struct default_rules *pdefault_rules = default_table;
 1383         u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
 1384 
 1385         for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
 1386                 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
 1387                 memset(&field_types, 0, sizeof(field_types));
 1388 
 1389                 if (link_layer != pdefault_rules->link_layer)
 1390                         continue;
 1391 
 1392                 ib_flow = flow_attr + 1;
 1393                 /* we assume the specs are sorted */
 1394                 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
 1395                      j < flow_attr->num_of_specs; k++) {
 1396                         union ib_flow_spec *current_flow =
 1397                                 (union ib_flow_spec *)ib_flow;
 1398 
 1399                         /* same layer but different type */
 1400                         if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
 1401                              (pdefault_rules->mandatory_fields[k] &
 1402                               IB_FLOW_SPEC_LAYER_MASK)) &&
 1403                             (current_flow->type !=
 1404                              pdefault_rules->mandatory_fields[k]))
 1405                                 goto out;
 1406 
 1407                         /* same layer, try to match the next one */
 1408                         if (current_flow->type ==
 1409                             pdefault_rules->mandatory_fields[k]) {
 1410                                 j++;
 1411                                 ib_flow +=
 1412                                         ((union ib_flow_spec *)ib_flow)->size;
 1413                         }
 1414                 }
 1415 
 1416                 ib_flow = flow_attr + 1;
 1417                 for (j = 0; j < flow_attr->num_of_specs;
 1418                      j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
 1419                         for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
 1420                                 /* same layer and same type */
 1421                                 if (((union ib_flow_spec *)ib_flow)->type ==
 1422                                     pdefault_rules->mandatory_not_fields[k])
 1423                                         goto out;
 1424 
 1425                 return i;
 1426         }
 1427 out:
 1428         return -1;
 1429 }
 1430 
 1431 static int __mlx4_ib_create_default_rules(
 1432                 struct mlx4_ib_dev *mdev,
 1433                 struct ib_qp *qp,
 1434                 const struct default_rules *pdefault_rules,
 1435                 struct _rule_hw *mlx4_spec) {
 1436         int size = 0;
 1437         int i;
 1438 
 1439         for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
 1440                 int ret;
 1441                 union ib_flow_spec ib_spec;
 1442                 switch (pdefault_rules->rules_create_list[i]) {
 1443                 case 0:
 1444                         /* no rule */
 1445                         continue;
 1446                 case IB_FLOW_SPEC_IB:
 1447                         ib_spec.type = IB_FLOW_SPEC_IB;
 1448                         ib_spec.size = sizeof(struct ib_flow_spec_ib);
 1449 
 1450                         break;
 1451                 default:
 1452                         /* invalid rule */
 1453                         return -EINVAL;
 1454                 }
 1455                 /* We must add an empty rule here; the qpn is ignored */
 1456                 ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
 1457                                       mlx4_spec);
 1458                 if (ret < 0) {
 1459                         pr_info("invalid parsing\n");
 1460                         return -EINVAL;
 1461                 }
 1462 
 1463                 mlx4_spec = (void *)mlx4_spec + ret;
 1464                 size += ret;
 1465         }
 1466         return size;
 1467 }
 1468 
 1469 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
 1470                           int domain,
 1471                           enum mlx4_net_trans_promisc_mode flow_type,
 1472                           u64 *reg_id)
 1473 {
 1474         int ret, i;
 1475         int size = 0;
 1476         void *ib_flow;
 1477         struct mlx4_ib_dev *mdev = to_mdev(qp->device);
 1478         struct mlx4_cmd_mailbox *mailbox;
 1479         struct mlx4_net_trans_rule_hw_ctrl *ctrl;
 1480         int default_flow;
 1481 
 1482         static const u16 __mlx4_domain[] = {
 1483                 [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
 1484                 [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
 1485                 [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
 1486                 [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
 1487         };
 1488 
 1489         if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
 1490                 pr_err("Invalid priority value %d\n", flow_attr->priority);
 1491                 return -EINVAL;
 1492         }
 1493 
 1494         if (domain >= IB_FLOW_DOMAIN_NUM) {
 1495                 pr_err("Invalid domain value %d\n", domain);
 1496                 return -EINVAL;
 1497         }
 1498 
 1499         if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
 1500                 return -EINVAL;
 1501 
 1502         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 1503         if (IS_ERR(mailbox))
 1504                 return PTR_ERR(mailbox);
 1505         ctrl = mailbox->buf;
 1506 
 1507         ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
 1508                                  flow_attr->priority);
 1509         ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
 1510         ctrl->port = flow_attr->port;
 1511         ctrl->qpn = cpu_to_be32(qp->qp_num);
 1512 
 1513         ib_flow = flow_attr + 1;
 1514         size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
 1515         /* Add default flows */
 1516         default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
 1517         if (default_flow >= 0) {
 1518                 ret = __mlx4_ib_create_default_rules(
 1519                                 mdev, qp, default_table + default_flow,
 1520                                 mailbox->buf + size);
 1521                 if (ret < 0) {
 1522                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 1523                         return -EINVAL;
 1524                 }
 1525                 size += ret;
 1526         }
 1527         for (i = 0; i < flow_attr->num_of_specs; i++) {
 1528                 ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
 1529                                       mailbox->buf + size);
 1530                 if (ret < 0) {
 1531                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 1532                         return -EINVAL;
 1533                 }
 1534                 ib_flow += ((union ib_flow_spec *) ib_flow)->size;
 1535                 size += ret;
 1536         }
 1537 
 1538         ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
 1539                            MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
 1540                            MLX4_CMD_WRAPPED);
 1541         if (ret == -ENOMEM)
 1542                 pr_err("mcg table is full. Failed to register network rule.\n");
 1543         else if (ret == -ENXIO)
 1544                 pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
 1545         else if (ret)
 1546                 pr_err("Invalid argument. Failed to register network rule.\n");
 1547 
 1548         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 1549         return ret;
 1550 }
 1551 
 1552 static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
 1553 {
 1554         int err;
 1555         err = mlx4_cmd(dev, reg_id, 0, 0,
 1556                        MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
 1557                        MLX4_CMD_WRAPPED);
 1558         if (err)
 1559                 pr_err("Failed to detach network rule, registration id = 0x%llx\n",
 1560                        (long long)reg_id);
 1561         return err;
 1562 }
 1563 
 1564 static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
 1565                                     u64 *reg_id)
 1566 {
 1567         void *ib_flow;
 1568         union ib_flow_spec *ib_spec;
 1569         struct mlx4_dev *dev = to_mdev(qp->device)->dev;
 1570         int err = 0;
 1571 
 1572         if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
 1573             dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
 1574                 return 0; /* do nothing */
 1575 
 1576         ib_flow = flow_attr + 1;
 1577         ib_spec = (union ib_flow_spec *)ib_flow;
 1578 
 1579         if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
 1580                 return 0; /* do nothing */
 1581 
 1582         err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
 1583                                     flow_attr->port, qp->qp_num,
 1584                                     MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
 1585                                     reg_id);
 1586         return err;
 1587 }
 1588 
 1589 static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
 1590                                       struct ib_flow_attr *flow_attr,
 1591                                       enum mlx4_net_trans_promisc_mode *type)
 1592 {
 1593         int err = 0;
 1594 
 1595         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
 1596             (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
 1597             (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
 1598                 return -EOPNOTSUPP;
 1599         }
 1600 
 1601         if (flow_attr->num_of_specs == 0) {
 1602                 type[0] = MLX4_FS_MC_SNIFFER;
 1603                 type[1] = MLX4_FS_UC_SNIFFER;
 1604         } else {
 1605                 union ib_flow_spec *ib_spec;
 1606 
 1607                 ib_spec = (union ib_flow_spec *)(flow_attr + 1);
 1608                 if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
 1609                         return -EINVAL;
 1610 
 1611                 /* if the mask is all zeros, then match both MC and UC */
 1612                 if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
 1613                         type[0] = MLX4_FS_MC_SNIFFER;
 1614                         type[1] = MLX4_FS_UC_SNIFFER;
 1615                 } else {
 1616                         u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
 1617                                             ib_spec->eth.mask.dst_mac[1],
 1618                                             ib_spec->eth.mask.dst_mac[2],
 1619                                             ib_spec->eth.mask.dst_mac[3],
 1620                                             ib_spec->eth.mask.dst_mac[4],
 1621                                             ib_spec->eth.mask.dst_mac[5]};
 1622 
 1623                         /* The xor above flipped only the MC bit; a non-empty mask
 1624                          * is valid only if that bit is set and the rest are zero.
 1625                          */
 1626                         if (!is_zero_ether_addr(&mac[0]))
 1627                                 return -EINVAL;
 1628 
 1629                         if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
 1630                                 type[0] = MLX4_FS_MC_SNIFFER;
 1631                         else
 1632                                 type[0] = MLX4_FS_UC_SNIFFER;
 1633                 }
 1634         }
 1635 
 1636         return err;
 1637 }
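
/*
 * Put differently, the only destination-MAC masks accepted here for a
 * don't-trap rule are
 *
 *     00:00:00:00:00:00   sniff both multicast and unicast
 *     01:00:00:00:00:00   sniff MC or UC, chosen by the multicast bit of
 *                         eth.val.dst_mac
 *
 * since clearing the multicast bit of a valid non-empty mask must leave an
 * all-zero address.
 */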
 1638 
 1639 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 1640                                     struct ib_flow_attr *flow_attr,
 1641                                     int domain, struct ib_udata *udata)
 1642 {
 1643         int err = 0, i = 0, j = 0;
 1644         struct mlx4_ib_flow *mflow;
 1645         enum mlx4_net_trans_promisc_mode type[2];
 1646         struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 1647         int is_bonded = mlx4_is_bonded(dev);
 1648 
 1649         if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
 1650                 return ERR_PTR(-EINVAL);
 1651 
 1652         if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 1653             (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 1654                 return ERR_PTR(-EOPNOTSUPP);
 1655 
 1656         if (udata &&
 1657             udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
 1658                 return ERR_PTR(-EOPNOTSUPP);
 1659 
 1660         memset(type, 0, sizeof(type));
 1661 
 1662         mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
 1663         if (!mflow) {
 1664                 err = -ENOMEM;
 1665                 goto err_free;
 1666         }
 1667 
 1668         switch (flow_attr->type) {
 1669         case IB_FLOW_ATTR_NORMAL:
 1670                 /* If the don't-trap flag (continue match) is set, then under
 1671                  * specific conditions traffic is replicated to the given qp
 1672                  * without being stolen from its original destination.
 1673                  */
 1674                 if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
 1675                         err = mlx4_ib_add_dont_trap_rule(dev,
 1676                                                          flow_attr,
 1677                                                          type);
 1678                         if (err)
 1679                                 goto err_free;
 1680                 } else {
 1681                         type[0] = MLX4_FS_REGULAR;
 1682                 }
 1683                 break;
 1684 
 1685         case IB_FLOW_ATTR_ALL_DEFAULT:
 1686                 type[0] = MLX4_FS_ALL_DEFAULT;
 1687                 break;
 1688 
 1689         case IB_FLOW_ATTR_MC_DEFAULT:
 1690                 type[0] = MLX4_FS_MC_DEFAULT;
 1691                 break;
 1692 
 1693         case IB_FLOW_ATTR_SNIFFER:
 1694                 type[0] = MLX4_FS_MIRROR_RX_PORT;
 1695                 type[1] = MLX4_FS_MIRROR_SX_PORT;
 1696                 break;
 1697 
 1698         default:
 1699                 err = -EINVAL;
 1700                 goto err_free;
 1701         }
 1702 
 1703         while (i < ARRAY_SIZE(type) && type[i]) {
 1704                 err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
 1705                                             &mflow->reg_id[i].id);
 1706                 if (err)
 1707                         goto err_create_flow;
 1708                 if (is_bonded) {
 1709                         /* The application always sees one port, so the mirror
 1710                          * rule must be on port #2
 1711                          */
 1712                         flow_attr->port = 2;
 1713                         err = __mlx4_ib_create_flow(qp, flow_attr,
 1714                                                     domain, type[j],
 1715                                                     &mflow->reg_id[j].mirror);
 1716                         flow_attr->port = 1;
 1717                         if (err)
 1718                                 goto err_create_flow;
 1719                         j++;
 1720                 }
 1721 
 1722                 i++;
 1723         }
 1724 
 1725         if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 1726                 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
 1727                                                &mflow->reg_id[i].id);
 1728                 if (err)
 1729                         goto err_create_flow;
 1730 
 1731                 if (is_bonded) {
 1732                         flow_attr->port = 2;
 1733                         err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
 1734                                                        &mflow->reg_id[j].mirror);
 1735                         flow_attr->port = 1;
 1736                         if (err)
 1737                                 goto err_create_flow;
 1738                         j++;
 1739                 }
 1740                 /* function to create mirror rule */
 1741                 i++;
 1742         }
 1743 
 1744         return &mflow->ibflow;
 1745 
 1746 err_create_flow:
 1747         while (i) {
 1748                 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
 1749                                              mflow->reg_id[i].id);
 1750                 i--;
 1751         }
 1752 
 1753         while (j) {
 1754                 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
 1755                                              mflow->reg_id[j].mirror);
 1756                 j--;
 1757         }
 1758 err_free:
 1759         kfree(mflow);
 1760         return ERR_PTR(err);
 1761 }
 1762 
 1763 static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
 1764 {
 1765         int err, ret = 0;
 1766         int i = 0;
 1767         struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
 1768         struct mlx4_ib_flow *mflow = to_mflow(flow_id);
 1769 
 1770         while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
 1771                 err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
 1772                 if (err)
 1773                         ret = err;
 1774                 if (mflow->reg_id[i].mirror) {
 1775                         err = __mlx4_ib_destroy_flow(mdev->dev,
 1776                                                      mflow->reg_id[i].mirror);
 1777                         if (err)
 1778                                 ret = err;
 1779                 }
 1780                 i++;
 1781         }
 1782 
 1783         kfree(mflow);
 1784         return ret;
 1785 }
 1786 
 1787 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 1788 {
 1789         int err;
 1790         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 1791         struct mlx4_dev *dev = mdev->dev;
 1792         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 1793         struct mlx4_ib_steering *ib_steering = NULL;
 1794         enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
 1795         struct mlx4_flow_reg_id reg_id;
 1796 
 1797         if (mdev->dev->caps.steering_mode ==
 1798             MLX4_STEERING_MODE_DEVICE_MANAGED) {
 1799                 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
 1800                 if (!ib_steering)
 1801                         return -ENOMEM;
 1802         }
 1803 
 1804         err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
 1805                                     !!(mqp->flags &
 1806                                        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
 1807                                     prot, &reg_id.id);
 1808         if (err) {
 1809                 pr_err("multicast attach op failed, err %d\n", err);
 1810                 goto err_malloc;
 1811         }
 1812 
 1813         reg_id.mirror = 0;
 1814         if (mlx4_is_bonded(dev)) {
 1815                 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
 1816                                             (mqp->port == 1) ? 2 : 1,
 1817                                             !!(mqp->flags &
 1818                                             MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
 1819                                             prot, &reg_id.mirror);
 1820                 if (err)
 1821                         goto err_add;
 1822         }
 1823 
 1824         err = add_gid_entry(ibqp, gid);
 1825         if (err)
 1826                 goto err_add;
 1827 
 1828         if (ib_steering) {
 1829                 memcpy(ib_steering->gid.raw, gid->raw, 16);
 1830                 ib_steering->reg_id = reg_id;
 1831                 mutex_lock(&mqp->mutex);
 1832                 list_add(&ib_steering->list, &mqp->steering_rules);
 1833                 mutex_unlock(&mqp->mutex);
 1834         }
 1835         return 0;
 1836 
 1837 err_add:
 1838         mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
 1839                               prot, reg_id.id);
 1840         if (reg_id.mirror)
 1841                 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
 1842                                       prot, reg_id.mirror);
 1843 err_malloc:
 1844         kfree(ib_steering);
 1845 
 1846         return err;
 1847 }
 1848 
 1849 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
 1850 {
 1851         struct mlx4_ib_gid_entry *ge;
 1852         struct mlx4_ib_gid_entry *tmp;
 1853         struct mlx4_ib_gid_entry *ret = NULL;
 1854 
 1855         list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
 1856                 if (!memcmp(raw, ge->gid.raw, 16)) {
 1857                         ret = ge;
 1858                         break;
 1859                 }
 1860         }
 1861 
 1862         return ret;
 1863 }
 1864 
 1865 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 1866 {
 1867         int err;
 1868         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 1869         struct mlx4_dev *dev = mdev->dev;
 1870         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 1871         struct ifnet *ndev;
 1872         struct mlx4_ib_gid_entry *ge;
 1873         struct mlx4_flow_reg_id reg_id = {0, 0};
 1874         enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
 1875 
 1876         if (mdev->dev->caps.steering_mode ==
 1877             MLX4_STEERING_MODE_DEVICE_MANAGED) {
 1878                 struct mlx4_ib_steering *ib_steering;
 1879 
 1880                 mutex_lock(&mqp->mutex);
 1881                 list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
 1882                         if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
 1883                                 list_del(&ib_steering->list);
 1884                                 break;
 1885                         }
 1886                 }
 1887                 mutex_unlock(&mqp->mutex);
 1888                 if (&ib_steering->list == &mqp->steering_rules) {
 1889                         pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
 1890                         return -EINVAL;
 1891                 }
 1892                 reg_id = ib_steering->reg_id;
 1893                 kfree(ib_steering);
 1894         }
 1895 
 1896         err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
 1897                                     prot, reg_id.id);
 1898         if (err)
 1899                 return err;
 1900 
 1901         if (mlx4_is_bonded(dev)) {
 1902                 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
 1903                                             prot, reg_id.mirror);
 1904                 if (err)
 1905                         return err;
 1906         }
 1907 
 1908         mutex_lock(&mqp->mutex);
 1909         ge = find_gid_entry(mqp, gid->raw);
 1910         if (ge) {
 1911                 spin_lock_bh(&mdev->iboe.lock);
 1912                 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
 1913                 if (ndev)
 1914                         if_ref(ndev);
 1915                 spin_unlock_bh(&mdev->iboe.lock);
 1916                 if (ndev)
 1917                         if_rele(ndev);
 1918                 list_del(&ge->list);
 1919                 kfree(ge);
 1920         } else
 1921                 pr_warn("could not find mgid entry\n");
 1922 
 1923         mutex_unlock(&mqp->mutex);
 1924 
 1925         return 0;
 1926 }
 1927 
 1928 static int init_node_data(struct mlx4_ib_dev *dev)
 1929 {
 1930         struct ib_smp *in_mad  = NULL;
 1931         struct ib_smp *out_mad = NULL;
 1932         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
 1933         int err = -ENOMEM;
 1934 
 1935         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 1936         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 1937         if (!in_mad || !out_mad)
 1938                 goto out;
 1939 
 1940         init_query_mad(in_mad);
 1941         in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
 1942         if (mlx4_is_master(dev->dev))
 1943                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
 1944 
 1945         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
 1946         if (err)
 1947                 goto out;
 1948 
 1949         memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
 1950 
 1951         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 1952 
 1953         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
 1954         if (err)
 1955                 goto out;
 1956 
 1957         dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
 1958         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
 1959 
 1960 out:
 1961         kfree(in_mad);
 1962         kfree(out_mad);
 1963         return err;
 1964 }
 1965 
 1966 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 1967                         char *buf)
 1968 {
 1969         struct mlx4_ib_dev *dev =
 1970                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 1971         return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
 1972 }
 1973 
 1974 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 1975                         char *buf)
 1976 {
 1977         struct mlx4_ib_dev *dev =
 1978                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 1979         return sprintf(buf, "%x\n", dev->dev->rev_id);
 1980 }
 1981 
 1982 static ssize_t show_board(struct device *device, struct device_attribute *attr,
 1983                           char *buf)
 1984 {
 1985         struct mlx4_ib_dev *dev =
 1986                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 1987         return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
 1988                        dev->dev->board_id);
 1989 }
 1990 
 1991 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
 1992 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
 1993 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
 1994 
 1995 static struct device_attribute *mlx4_class_attributes[] = {
 1996         &dev_attr_hw_rev,
 1997         &dev_attr_hca_type,
 1998         &dev_attr_board_id
 1999 };
 2000 
 2001 struct diag_counter {
 2002         const char *name;
 2003         u32 offset;
 2004 };
 2005 
 2006 #define DIAG_COUNTER(_name, _offset)                    \
 2007         { .name = #_name, .offset = _offset }
 2008 
 2009 static const struct diag_counter diag_basic[] = {
 2010         DIAG_COUNTER(rq_num_lle, 0x00),
 2011         DIAG_COUNTER(sq_num_lle, 0x04),
 2012         DIAG_COUNTER(rq_num_lqpoe, 0x08),
 2013         DIAG_COUNTER(sq_num_lqpoe, 0x0C),
 2014         DIAG_COUNTER(rq_num_lpe, 0x18),
 2015         DIAG_COUNTER(sq_num_lpe, 0x1C),
 2016         DIAG_COUNTER(rq_num_wrfe, 0x20),
 2017         DIAG_COUNTER(sq_num_wrfe, 0x24),
 2018         DIAG_COUNTER(sq_num_mwbe, 0x2C),
 2019         DIAG_COUNTER(sq_num_bre, 0x34),
 2020         DIAG_COUNTER(sq_num_rire, 0x44),
 2021         DIAG_COUNTER(rq_num_rire, 0x48),
 2022         DIAG_COUNTER(sq_num_rae, 0x4C),
 2023         DIAG_COUNTER(rq_num_rae, 0x50),
 2024         DIAG_COUNTER(sq_num_roe, 0x54),
 2025         DIAG_COUNTER(sq_num_tree, 0x5C),
 2026         DIAG_COUNTER(sq_num_rree, 0x64),
 2027         DIAG_COUNTER(rq_num_rnr, 0x68),
 2028         DIAG_COUNTER(sq_num_rnr, 0x6C),
 2029         DIAG_COUNTER(rq_num_oos, 0x100),
 2030         DIAG_COUNTER(sq_num_oos, 0x104),
 2031 };
 2032 
 2033 static const struct diag_counter diag_ext[] = {
 2034         DIAG_COUNTER(rq_num_dup, 0x130),
 2035         DIAG_COUNTER(sq_num_to, 0x134),
 2036 };
 2037 
 2038 static const struct diag_counter diag_device_only[] = {
 2039         DIAG_COUNTER(num_cqovf, 0x1A0),
 2040         DIAG_COUNTER(rq_num_udsdprd, 0x118),
 2041 };
 2042 
 2043 static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
 2044                                                     u8 port_num)
 2045 {
 2046         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 2047         struct mlx4_ib_diag_counters *diag = dev->diag_counters;
 2048 
 2049         if (!diag[!!port_num].name)
 2050                 return NULL;
 2051 
 2052         return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
 2053                                           diag[!!port_num].num_counters,
 2054                                           RDMA_HW_STATS_DEFAULT_LIFESPAN);
 2055 }
 2056 
 2057 static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
 2058                                 struct rdma_hw_stats *stats,
 2059                                 u8 port, int index)
 2060 {
 2061         struct mlx4_ib_dev *dev = to_mdev(ibdev);
 2062         struct mlx4_ib_diag_counters *diag = dev->diag_counters;
 2063         u32 hw_value[ARRAY_SIZE(diag_device_only) +
 2064                 ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
 2065         int ret;
 2066         int i;
 2067 
 2068         ret = mlx4_query_diag_counters(dev->dev,
 2069                                        MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
 2070                                        diag[!!port].offset, hw_value,
 2071                                        diag[!!port].num_counters, port);
 2072 
 2073         if (ret)
 2074                 return ret;
 2075 
 2076         for (i = 0; i < diag[!!port].num_counters; i++)
 2077                 stats->value[i] = hw_value[i];
 2078 
 2079         return diag[!!port].num_counters;
 2080 }
 2081 
 2082 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
 2083                                          const char ***name,
 2084                                          u32 **offset,
 2085                                          u32 *num,
 2086                                          bool port)
 2087 {
 2088         u32 num_counters;
 2089 
 2090         num_counters = ARRAY_SIZE(diag_basic);
 2091 
 2092         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
 2093                 num_counters += ARRAY_SIZE(diag_ext);
 2094 
 2095         if (!port)
 2096                 num_counters += ARRAY_SIZE(diag_device_only);
 2097 
 2098         *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
 2099         if (!*name)
 2100                 return -ENOMEM;
 2101 
 2102         *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
 2103         if (!*offset)
 2104                 goto err_name;
 2105 
 2106         *num = num_counters;
 2107 
 2108         return 0;
 2109 
 2110 err_name:
 2111         kfree(*name);
 2112         return -ENOMEM;
 2113 }
 2114 
 2115 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
 2116                                        const char **name,
 2117                                        u32 *offset,
 2118                                        bool port)
 2119 {
 2120         int i;
 2121         int j;
 2122 
 2123         for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
 2124                 name[i] = diag_basic[i].name;
 2125                 offset[i] = diag_basic[i].offset;
 2126         }
 2127 
 2128         if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
 2129                 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
 2130                         name[j] = diag_ext[i].name;
 2131                         offset[j] = diag_ext[i].offset;
 2132                 }
 2133         }
 2134 
 2135         if (!port) {
 2136                 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
 2137                         name[j] = diag_device_only[i].name;
 2138                         offset[j] = diag_device_only[i].offset;
 2139                 }
 2140         }
 2141 }
 2142 
 2143 static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
 2144 {
 2145         struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
 2146         int i;
 2147         int ret;
 2148         bool per_port = !!(ibdev->dev->caps.flags2 &
 2149                 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
 2150 
 2151         if (mlx4_is_slave(ibdev->dev))
 2152                 return 0;
 2153 
 2154         for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
 2155                 /* i == 1 means we are building port counters */
 2156                 if (i && !per_port)
 2157                         continue;
 2158 
 2159                 ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
 2160                                                     &diag[i].offset,
 2161                                                     &diag[i].num_counters, i);
 2162                 if (ret)
 2163                         goto err_alloc;
 2164 
 2165                 mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
 2166                                            diag[i].offset, i);
 2167         }
 2168 
 2169         ibdev->ib_dev.get_hw_stats      = mlx4_ib_get_hw_stats;
 2170         ibdev->ib_dev.alloc_hw_stats    = mlx4_ib_alloc_hw_stats;
 2171 
 2172         return 0;
 2173 
 2174 err_alloc:
 2175         if (i) {
 2176                 kfree(diag[i - 1].name);
 2177                 kfree(diag[i - 1].offset);
 2178         }
 2179 
 2180         return ret;
 2181 }
 2182 
 2183 static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
 2184 {
 2185         int i;
 2186 
 2187         for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
 2188                 kfree(ibdev->diag_counters[i].offset);
 2189                 kfree(ibdev->diag_counters[i].name);
 2190         }
 2191 }
 2192 
 2193 #define MLX4_IB_INVALID_MAC     ((u64)-1)
 2194 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
 2195                                struct ifnet *dev,
 2196                                int port)
 2197 {
 2198         u64 new_smac = 0;
 2199         u64 release_mac = MLX4_IB_INVALID_MAC;
 2200         struct mlx4_ib_qp *qp;
 2201 
 2202         new_smac = mlx4_mac_to_u64(IF_LLADDR(dev));
 2203 
 2204         atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
 2205 
 2206         /* no need to update QP1 or register the mac in non-SRIOV mode */
 2207         if (!mlx4_is_mfunc(ibdev->dev))
 2208                 return;
 2209 
 2210         mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
 2211         qp = ibdev->qp1_proxy[port - 1];
 2212         if (qp) {
 2213                 int new_smac_index;
 2214                 u64 old_smac;
 2215                 struct mlx4_update_qp_params update_params;
 2216 
 2217                 mutex_lock(&qp->mutex);
 2218                 old_smac = qp->pri.smac;
 2219                 if (new_smac == old_smac)
 2220                         goto unlock;
 2221 
 2222                 new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
 2223 
 2224                 if (new_smac_index < 0)
 2225                         goto unlock;
 2226 
 2227                 update_params.smac_index = new_smac_index;
 2228                 if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
 2229                                    &update_params)) {
 2230                         release_mac = new_smac;
 2231                         goto unlock;
 2232                 }
 2233                 /* if the old port was zero, no mac had yet been registered for this QP */
 2234                 if (qp->pri.smac_port)
 2235                         release_mac = old_smac;
 2236                 qp->pri.smac = new_smac;
 2237                 qp->pri.smac_port = port;
 2238                 qp->pri.smac_index = new_smac_index;
 2239         }
 2240 
 2241 unlock:
 2242         if (release_mac != MLX4_IB_INVALID_MAC)
 2243                 mlx4_unregister_mac(ibdev->dev, port, release_mac);
 2244         if (qp)
 2245                 mutex_unlock(&qp->mutex);
 2246         mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
 2247 }
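
/*
 * When the device is multi-function (SR-IOV), the update above is
 * effectively make-before-break: the new source MAC is registered first,
 * the proxy QP1 is switched to the new SMAC index via MLX4_UPDATE_QP_SMAC,
 * and only then is exactly one MAC released (the old one on success, or the
 * freshly registered one if the QP update fails), so the port is never left
 * without a registered source MAC.
 */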
 2248 
 2249 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
 2250                                  struct ifnet *dev,
 2251                                  unsigned long event)
 2252 
 2253 {
 2254         struct mlx4_ib_iboe *iboe;
 2255         int update_qps_port = -1;
 2256         int port;
 2257 
 2258         iboe = &ibdev->iboe;
 2259 
 2260         spin_lock_bh(&iboe->lock);
 2261         mlx4_foreach_ib_transport_port(port, ibdev->dev) {
 2262 
 2263                 iboe->netdevs[port - 1] =
 2264                         mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
 2265 
 2266                 if (dev == iboe->netdevs[port - 1] &&
 2267                     (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
 2268                      event == NETDEV_UP || event == NETDEV_CHANGE))
 2269                         update_qps_port = port;
 2270 
 2271         }
 2272         spin_unlock_bh(&iboe->lock);
 2273 
 2274         if (update_qps_port > 0)
 2275                 mlx4_ib_update_qps(ibdev, dev, update_qps_port);
 2276 }
 2277 
 2278 static int mlx4_ib_netdev_event(struct notifier_block *this,
 2279                                 unsigned long event, void *ptr)
 2280 {
 2281         struct ifnet *dev = netdev_notifier_info_to_ifp(ptr);
 2282         struct mlx4_ib_dev *ibdev;
 2283 
 2284         if (dev->if_vnet != &init_net)
 2285                 return NOTIFY_DONE;
 2286 
 2287         ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
 2288         mlx4_ib_scan_netdevs(ibdev, dev, event);
 2289 
 2290         return NOTIFY_DONE;
 2291 }
 2292 
 2293 static void init_pkeys(struct mlx4_ib_dev *ibdev)
 2294 {
 2295         int port;
 2296         int slave;
 2297         int i;
 2298 
 2299         if (mlx4_is_master(ibdev->dev)) {
 2300                 for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
 2301                      ++slave) {
 2302                         for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
 2303                                 for (i = 0;
 2304                                      i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
 2305                                      ++i) {
 2306                                         ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
 2307                                         /* master has the identity virt2phys pkey mapping */
 2308                                                 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
 2309                                                         ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
 2310                                         mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
 2311                                                              ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
 2312                                 }
 2313                         }
 2314                 }
 2315                 /* initialize pkey cache */
 2316                 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
 2317                         for (i = 0;
 2318                              i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
 2319                              ++i)
 2320                                 ibdev->pkeys.phys_pkey_cache[port-1][i] =
 2321                                         (i) ? 0 : 0xFFFF;
 2322                 }
 2323         }
 2324 }
 2325 
 2326 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 2327 {
 2328         int i, j, eq = 0, total_eqs = 0;
 2329 
 2330         ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
 2331                                   sizeof(ibdev->eq_table[0]), GFP_KERNEL);
 2332         if (!ibdev->eq_table)
 2333                 return;
 2334 
 2335         for (i = 1; i <= dev->caps.num_ports; i++) {
 2336                 for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
 2337                      j++, total_eqs++) {
 2338                         if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
 2339                                 continue;
 2340                         ibdev->eq_table[eq] = total_eqs;
 2341                         if (!mlx4_assign_eq(dev, i,
 2342                                             &ibdev->eq_table[eq]))
 2343                                 eq++;
 2344                         else
 2345                                 ibdev->eq_table[eq] = -1;
 2346                 }
 2347         }
 2348 
 2349         for (i = eq; i < dev->caps.num_comp_vectors;
 2350              ibdev->eq_table[i++] = -1)
 2351                 ;
 2352 
 2353         /* Advertise the new number of EQs to clients */
 2354         ibdev->ib_dev.num_comp_vectors = eq;
 2355 }
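
/*
 * In short, the loop above walks each port's share of completion vectors,
 * skipping shared vectors for every port but the first, and keeps only
 * those that mlx4_assign_eq() actually grants.  The advertised
 * num_comp_vectors is the count of granted vectors; the remaining eq_table
 * slots are set to -1 and are never released, since mlx4_ib_free_eqs()
 * walks only the advertised count.
 */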
 2356 
 2357 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 2358 {
 2359         int i;
 2360         int total_eqs = ibdev->ib_dev.num_comp_vectors;
 2361 
 2362         /* no eqs were allocated */
 2363         if (!ibdev->eq_table)
 2364                 return;
 2365 
 2366         /* Reset the advertised EQ number */
 2367         ibdev->ib_dev.num_comp_vectors = 0;
 2368 
 2369         for (i = 0; i < total_eqs; i++)
 2370                 mlx4_release_eq(dev, ibdev->eq_table[i]);
 2371 
 2372         kfree(ibdev->eq_table);
 2373         ibdev->eq_table = NULL;
 2374 }
 2375 
 2376 static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
 2377                                struct ib_port_immutable *immutable)
 2378 {
 2379         struct ib_port_attr attr;
 2380         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 2381         int err;
 2382 
 2383         err = mlx4_ib_query_port(ibdev, port_num, &attr);
 2384         if (err)
 2385                 return err;
 2386 
 2387         immutable->pkey_tbl_len = attr.pkey_tbl_len;
 2388         immutable->gid_tbl_len = attr.gid_tbl_len;
 2389 
 2390         if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
 2391                 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 2392         } else {
 2393                 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
 2394                         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 2395                 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
 2396                         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
 2397                                 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 2398         }
 2399 
 2400         immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 2401 
 2402         return 0;
 2403 }
 2404 
 2405 static void get_fw_ver_str(struct ib_device *device, char *str,
 2406                            size_t str_len)
 2407 {
 2408         struct mlx4_ib_dev *dev =
 2409                 container_of(device, struct mlx4_ib_dev, ib_dev);
 2410         snprintf(str, str_len, "%d.%d.%d",
 2411                  (int) (dev->dev->caps.fw_ver >> 32),
 2412                  (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
 2413                  (int) dev->dev->caps.fw_ver & 0xffff);
 2414 }
 2415 
 2416 static void *mlx4_ib_add(struct mlx4_dev *dev)
 2417 {
 2418         struct mlx4_ib_dev *ibdev;
 2419         int num_ports;
 2420         int i, j;
 2421         int err;
 2422         struct mlx4_ib_iboe *iboe;
 2423         int ib_num_ports = 0;
 2424         int num_req_counters;
 2425         int allocated;
 2426         u32 counter_index;
 2427         struct counter_index *new_counter_index = NULL;
 2428 
 2429         pr_info_once("%s", mlx4_ib_version);
 2430 
 2431         num_ports = 0;
 2432         mlx4_foreach_ib_transport_port(i, dev)
 2433                 num_ports++;
 2434 
 2435         /* No point in registering a device with no ports... */
 2436         if (num_ports == 0)
 2437                 return NULL;
 2438 
 2439         ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
 2440         if (!ibdev) {
 2441                 dev_err(&dev->persist->pdev->dev,
 2442                         "Device struct alloc failed\n");
 2443                 return NULL;
 2444         }
 2445 
 2446         iboe = &ibdev->iboe;
 2447 
 2448         if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
 2449                 goto err_dealloc;
 2450 
 2451         if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
 2452                 goto err_pd;
 2453 
 2454         ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
 2455                                  PAGE_SIZE);
 2456         if (!ibdev->uar_map)
 2457                 goto err_uar;
 2458         MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
 2459 
 2460         ibdev->dev = dev;
 2461         ibdev->bond_next_port   = 0;
 2462 
 2463         INIT_IB_DEVICE_OPS(&ibdev->ib_dev.ops, mlx4, MLX4);
 2464         strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 2465         ibdev->ib_dev.owner             = THIS_MODULE;
 2466         ibdev->ib_dev.node_type         = RDMA_NODE_IB_CA;
 2467         ibdev->ib_dev.local_dma_lkey    = dev->caps.reserved_lkey;
 2468         ibdev->num_ports                = num_ports;
 2469         ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
 2470                                                 1 : ibdev->num_ports;
 2471         ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
 2472         ibdev->ib_dev.dma_device        = &dev->persist->pdev->dev;
 2473         ibdev->ib_dev.get_netdev        = mlx4_ib_get_netdev;
 2474         ibdev->ib_dev.add_gid           = mlx4_ib_add_gid;
 2475         ibdev->ib_dev.del_gid           = mlx4_ib_del_gid;
 2476 
 2477         if (dev->caps.userspace_caps)
 2478                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
 2479         else
 2480                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
 2481 
 2482         ibdev->ib_dev.uverbs_cmd_mask   =
 2483                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 2484                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 2485                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 2486                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 2487                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 2488                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 2489                 (1ull << IB_USER_VERBS_CMD_REREG_MR)            |
 2490                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 2491                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 2492                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 2493                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 2494                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 2495                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 2496                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 2497                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 2498                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 2499                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
 2500                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
 2501                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 2502                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 2503                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 2504                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
 2505                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
 2506                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
 2507 
 2508         ibdev->ib_dev.query_device      = mlx4_ib_query_device;
 2509         ibdev->ib_dev.query_port        = mlx4_ib_query_port;
 2510         ibdev->ib_dev.get_link_layer    = mlx4_ib_port_link_layer;
 2511         ibdev->ib_dev.query_gid         = mlx4_ib_query_gid;
 2512         ibdev->ib_dev.query_pkey        = mlx4_ib_query_pkey;
 2513         ibdev->ib_dev.modify_device     = mlx4_ib_modify_device;
 2514         ibdev->ib_dev.modify_port       = mlx4_ib_modify_port;
 2515         ibdev->ib_dev.alloc_ucontext    = mlx4_ib_alloc_ucontext;
 2516         ibdev->ib_dev.dealloc_ucontext  = mlx4_ib_dealloc_ucontext;
 2517         ibdev->ib_dev.mmap              = mlx4_ib_mmap;
 2518         ibdev->ib_dev.alloc_pd          = mlx4_ib_alloc_pd;
 2519         ibdev->ib_dev.dealloc_pd        = mlx4_ib_dealloc_pd;
 2520         ibdev->ib_dev.create_ah         = mlx4_ib_create_ah;
 2521         ibdev->ib_dev.query_ah          = mlx4_ib_query_ah;
 2522         ibdev->ib_dev.destroy_ah        = mlx4_ib_destroy_ah;
 2523         ibdev->ib_dev.create_srq        = mlx4_ib_create_srq;
 2524         ibdev->ib_dev.modify_srq        = mlx4_ib_modify_srq;
 2525         ibdev->ib_dev.query_srq         = mlx4_ib_query_srq;
 2526         ibdev->ib_dev.destroy_srq       = mlx4_ib_destroy_srq;
 2527         ibdev->ib_dev.post_srq_recv     = mlx4_ib_post_srq_recv;
 2528         ibdev->ib_dev.create_qp         = mlx4_ib_create_qp;
 2529         ibdev->ib_dev.modify_qp         = mlx4_ib_modify_qp;
 2530         ibdev->ib_dev.query_qp          = mlx4_ib_query_qp;
 2531         ibdev->ib_dev.destroy_qp        = mlx4_ib_destroy_qp;
 2532         ibdev->ib_dev.post_send         = mlx4_ib_post_send;
 2533         ibdev->ib_dev.post_recv         = mlx4_ib_post_recv;
 2534         ibdev->ib_dev.create_cq         = mlx4_ib_create_cq;
 2535         ibdev->ib_dev.modify_cq         = mlx4_ib_modify_cq;
 2536         ibdev->ib_dev.resize_cq         = mlx4_ib_resize_cq;
 2537         ibdev->ib_dev.destroy_cq        = mlx4_ib_destroy_cq;
 2538         ibdev->ib_dev.poll_cq           = mlx4_ib_poll_cq;
 2539         ibdev->ib_dev.req_notify_cq     = mlx4_ib_arm_cq;
 2540         ibdev->ib_dev.get_dma_mr        = mlx4_ib_get_dma_mr;
 2541         ibdev->ib_dev.reg_user_mr       = mlx4_ib_reg_user_mr;
 2542         ibdev->ib_dev.rereg_user_mr     = mlx4_ib_rereg_user_mr;
 2543         ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
 2544         ibdev->ib_dev.alloc_mr          = mlx4_ib_alloc_mr;
 2545         ibdev->ib_dev.map_mr_sg         = mlx4_ib_map_mr_sg;
 2546         ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
 2547         ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
 2548         ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
 2549         ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
 2550         ibdev->ib_dev.get_dev_fw_str    = get_fw_ver_str;
 2551 
 2552         if (!mlx4_is_slave(ibdev->dev)) {
 2553                 ibdev->ib_dev.alloc_fmr         = mlx4_ib_fmr_alloc;
 2554                 ibdev->ib_dev.map_phys_fmr      = mlx4_ib_map_phys_fmr;
 2555                 ibdev->ib_dev.unmap_fmr         = mlx4_ib_unmap_fmr;
 2556                 ibdev->ib_dev.dealloc_fmr       = mlx4_ib_fmr_dealloc;
 2557         }
 2558 
 2559         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
 2560             dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
 2561                 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
 2562                 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
 2563 
 2564                 ibdev->ib_dev.uverbs_cmd_mask |=
 2565                         (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
 2566                         (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
 2567         }
 2568 
 2569         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
 2570                 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
 2571                 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
 2572                 ibdev->ib_dev.uverbs_cmd_mask |=
 2573                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
 2574                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
 2575         }
 2576 
 2577         if (check_flow_steering_support(dev)) {
 2578                 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
 2579                 ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
 2580                 ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
 2581 
 2582                 ibdev->ib_dev.uverbs_ex_cmd_mask        |=
 2583                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
 2584                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
 2585         }
 2586 
 2587         ibdev->ib_dev.uverbs_ex_cmd_mask |=
 2588                 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
 2589                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
 2590                 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 2591 
 2592         mlx4_ib_alloc_eqs(dev, ibdev);
 2593 
 2594         spin_lock_init(&iboe->lock);
 2595 
 2596         if (init_node_data(ibdev))
 2597                 goto err_map;
 2598         mlx4_init_sl2vl_tbl(ibdev);
 2599 
 2600         for (i = 0; i < ibdev->num_ports; ++i) {
 2601                 mutex_init(&ibdev->counters_table[i].mutex);
 2602                 INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
 2603         }
 2604 
 2605         num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
 2606         for (i = 0; i < num_req_counters; ++i) {
 2607                 mutex_init(&ibdev->qp1_proxy_lock[i]);
 2608                 allocated = 0;
 2609                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
 2610                                                 IB_LINK_LAYER_ETHERNET) {
 2611                         err = mlx4_counter_alloc(ibdev->dev, &counter_index);
 2612                         /* if allocating a new counter failed, use the default */
 2613                         if (err)
 2614                                 counter_index =
 2615                                         mlx4_get_default_counter_index(dev,
 2616                                                                        i + 1);
 2617                         else
 2618                                 allocated = 1;
 2619                 } else { /* IB_LINK_LAYER_INFINIBAND uses the default counter */
 2620                         counter_index = mlx4_get_default_counter_index(dev,
 2621                                                                        i + 1);
 2622                 }
 2623                 new_counter_index = kmalloc(sizeof(*new_counter_index),
 2624                                             GFP_KERNEL);
 2625                 if (!new_counter_index) {
 2626                         if (allocated)
 2627                                 mlx4_counter_free(ibdev->dev, counter_index);
 2628                         goto err_counter;
 2629                 }
 2630                 new_counter_index->index = counter_index;
 2631                 new_counter_index->allocated = allocated;
 2632                 list_add_tail(&new_counter_index->list,
 2633                               &ibdev->counters_table[i].counters_list);
 2634                 ibdev->counters_table[i].default_counter = counter_index;
 2635                 pr_info("counter index %d for port %d allocated %d\n",
 2636                         counter_index, i + 1, allocated);
 2637         }
 2638         if (mlx4_is_bonded(dev))
 2639                 for (i = 1; i < ibdev->num_ports ; ++i) {
 2640                         new_counter_index =
 2641                                         kmalloc(sizeof(struct counter_index),
 2642                                                 GFP_KERNEL);
 2643                         if (!new_counter_index)
 2644                                 goto err_counter;
 2645                         new_counter_index->index = counter_index;
 2646                         new_counter_index->allocated = 0;
 2647                         list_add_tail(&new_counter_index->list,
 2648                                       &ibdev->counters_table[i].counters_list);
 2649                         ibdev->counters_table[i].default_counter =
 2650                                                                 counter_index;
 2651                 }
 2652 
 2653         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 2654                 ib_num_ports++;
 2655 
 2656         spin_lock_init(&ibdev->sm_lock);
 2657         mutex_init(&ibdev->cap_mask_mutex);
 2658         INIT_LIST_HEAD(&ibdev->qp_list);
 2659         spin_lock_init(&ibdev->reset_flow_resource_lock);
 2660 
 2661         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
 2662             ib_num_ports) {
 2663                 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
 2664                 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
 2665                                             MLX4_IB_UC_STEER_QPN_ALIGN,
 2666                                             &ibdev->steer_qpn_base, 0);
 2667                 if (err)
 2668                         goto err_counter;
 2669 
 2670                 ibdev->ib_uc_qpns_bitmap =
 2671                         kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
 2672                                 sizeof(long),
 2673                                 GFP_KERNEL);
 2674                 if (!ibdev->ib_uc_qpns_bitmap) {
 2675                         dev_err(&dev->persist->pdev->dev,
 2676                                 "bit map alloc failed\n");
 2677                         goto err_steer_qp_release;
 2678                 }
 2679 
 2680                 bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
 2681 
 2682                 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
 2683                                 dev, ibdev->steer_qpn_base,
 2684                                 ibdev->steer_qpn_base +
 2685                                 ibdev->steer_qpn_count - 1);
 2686                 if (err)
 2687                         goto err_steer_free_bitmap;
 2688         }
 2689 
 2690         for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
 2691                 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
 2692 
 2693         if (mlx4_ib_alloc_diag_counters(ibdev))
 2694                 goto err_steer_free_bitmap;
 2695 
 2696         if (ib_register_device(&ibdev->ib_dev, NULL))
 2697                 goto err_diag_counters;
 2698 
 2699         if (mlx4_ib_mad_init(ibdev))
 2700                 goto err_reg;
 2701 
 2702         if (mlx4_ib_init_sriov(ibdev))
 2703                 goto err_mad;
 2704 
 2705         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
 2706             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
 2707                 if (!iboe->nb.notifier_call) {
 2708                         iboe->nb.notifier_call = mlx4_ib_netdev_event;
 2709                         err = register_netdevice_notifier(&iboe->nb);
 2710                         if (err) {
 2711                                 iboe->nb.notifier_call = NULL;
 2712                                 goto err_notif;
 2713                         }
 2714                 }
 2715                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
 2716                         err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
 2717                         if (err) {
 2718                                 goto err_notif;
 2719                         }
 2720                 }
 2721         }
 2722 
 2723         for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
 2724                 if (device_create_file(&ibdev->ib_dev.dev,
 2725                                        mlx4_class_attributes[j]))
 2726                         goto err_notif;
 2727         }
 2728 
 2729         ibdev->ib_active = true;
 2730 
 2731         if (mlx4_is_mfunc(ibdev->dev))
 2732                 init_pkeys(ibdev);
 2733 
 2734         /* create paravirt contexts for any VFs which are active */
 2735         if (mlx4_is_master(ibdev->dev)) {
 2736                 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
 2737                         if (j == mlx4_master_func_num(ibdev->dev))
 2738                                 continue;
 2739                         if (mlx4_is_slave_active(ibdev->dev, j))
 2740                                 do_slave_init(ibdev, j, 1);
 2741                 }
 2742         }
 2743         return ibdev;
 2744 
 2745 err_notif:
 2746         if (ibdev->iboe.nb.notifier_call) {
 2747                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
 2748                         pr_warn("failure unregistering notifier\n");
 2749                 ibdev->iboe.nb.notifier_call = NULL;
 2750         }
 2751         flush_workqueue(wq);
 2752 
 2753         mlx4_ib_close_sriov(ibdev);
 2754 
 2755 err_mad:
 2756         mlx4_ib_mad_cleanup(ibdev);
 2757 
 2758 err_reg:
 2759         ib_unregister_device(&ibdev->ib_dev);
 2760 
 2761 err_diag_counters:
 2762         mlx4_ib_diag_cleanup(ibdev);
 2763 
 2764 err_steer_free_bitmap:
 2765         kfree(ibdev->ib_uc_qpns_bitmap);
 2766 
 2767 err_steer_qp_release:
 2768         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
 2769                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
 2770                                       ibdev->steer_qpn_count);
 2771 err_counter:
 2772         for (i = 0; i < ibdev->num_ports; ++i)
 2773                 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
 2774 
 2775 err_map:
 2776         iounmap(ibdev->uar_map);
 2777 
 2778 err_uar:
 2779         mlx4_uar_free(dev, &ibdev->priv_uar);
 2780 
 2781 err_pd:
 2782         mlx4_pd_free(dev, ibdev->priv_pdn);
 2783 
 2784 err_dealloc:
 2785         ib_dealloc_device(&ibdev->ib_dev);
 2786 
 2787         return NULL;
 2788 }
 2789 
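      /*
       * Reserve a power-of-two-aligned block of 'count' QP numbers from the
       * device-managed steering range.  On success, *qpn is set to the first
       * QPN of the block and 0 is returned; otherwise the negative value from
       * bitmap_find_free_region() is propagated.
       */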
 2790 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
 2791 {
 2792         int offset;
 2793 
 2794         WARN_ON(!dev->ib_uc_qpns_bitmap);
 2795 
 2796         offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
 2797                                          dev->steer_qpn_count,
 2798                                          get_count_order(count));
 2799         if (offset < 0)
 2800                 return offset;
 2801 
 2802         *qpn = dev->steer_qpn_base + offset;
 2803         return 0;
 2804 }
 2805 
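      /*
       * Return a block of 'count' steering QP numbers, previously obtained
       * from mlx4_ib_steer_qp_alloc(), to the bitmap.  A zero QPN, or a
       * device that is not in device-managed steering mode, is ignored.
       */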
 2806 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
 2807 {
 2808         if (!qpn ||
 2809             dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
 2810                 return;
 2811 
 2812         BUG_ON(qpn < dev->steer_qpn_base);
 2813 
 2814         bitmap_release_region(dev->ib_uc_qpns_bitmap,
 2815                               qpn - dev->steer_qpn_base,
 2816                               get_count_order(count));
 2817 }
 2818 
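      /*
       * Attach (is_attach != 0) or detach a catch-all IB L2 steering rule
       * for the given QP.  Attaching builds a single-spec flow with an
       * all-zero mask and stores the registration id in mqp->reg_id, which
       * the detach path later passes to __mlx4_ib_destroy_flow().
       */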
 2819 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 2820                          int is_attach)
 2821 {
 2822         int err;
 2823         size_t flow_size;
 2824         struct ib_flow_attr *flow = NULL;
 2825         struct ib_flow_spec_ib *ib_spec;
 2826 
 2827         if (is_attach) {
 2828                 flow_size = sizeof(struct ib_flow_attr) +
 2829                             sizeof(struct ib_flow_spec_ib);
 2830                 flow = kzalloc(flow_size, GFP_KERNEL);
 2831                 if (!flow)
 2832                         return -ENOMEM;
 2833                 flow->port = mqp->port;
 2834                 flow->num_of_specs = 1;
 2835                 flow->size = flow_size;
 2836                 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
 2837                 ib_spec->type = IB_FLOW_SPEC_IB;
 2838                 ib_spec->size = sizeof(struct ib_flow_spec_ib);
 2839                 /* Add an empty rule for IB L2 */
 2840                 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
 2841 
 2842                 err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
 2843                                             IB_FLOW_DOMAIN_NIC,
 2844                                             MLX4_FS_REGULAR,
 2845                                             &mqp->reg_id);
 2846         } else {
 2847                 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
 2848         }
 2849         kfree(flow);
 2850         return err;
 2851 }
 2852 
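      /*
       * Tear down an IB device instance in roughly the reverse order of its
       * setup: stop SR-IOV and MAD handling, unregister from the IB core and
       * the netdevice notifier chain, release steering QPNs, counters and
       * EQs, then free the UAR, the PD and the ib_device itself.
       */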
 2853 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 2854 {
 2855         struct mlx4_ib_dev *ibdev = ibdev_ptr;
 2856         int p;
 2857 
 2858         ibdev->ib_active = false;
 2859         flush_workqueue(wq);
 2860 
 2861         mlx4_ib_close_sriov(ibdev);
 2862         mlx4_ib_mad_cleanup(ibdev);
 2863         ib_unregister_device(&ibdev->ib_dev);
 2864         mlx4_ib_diag_cleanup(ibdev);
 2865         if (ibdev->iboe.nb.notifier_call) {
 2866                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
 2867                         pr_warn("failure unregistering notifier\n");
 2868                 ibdev->iboe.nb.notifier_call = NULL;
 2869         }
 2870 
 2871         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
 2872                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
 2873                                       ibdev->steer_qpn_count);
 2874                 kfree(ibdev->ib_uc_qpns_bitmap);
 2875         }
 2876 
 2877         iounmap(ibdev->uar_map);
 2878         for (p = 0; p < ibdev->num_ports; ++p)
 2879                 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
 2880 
 2881         mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
 2882                 mlx4_CLOSE_PORT(dev, p);
 2883 
 2884         mlx4_ib_free_eqs(dev, ibdev);
 2885 
 2886         mlx4_uar_free(dev, &ibdev->priv_uar);
 2887         mlx4_pd_free(dev, ibdev->priv_pdn);
 2888         ib_dealloc_device(&ibdev->ib_dev);
 2889 }
 2890 
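      /*
       * On the master, queue one work item per active port of 'slave' to set
       * up (do_init != 0) or tear down that slave's tunnel QPs.  If SR-IOV is
       * already going down, the work items are freed instead of being queued.
       */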
 2891 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
 2892 {
 2893         struct mlx4_ib_demux_work **dm = NULL;
 2894         struct mlx4_dev *dev = ibdev->dev;
 2895         int i;
 2896         unsigned long flags;
 2897         struct mlx4_active_ports actv_ports;
 2898         unsigned int ports;
 2899         unsigned int first_port;
 2900 
 2901         if (!mlx4_is_master(dev))
 2902                 return;
 2903 
 2904         actv_ports = mlx4_get_active_ports(dev, slave);
 2905         ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
 2906         first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
 2907 
 2908         dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
 2909         if (!dm) {
 2910                 pr_err("failed to allocate memory for tunneling qp update\n");
 2911                 return;
 2912         }
 2913 
 2914         for (i = 0; i < ports; i++) {
 2915                 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
 2916                 if (!dm[i]) {
 2917                         pr_err("failed to allocate memory for tunneling qp update work struct\n");
 2918                         while (--i >= 0)
 2919                                 kfree(dm[i]);
 2920                         goto out;
 2921                 }
 2922                 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
 2923                 dm[i]->port = first_port + i + 1;
 2924                 dm[i]->slave = slave;
 2925                 dm[i]->do_init = do_init;
 2926                 dm[i]->dev = ibdev;
 2927         }
 2928         /* initialize or tear down tunnel QPs for the slave */
 2929         spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
 2930         if (!ibdev->sriov.is_going_down) {
 2931                 for (i = 0; i < ports; i++)
 2932                         queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
 2933                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
 2934         } else {
 2935                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
 2936                 for (i = 0; i < ports; i++)
 2937                         kfree(dm[i]);
 2938         }
 2939 out:
 2940         kfree(dm);
 2941         return;
 2942 }
 2943 
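      /*
       * Handle a catastrophic device error: walk every QP on this ibdev and,
       * for each send/receive queue with outstanding work, collect the
       * associated CQ, then invoke the completion handlers so consumers can
       * observe the failure and clean up.
       */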
 2944 static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
 2945 {
 2946         struct mlx4_ib_qp *mqp;
 2947         unsigned long flags_qp;
 2948         unsigned long flags_cq;
 2949         struct mlx4_ib_cq *send_mcq, *recv_mcq;
 2950         struct list_head    cq_notify_list;
 2951         struct mlx4_cq *mcq;
 2952         unsigned long flags;
 2953 
 2954         pr_warn("mlx4_ib_handle_catas_error was started\n");
 2955         INIT_LIST_HEAD(&cq_notify_list);
 2956 
 2957         /* Walk the QP list on this ibdev, synchronized with QP create/destroy. */
 2958         spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
 2959 
 2960         list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
 2961                 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
 2962                 if (mqp->sq.tail != mqp->sq.head) {
 2963                         send_mcq = to_mcq(mqp->ibqp.send_cq);
 2964                         spin_lock_irqsave(&send_mcq->lock, flags_cq);
 2965                         if (send_mcq->mcq.comp &&
 2966                             mqp->ibqp.send_cq->comp_handler) {
 2967                                 if (!send_mcq->mcq.reset_notify_added) {
 2968                                         send_mcq->mcq.reset_notify_added = 1;
 2969                                         list_add_tail(&send_mcq->mcq.reset_notify,
 2970                                                       &cq_notify_list);
 2971                                 }
 2972                         }
 2973                         spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
 2974                 }
 2975                 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
 2976                 /* Now, handle the QP's receive queue */
 2977                 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
 2978                 /* no handling is needed for SRQ */
 2979                 if (!mqp->ibqp.srq) {
 2980                         if (mqp->rq.tail != mqp->rq.head) {
 2981                                 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
 2982                                 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
 2983                                 if (recv_mcq->mcq.comp &&
 2984                                     mqp->ibqp.recv_cq->comp_handler) {
 2985                                         if (!recv_mcq->mcq.reset_notify_added) {
 2986                                                 recv_mcq->mcq.reset_notify_added = 1;
 2987                                                 list_add_tail(&recv_mcq->mcq.reset_notify,
 2988                                                               &cq_notify_list);
 2989                                         }
 2990                                 }
 2991                                 spin_unlock_irqrestore(&recv_mcq->lock,
 2992                                                        flags_cq);
 2993                         }
 2994                 }
 2995                 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
 2996         }
 2997 
 2998         list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
 2999                 mcq->comp(mcq);
 3000         }
 3001         spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
 3002         pr_warn("mlx4_ib_handle_catas_error ended\n");
 3003 }
 3004 
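      /*
       * Work handler for port events on a bonded device: derive an aggregate
       * state from the member netdevs (ACTIVE if any port is running with
       * link up, DOWN otherwise) and report it to the IB core as a
       * PORT_ACTIVE or PORT_ERR event on port 1.
       */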
 3005 static void handle_bonded_port_state_event(struct work_struct *work)
 3006 {
 3007         struct ib_event_work *ew =
 3008                 container_of(work, struct ib_event_work, work);
 3009         struct mlx4_ib_dev *ibdev = ew->ib_dev;
 3010         enum ib_port_state bonded_port_state = IB_PORT_NOP;
 3011         int i;
 3012         struct ib_event ibev;
 3013 
 3014         kfree(ew);
 3015         spin_lock_bh(&ibdev->iboe.lock);
 3016         for (i = 0; i < MLX4_MAX_PORTS; ++i) {
 3017                 struct ifnet *curr_netdev = ibdev->iboe.netdevs[i];
 3018                 enum ib_port_state curr_port_state;
 3019 
 3020                 if (!curr_netdev)
 3021                         continue;
 3022 
 3023                 curr_port_state =
 3024                         ((curr_netdev->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
 3025                          curr_netdev->if_link_state == LINK_STATE_UP) ?
 3026                         IB_PORT_ACTIVE : IB_PORT_DOWN;
 3027 
 3028                 bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
 3029                         curr_port_state : IB_PORT_ACTIVE;
 3030         }
 3031         spin_unlock_bh(&ibdev->iboe.lock);
 3032 
 3033         ibev.device = &ibdev->ib_dev;
 3034         ibev.element.port_num = 1;
 3035         ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
 3036                 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
 3037 
 3038         ib_dispatch_event(&ibev);
 3039 }
 3040 
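      /*
       * Refresh the cached SL-to-VL mapping for 'port'.  If the query fails,
       * the error is logged and an all-zero mapping is cached instead.
       */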
 3041 void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
 3042 {
 3043         u64 sl2vl;
 3044         int err;
 3045 
 3046         err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
 3047         if (err) {
 3048                 pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
 3049                        port, err);
 3050                 sl2vl = 0;
 3051         }
 3052         atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
 3053 }
 3054 
 3055 static void ib_sl2vl_update_work(struct work_struct *work)
 3056 {
 3057         struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
 3058         struct mlx4_ib_dev *mdev = ew->ib_dev;
 3059         int port = ew->port;
 3060 
 3061         mlx4_ib_sl2vl_update(mdev, port);
 3062 
 3063         kfree(ew);
 3064 }
 3065 
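      /*
       * Defer an SL-to-VL refresh for 'port' to the driver workqueue; safe to
       * call from atomic context (the work item is allocated with GFP_ATOMIC).
       */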
 3066 void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
 3067                                      int port)
 3068 {
 3069         struct ib_event_work *ew;
 3070 
 3071         ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
 3072         if (ew) {
 3073                 INIT_WORK(&ew->work, ib_sl2vl_update_work);
 3074                 ew->port = port;
 3075                 ew->ib_dev = ibdev;
 3076                 queue_work(wq, &ew->work);
 3077         } else {
 3078                 pr_err("failed to allocate memory for sl2vl update work\n");
 3079         }
 3080 }
 3081 
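      /*
       * mlx4 core event handler: translate device events (port up/down,
       * catastrophic error, port management change, slave init/shutdown)
       * into IB events and/or deferred work, and dispatch them to the
       * IB core where appropriate.
       */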
 3082 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 3083                           enum mlx4_dev_event event, unsigned long param)
 3084 {
 3085         struct ib_event ibev;
 3086         struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
 3087         struct mlx4_eqe *eqe = NULL;
 3088         struct ib_event_work *ew;
 3089         int p = 0;
 3090 
 3091         if (mlx4_is_bonded(dev) &&
 3092             ((event == MLX4_DEV_EVENT_PORT_UP) ||
 3093             (event == MLX4_DEV_EVENT_PORT_DOWN))) {
 3094                 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
 3095                 if (!ew)
 3096                         return;
 3097                 INIT_WORK(&ew->work, handle_bonded_port_state_event);
 3098                 ew->ib_dev = ibdev;
 3099                 queue_work(wq, &ew->work);
 3100                 return;
 3101         }
 3102 
 3103         if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
 3104                 eqe = (struct mlx4_eqe *)param;
 3105         else
 3106                 p = (int) param;
 3107 
 3108         switch (event) {
 3109         case MLX4_DEV_EVENT_PORT_UP:
 3110                 if (p > ibdev->num_ports)
 3111                         return;
 3112                 if (!mlx4_is_slave(dev) &&
 3113                     rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
 3114                         IB_LINK_LAYER_INFINIBAND) {
 3115                         if (mlx4_is_master(dev))
 3116                                 mlx4_ib_invalidate_all_guid_record(ibdev, p);
 3117                         if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
 3118                             !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
 3119                                 mlx4_sched_ib_sl2vl_update_work(ibdev, p);
 3120                 }
 3121                 ibev.event = IB_EVENT_PORT_ACTIVE;
 3122                 break;
 3123 
 3124         case MLX4_DEV_EVENT_PORT_DOWN:
 3125                 if (p > ibdev->num_ports)
 3126                         return;
 3127                 ibev.event = IB_EVENT_PORT_ERR;
 3128                 break;
 3129 
 3130         case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
 3131                 ibdev->ib_active = false;
 3132                 ibev.event = IB_EVENT_DEVICE_FATAL;
 3133                 mlx4_ib_handle_catas_error(ibdev);
 3134                 break;
 3135 
 3136         case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
 3137                 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
 3138                 if (!ew) {
 3139                         pr_err("failed to allocate memory for events work\n");
 3140                         break;
 3141                 }
 3142 
 3143                 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
 3144                 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
 3145                 ew->ib_dev = ibdev;
 3146                 /* need to queue only for port owner, which uses GEN_EQE */
 3147                 if (mlx4_is_master(dev))
 3148                         queue_work(wq, &ew->work);
 3149                 else
 3150                         handle_port_mgmt_change_event(&ew->work);
 3151                 return;
 3152 
 3153         case MLX4_DEV_EVENT_SLAVE_INIT:
 3154                 /* here, p is the slave id */
 3155                 do_slave_init(ibdev, p, 1);
 3156                 if (mlx4_is_master(dev)) {
 3157                         int i;
 3158 
 3159                         for (i = 1; i <= ibdev->num_ports; i++) {
 3160                                 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
 3161                                         == IB_LINK_LAYER_INFINIBAND)
 3162                                         mlx4_ib_slave_alias_guid_event(ibdev,
 3163                                                                        p, i,
 3164                                                                        1);
 3165                         }
 3166                 }
 3167                 return;
 3168 
 3169         case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
 3170                 if (mlx4_is_master(dev)) {
 3171                         int i;
 3172 
 3173                         for (i = 1; i <= ibdev->num_ports; i++) {
 3174                                 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
 3175                                         == IB_LINK_LAYER_INFINIBAND)
 3176                                         mlx4_ib_slave_alias_guid_event(ibdev,
 3177                                                                        p, i,
 3178                                                                        0);
 3179                         }
 3180                 }
 3181                 /* here, p is the slave id */
 3182                 do_slave_init(ibdev, p, 0);
 3183                 return;
 3184 
 3185         default:
 3186                 return;
 3187         }
 3188 
 3189         ibev.device           = ibdev_ptr;
 3190         ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
 3191 
 3192         ib_dispatch_event(&ibev);
 3193 }
 3194 
 3195 static struct mlx4_interface mlx4_ib_interface = {
 3196         .add            = mlx4_ib_add,
 3197         .remove         = mlx4_ib_remove,
 3198         .event          = mlx4_ib_event,
 3199         .protocol       = MLX4_PROT_IB_IPV6,
 3200         .flags          = MLX4_INTFF_BONDING
 3201 };
 3202 
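      /*
       * Module load: create the ordered driver workqueue, initialize
       * multicast group (MCG) handling, and register with the mlx4 core
       * as an interface consumer.
       */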
 3203 static int __init mlx4_ib_init(void)
 3204 {
 3205         int err;
 3206 
 3207         wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
 3208         if (!wq)
 3209                 return -ENOMEM;
 3210 
 3211         err = mlx4_ib_mcg_init();
 3212         if (err)
 3213                 goto clean_wq;
 3214 
 3215         err = mlx4_register_interface(&mlx4_ib_interface);
 3216         if (err)
 3217                 goto clean_mcg;
 3218 
 3219         return 0;
 3220 
 3221 clean_mcg:
 3222         mlx4_ib_mcg_destroy();
 3223 
 3224 clean_wq:
 3225         destroy_workqueue(wq);
 3226         return err;
 3227 }
 3228 
 3229 static void __exit mlx4_ib_cleanup(void)
 3230 {
 3231         mlx4_unregister_interface(&mlx4_ib_interface);
 3232         mlx4_ib_mcg_destroy();
 3233         destroy_workqueue(wq);
 3234 }
 3235 
 3236 module_init_order(mlx4_ib_init, SI_ORDER_SEVENTH);
 3237 module_exit_order(mlx4_ib_cleanup, SI_ORDER_SEVENTH);
 3238 
 3239 static int
 3240 mlx4ib_evhand(module_t mod, int event, void *arg)
 3241 {
 3242         return (0);
 3243 }
 3244 
 3245 static moduledata_t mlx4ib_mod = {
 3246         .name = "mlx4ib",
 3247         .evhand = mlx4ib_evhand,
 3248 };
 3249 
 3250 DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY);
 3251 MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
 3252 MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
 3253 MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1);
