FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c


    1 /*
    2  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
    3  *
    4  * This software is available to you under a choice of one of two
    5  * licenses.  You may choose to be licensed under the terms of the GNU
    6  * General Public License (GPL) Version 2, available from the file
    7  * COPYING in the main directory of this source tree, or the
    8  * OpenIB.org BSD license below:
    9  *
   10  *     Redistribution and use in source and binary forms, with or
   11  *     without modification, are permitted provided that the following
   12  *     conditions are met:
   13  *
   14  *      - Redistributions of source code must retain the above
   15  *        copyright notice, this list of conditions and the following
   16  *        disclaimer.
   17  *
   18  *      - Redistributions in binary form must reproduce the above
   19  *        copyright notice, this list of conditions and the following
   20  *        disclaimer in the documentation and/or other materials
   21  *        provided with the distribution.
   22  *
   23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   30  * SOFTWARE.
   31  */
   32 
   33 #include <rdma/ib_mad.h>
   34 #include <rdma/ib_smi.h>
   35 #include <rdma/ib_sa.h>
   36 #include <rdma/ib_cache.h>
   37 
   38 #include <linux/random.h>
   39 #include <dev/mlx4/cmd.h>
   40 #include <dev/mlx4/driver.h>
   41 #include <linux/gfp.h>
   42 #include <rdma/ib_pma.h>
   43 
   44 #include "mlx4_ib.h"
   45 
   46 enum {
   47         MLX4_IB_VENDOR_CLASS1 = 0x9,
   48         MLX4_IB_VENDOR_CLASS2 = 0xa
   49 };
   50 
   51 #define MLX4_TUN_SEND_WRID_SHIFT 34
   52 #define MLX4_TUN_QPN_SHIFT 32
   53 #define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
   54 #define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
   55 
   56 #define MLX4_TUN_IS_RECV(a)  (((a) >>  MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
   57 #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
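      /*
       * For reference (derived from the macros above and their use in
       * mlx4_ib_send_to_slave() below): the 64-bit tunnel WRID packs the ring
       * index in bits 31:0, the tunnel QP selector in bits 33:32 (qp[0] = SMI
       * proxy, qp[1] = GSI proxy) and a receive flag in bit 34.  For example,
       * MLX4_TUN_WRID_RECV | MLX4_TUN_SET_WRID_QPN(1) | idx decodes with
       * MLX4_TUN_IS_RECV() == 1 and MLX4_TUN_WRID_QPN() == 1.
       */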
   58 
   59  /* Port mgmt change event handling */
   60 
   61 #define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
   62 #define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
   63 #define NUM_IDX_IN_PKEY_TBL_BLK 32
   64 #define GUID_TBL_ENTRY_SIZE 8      /* size in bytes */
   65 #define GUID_TBL_BLK_NUM_ENTRIES 8
   66 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
   67 
   68 struct mlx4_mad_rcv_buf {
   69         struct ib_grh grh;
   70         u8 payload[256];
   71 } __packed;
   72 
   73 struct mlx4_mad_snd_buf {
   74         u8 payload[256];
   75 } __packed;
   76 
   77 struct mlx4_tunnel_mad {
   78         struct ib_grh grh;
   79         struct mlx4_ib_tunnel_header hdr;
   80         struct ib_mad mad;
   81 } __packed;
   82 
   83 struct mlx4_rcv_tunnel_mad {
   84         struct mlx4_rcv_tunnel_hdr hdr;
   85         struct ib_grh grh;
   86         struct ib_mad mad;
   87 } __packed;
   88 
   89 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
   90 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
   91 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
   92                                 int block, u32 change_bitmap);
   93 
   94 __be64 mlx4_ib_gen_node_guid(void)
   95 {
   96 #define NODE_GUID_HI    ((u64) (((u64)IB_OPENIB_OUI) << 40))
   97         return cpu_to_be64(NODE_GUID_HI | random());
   98 }
   99 
  100 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
  101 {
  102         return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
  103                 cpu_to_be64(0xff00000000000000LL);
  104 }
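      /*
       * The OR with cpu_to_be64(0xff00000000000000LL) forces the most
       * significant TID byte to 0xff.  mlx4_ib_demux_mad() below reads that
       * byte back as the slave id of a response MAD, and 255 marks the
       * dom0/master, so responses carrying a demux-generated TID are never
       * tunnelled to a VF.
       */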
  105 
  106 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
  107                  int port, const struct ib_wc *in_wc,
  108                  const struct ib_grh *in_grh,
  109                  const void *in_mad, void *response_mad)
  110 {
  111         struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
  112         void *inbox;
  113         int err;
  114         u32 in_modifier = port;
  115         u8 op_modifier = 0;
  116 
  117         inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
  118         if (IS_ERR(inmailbox))
  119                 return PTR_ERR(inmailbox);
  120         inbox = inmailbox->buf;
  121 
  122         outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
  123         if (IS_ERR(outmailbox)) {
  124                 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
  125                 return PTR_ERR(outmailbox);
  126         }
  127 
  128         memcpy(inbox, in_mad, 256);
  129 
  130         /*
  131          * Key check traps can't be generated unless we have in_wc to
  132          * tell us where to send the trap.
  133          */
  134         if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
  135                 op_modifier |= 0x1;
  136         if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
  137                 op_modifier |= 0x2;
  138         if (mlx4_is_mfunc(dev->dev) &&
  139             (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
  140                 op_modifier |= 0x8;
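              /*
               * Summary of the op_modifier bits built here (derived from this
               * function): 0x1 skips the M_Key check, 0x2 skips the B_Key
               * check, 0x4 (set below when in_wc is present) tells firmware
               * that extended work-completion info follows at inbox + 256,
               * and 0x8 (multi-function only) selects the network view per
               * the MLX4_MAD_IFC_NET_VIEW flag.
               */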
  141 
  142         if (in_wc) {
  143                 struct {
  144                         __be32          my_qpn;
  145                         u32             reserved1;
  146                         __be32          rqpn;
  147                         u8              sl;
  148                         u8              g_path;
  149                         u16             reserved2[2];
  150                         __be16          pkey;
  151                         u32             reserved3[11];
  152                         u8              grh[40];
  153                 } *ext_info;
  154 
  155                 memset(inbox + 256, 0, 256);
  156                 ext_info = inbox + 256;
  157 
  158                 ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
  159                 ext_info->rqpn   = cpu_to_be32(in_wc->src_qp);
  160                 ext_info->sl     = in_wc->sl << 4;
  161                 ext_info->g_path = in_wc->dlid_path_bits |
  162                         (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
  163                 ext_info->pkey   = cpu_to_be16(in_wc->pkey_index);
  164 
  165                 if (in_grh)
  166                         memcpy(ext_info->grh, in_grh, 40);
  167 
  168                 op_modifier |= 0x4;
  169 
  170                 in_modifier |= in_wc->slid << 16;
  171         }
  172 
  173         err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
  174                            mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
  175                            MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
  176                            (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
  177 
  178         if (!err)
  179                 memcpy(response_mad, outmailbox->buf, 256);
  180 
  181         mlx4_free_cmd_mailbox(dev->dev, inmailbox);
  182         mlx4_free_cmd_mailbox(dev->dev, outmailbox);
  183 
  184         return err;
  185 }
  186 
  187 static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
  188 {
  189         struct ib_ah *new_ah;
  190         struct ib_ah_attr ah_attr;
  191         unsigned long flags;
  192 
  193         if (!dev->send_agent[port_num - 1][0])
  194                 return;
  195 
  196         memset(&ah_attr, 0, sizeof ah_attr);
  197         ah_attr.dlid     = lid;
  198         ah_attr.sl       = sl;
  199         ah_attr.port_num = port_num;
  200 
  201         new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
  202                               &ah_attr, 0);
  203         if (IS_ERR(new_ah))
  204                 return;
  205 
  206         spin_lock_irqsave(&dev->sm_lock, flags);
  207         if (dev->sm_ah[port_num - 1])
  208                 ib_destroy_ah(dev->sm_ah[port_num - 1], 0);
  209         dev->sm_ah[port_num - 1] = new_ah;
  210         spin_unlock_irqrestore(&dev->sm_lock, flags);
  211 }
  212 
  213 /*
  214  * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
  215  * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
  216  */
  217 static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad,
  218                       u16 prev_lid)
  219 {
  220         struct ib_port_info *pinfo;
  221         u16 lid;
  222         __be16 *base;
  223         u32 bn, pkey_change_bitmap;
  224         int i;
  225 
  226 
  227         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  228         if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
  229              mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
  230             mad->mad_hdr.method == IB_MGMT_METHOD_SET)
  231                 switch (mad->mad_hdr.attr_id) {
  232                 case IB_SMP_ATTR_PORT_INFO:
  233                         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
  234                                 return;
  235                         pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
  236                         lid = be16_to_cpu(pinfo->lid);
  237 
  238                         update_sm_ah(dev, port_num,
  239                                      be16_to_cpu(pinfo->sm_lid),
  240                                      pinfo->neighbormtu_mastersmsl & 0xf);
  241 
  242                         if (pinfo->clientrereg_resv_subnetto & 0x80)
  243                                 handle_client_rereg_event(dev, port_num);
  244 
  245                         if (prev_lid != lid)
  246                                 handle_lid_change_event(dev, port_num);
  247                         break;
  248 
  249                 case IB_SMP_ATTR_PKEY_TABLE:
  250                         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
  251                                 return;
  252                         if (!mlx4_is_mfunc(dev->dev)) {
  253                                 mlx4_ib_dispatch_event(dev, port_num,
  254                                                        IB_EVENT_PKEY_CHANGE);
  255                                 break;
  256                         }
  257 
  258                         /* at this point, we are running in the master.
  259                          * Slaves do not receive SMPs.
  260                          */
  261                         bn  = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
  262                         base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
  263                         pkey_change_bitmap = 0;
  264                         for (i = 0; i < 32; i++) {
  265                                 pr_debug("PKEY[%d] = x%x\n",
  266                                          i + bn*32, be16_to_cpu(base[i]));
  267                                 if (be16_to_cpu(base[i]) !=
  268                                     dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
  269                                         pkey_change_bitmap |= (1 << i);
  270                                         dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
  271                                                 be16_to_cpu(base[i]);
  272                                 }
  273                         }
  274                         pr_debug("PKEY Change event: port=%d, "
  275                                  "block=0x%x, change_bitmap=0x%x\n",
  276                                  port_num, bn, pkey_change_bitmap);
  277 
  278                         if (pkey_change_bitmap) {
  279                                 mlx4_ib_dispatch_event(dev, port_num,
  280                                                        IB_EVENT_PKEY_CHANGE);
  281                                 if (!dev->sriov.is_going_down)
  282                                         __propagate_pkey_ev(dev, port_num, bn,
  283                                                             pkey_change_bitmap);
  284                         }
  285                         break;
  286 
  287                 case IB_SMP_ATTR_GUID_INFO:
  288                         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
  289                                 return;
  290                         /* paravirtualized master's guid is guid 0 -- does not change */
  291                         if (!mlx4_is_master(dev->dev))
  292                                 mlx4_ib_dispatch_event(dev, port_num,
  293                                                        IB_EVENT_GID_CHANGE);
  294                         /*if master, notify relevant slaves*/
  295                         if (mlx4_is_master(dev->dev) &&
  296                             !dev->sriov.is_going_down) {
  297                                 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
  298                                 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
  299                                                                     (u8 *)(&((struct ib_smp *)mad)->data));
  300                                 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
  301                                                                      (u8 *)(&((struct ib_smp *)mad)->data));
  302                         }
  303                         break;
  304 
  305                 case IB_SMP_ATTR_SL_TO_VL_TABLE:
  306                         /* cache sl to vl mapping changes for use in
  307                          * filling QP1 LRH VL field when sending packets
  308                          */
  309                         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV &&
  310                             dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)
  311                                 return;
  312                         if (!mlx4_is_slave(dev->dev)) {
  313                                 union sl2vl_tbl_to_u64 sl2vl64;
  314                                 int jj;
  315 
  316                                 for (jj = 0; jj < 8; jj++) {
  317                                         sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj];
  318                                         pr_debug("sl2vl[%d] = %02x\n", jj, sl2vl64.sl8[jj]);
  319                                 }
  320                                 atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64);
  321                         }
  322                         break;
  323 
  324                 default:
  325                         break;
  326                 }
  327 }
  328 
  329 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
  330                                 int block, u32 change_bitmap)
  331 {
  332         int i, ix, slave, err;
  333         int have_event = 0;
  334 
  335         for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
  336                 if (slave == mlx4_master_func_num(dev->dev))
  337                         continue;
  338                 if (!mlx4_is_slave_active(dev->dev, slave))
  339                         continue;
  340 
  341                 have_event = 0;
  342                 for (i = 0; i < 32; i++) {
  343                         if (!(change_bitmap & (1 << i)))
  344                                 continue;
  345                         for (ix = 0;
  346                              ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
  347                                 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
  348                                     [ix] == i + 32 * block) {
  349                                         err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
  350                                         pr_debug("propagate_pkey_ev: slave %d,"
  351                                                  " port %d, ix %d (%d)\n",
  352                                                  slave, port_num, ix, err);
  353                                         have_event = 1;
  354                                         break;
  355                                 }
  356                         }
  357                         if (have_event)
  358                                 break;
  359                 }
  360         }
  361 }
  362 
  363 static void node_desc_override(struct ib_device *dev,
  364                                struct ib_mad *mad)
  365 {
  366         unsigned long flags;
  367 
  368         if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
  369              mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
  370             mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
  371             mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
  372                 spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
  373                 memcpy(((struct ib_smp *) mad)->data, dev->node_desc,
  374                        IB_DEVICE_NODE_DESC_MAX);
  375                 spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
  376         }
  377 }
  378 
  379 static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad)
  380 {
  381         int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
  382         struct ib_mad_send_buf *send_buf;
  383         struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
  384         int ret;
  385         unsigned long flags;
  386 
  387         if (agent) {
  388                 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
  389                                               IB_MGMT_MAD_DATA, GFP_ATOMIC,
  390                                               IB_MGMT_BASE_VERSION);
  391                 if (IS_ERR(send_buf))
  392                         return;
  393                 /*
  394                  * We rely here on the fact that MLX QPs don't use the
  395                  * address handle after the send is posted (this is
  396                  * wrong following the IB spec strictly, but we know
  397                  * it's OK for our devices).
  398                  */
  399                 spin_lock_irqsave(&dev->sm_lock, flags);
  400                 memcpy(send_buf->mad, mad, sizeof *mad);
  401                 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
  402                         ret = ib_post_send_mad(send_buf, NULL);
  403                 else
  404                         ret = -EINVAL;
  405                 spin_unlock_irqrestore(&dev->sm_lock, flags);
  406 
  407                 if (ret)
  408                         ib_free_send_mad(send_buf);
  409         }
  410 }
  411 
  412 static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
  413                                                              struct ib_sa_mad *sa_mad)
  414 {
  415         int ret = 0;
  416 
  417         /* dispatch to different sa handlers */
  418         switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
  419         case IB_SA_ATTR_MC_MEMBER_REC:
  420                 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
  421                 break;
  422         default:
  423                 break;
  424         }
  425         return ret;
  426 }
  427 
  428 int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
  429 {
  430         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  431         int i;
  432 
  433         for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
  434                 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
  435                         return i;
  436         }
  437         return -1;
  438 }
  439 
  440 
  441 static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
  442                                    u8 port, u16 pkey, u16 *ix)
  443 {
  444         int i, ret;
  445         u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
  446         u16 slot_pkey;
  447 
  448         if (slave == mlx4_master_func_num(dev->dev))
  449                 return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
  450 
  451         unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
  452 
  453         for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
  454                 if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
  455                         continue;
  456 
  457                 pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
  458 
  459                 ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
  460                 if (ret)
  461                         continue;
  462                 if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
  463                         if (slot_pkey & 0x8000) {
  464                                 *ix = (u16) pkey_ix;
  465                                 return 0;
  466                         } else {
  467                                 /* take first partial pkey index found */
  468                                 if (partial_ix == 0xFF)
  469                                         partial_ix = pkey_ix;
  470                         }
  471                 }
  472         }
  473 
  474         if (partial_ix < 0xFF) {
  475                 *ix = (u16) partial_ix;
  476                 return 0;
  477         }
  478 
  479         return -EINVAL;
  480 }
  481 
  482 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
  483                           enum ib_qp_type dest_qpt, struct ib_wc *wc,
  484                           struct ib_grh *grh, struct ib_mad *mad)
  485 {
  486         struct ib_sge list;
  487         struct ib_ud_wr wr;
  488         const struct ib_send_wr *bad_wr;
  489         struct mlx4_ib_demux_pv_ctx *tun_ctx;
  490         struct mlx4_ib_demux_pv_qp *tun_qp;
  491         struct mlx4_rcv_tunnel_mad *tun_mad;
  492         struct ib_ah_attr attr;
  493         struct ib_ah *ah;
  494         struct ib_qp *src_qp = NULL;
  495         unsigned tun_tx_ix = 0;
  496         int dqpn;
  497         int ret = 0;
  498         u16 tun_pkey_ix;
  499         u16 cached_pkey;
  500         u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
  501 
  502         if (dest_qpt > IB_QPT_GSI)
  503                 return -EINVAL;
  504 
  505         tun_ctx = dev->sriov.demux[port-1].tun[slave];
  506 
  507         /* check if proxy qp created */
  508         if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
  509                 return -EAGAIN;
  510 
  511         if (!dest_qpt)
  512                 tun_qp = &tun_ctx->qp[0];
  513         else
  514                 tun_qp = &tun_ctx->qp[1];
  515 
  516         /* compute P_Key index to put in tunnel header for slave */
  517         if (dest_qpt) {
  518                 u16 pkey_ix;
  519                 ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
  520                 if (ret)
  521                         return -EINVAL;
  522 
  523                 ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
  524                 if (ret)
  525                         return -EINVAL;
  526                 tun_pkey_ix = pkey_ix;
  527         } else
  528                 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
  529 
  530         dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
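              /*
               * Worked example of the proxy QPN layout implied by the line
               * above: each slave owns a stride of 8 QPNs starting at
               * base_proxy_sqpn + 8 * slave; within that stride, offsets 0/1
               * are the SMI proxies for ports 1/2 and offsets 2/3 the GSI
               * proxies.  E.g. slave 2, port 1, GSI (dest_qpt = 1) gives
               * base_proxy_sqpn + 16 + 1 + 2 - 1 = base_proxy_sqpn + 18.
               */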
  531 
  532         /* get tunnel tx data buf for slave */
  533         src_qp = tun_qp->qp;
  534 
  535         /* create ah. Just need an empty one with the port num for the post send.
  536          * The driver will set the force loopback bit in post_send */
  537         memset(&attr, 0, sizeof attr);
  538         attr.port_num = port;
  539         if (is_eth) {
  540                 memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
  541                 attr.ah_flags = IB_AH_GRH;
  542         }
  543         ah = ib_create_ah(tun_ctx->pd, &attr, 0);
  544         if (IS_ERR(ah))
  545                 return -ENOMEM;
  546 
  547         /* allocate the tunnel tx buffer only after the failure-return checks above have passed */
  548         spin_lock(&tun_qp->tx_lock);
  549         if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
  550             (MLX4_NUM_TUNNEL_BUFS - 1))
  551                 ret = -EAGAIN;
  552         else
  553                 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
  554         spin_unlock(&tun_qp->tx_lock);
  555         if (ret)
  556                 goto end;
  557 
  558         tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
  559         if (tun_qp->tx_ring[tun_tx_ix].ah)
  560                 ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
  561         tun_qp->tx_ring[tun_tx_ix].ah = ah;
  562         ib_dma_sync_single_for_cpu(&dev->ib_dev,
  563                                    tun_qp->tx_ring[tun_tx_ix].buf.map,
  564                                    sizeof (struct mlx4_rcv_tunnel_mad),
  565                                    DMA_TO_DEVICE);
  566 
  567         /* copy over to tunnel buffer */
  568         if (grh)
  569                 memcpy(&tun_mad->grh, grh, sizeof *grh);
  570         memcpy(&tun_mad->mad, mad, sizeof *mad);
  571 
  572         /* adjust tunnel data */
  573         tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
  574         tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
  575         tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
  576 
  577         if (is_eth) {
  578                 u16 vlan = 0;
  579                 if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
  580                                                 NULL)) {
  581                         /* VST mode */
  582                         if (vlan != wc->vlan_id) {
  583                                 /* Packet vlan is not the VST-assigned vlan.
  584                                  * Drop the packet.
  585                                  */
  586                                 ret = -EPERM;
  587                                 goto out;
  588                         } else {
  589                                 /* Remove the vlan tag before forwarding
  590                                  * the packet to the VF.
  591                                  */
  592                                 vlan = 0xffff;
  593                         }
  594                 } else {
  595                         vlan = wc->vlan_id;
  596                 }
  597 
  598                 tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
  599                 memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
  600                 memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
  601         } else {
  602                 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
  603                 tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
  604         }
  605 
  606         ib_dma_sync_single_for_device(&dev->ib_dev,
  607                                       tun_qp->tx_ring[tun_tx_ix].buf.map,
  608                                       sizeof (struct mlx4_rcv_tunnel_mad),
  609                                       DMA_TO_DEVICE);
  610 
  611         list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
  612         list.length = sizeof (struct mlx4_rcv_tunnel_mad);
  613         list.lkey = tun_ctx->pd->local_dma_lkey;
  614 
  615         wr.ah = ah;
  616         wr.port_num = port;
  617         wr.remote_qkey = IB_QP_SET_QKEY;
  618         wr.remote_qpn = dqpn;
  619         wr.wr.next = NULL;
  620         wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
  621         wr.wr.sg_list = &list;
  622         wr.wr.num_sge = 1;
  623         wr.wr.opcode = IB_WR_SEND;
  624         wr.wr.send_flags = IB_SEND_SIGNALED;
  625 
  626         ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
  627         if (!ret)
  628                 return 0;
  629  out:
  630         spin_lock(&tun_qp->tx_lock);
  631         tun_qp->tx_ix_tail++;
  632         spin_unlock(&tun_qp->tx_lock);
  633         tun_qp->tx_ring[tun_tx_ix].ah = NULL;
  634 end:
  635         ib_destroy_ah(ah, 0);
  636         return ret;
  637 }
  638 
  639 static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
  640                         struct ib_wc *wc, struct ib_grh *grh,
  641                         struct ib_mad *mad)
  642 {
  643         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  644         int err, other_port;
  645         int slave = -1;
  646         u8 *slave_id;
  647         int is_eth = 0;
  648 
  649         if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
  650                 is_eth = 0;
  651         else
  652                 is_eth = 1;
  653 
  654         if (is_eth) {
  655                 if (!(wc->wc_flags & IB_WC_GRH)) {
  656                         mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
  657                         return -EINVAL;
  658                 }
  659                 if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
  660                         mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
  661                         return -EINVAL;
  662                 }
  663                 err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
  664                 if (err && mlx4_is_mf_bonded(dev->dev)) {
  665                         other_port = (port == 1) ? 2 : 1;
  666                         err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
  667                         if (!err) {
  668                                 port = other_port;
  669                                 pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
  670                                          slave, grh->dgid.raw, port, other_port);
  671                         }
  672                 }
  673                 if (err) {
  674                         mlx4_ib_warn(ibdev, "failed matching grh\n");
  675                         return -ENOENT;
  676                 }
  677                 if (slave >= dev->dev->caps.sqp_demux) {
  678                         mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
  679                                      slave, dev->dev->caps.sqp_demux);
  680                         return -ENOENT;
  681                 }
  682 
  683                 if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
  684                         return 0;
  685 
  686                 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
  687                 if (err)
  688                         pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
  689                                  slave, err);
  690                 return 0;
  691         }
  692 
  693         /* Initially assume that this mad is for us */
  694         slave = mlx4_master_func_num(dev->dev);
  695 
  696         /* See if the slave id is encoded in a response mad */
  697         if (mad->mad_hdr.method & 0x80) {
  698                 slave_id = (u8 *) &mad->mad_hdr.tid;
  699                 slave = *slave_id;
  700                 if (slave != 255) /*255 indicates the dom0*/
  701                         *slave_id = 0; /* remap tid */
  702         }
  703 
  704         /* If a grh is present, we demux according to it */
  705         if (wc->wc_flags & IB_WC_GRH) {
  706                 slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
  707                 if (slave < 0) {
  708                         mlx4_ib_warn(ibdev, "failed matching grh\n");
  709                         return -ENOENT;
  710                 }
  711         }
  712         /* Class-specific handling */
  713         switch (mad->mad_hdr.mgmt_class) {
  714         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
  715         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
  716                 /* 255 indicates the dom0 */
  717                 if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
  718                         if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
  719                                 return -EPERM;
  720                         /* for a VF, drop unsolicited MADs */
  721                         if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
  722                                 mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
  723                                              slave, mad->mad_hdr.mgmt_class,
  724                                              mad->mad_hdr.method);
  725                                 return -EINVAL;
  726                         }
  727                 }
  728                 break;
  729         case IB_MGMT_CLASS_SUBN_ADM:
  730                 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
  731                                              (struct ib_sa_mad *) mad))
  732                         return 0;
  733                 break;
  734         case IB_MGMT_CLASS_CM:
  735                 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
  736                         return 0;
  737                 break;
  738         case IB_MGMT_CLASS_DEVICE_MGMT:
  739                 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
  740                         return 0;
  741                 break;
  742         default:
  743                 /* Drop unsupported classes for slaves in tunnel mode */
  744                 if (slave != mlx4_master_func_num(dev->dev)) {
  745                         pr_debug("dropping unsupported ingress mad from class:%d "
  746                                  "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
  747                         return 0;
  748                 }
  749         }
  750         /* make sure a slave value of 255 that was not handled above does not slip through */
  751         if (slave >= dev->dev->caps.sqp_demux) {
  752                 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
  753                              slave, dev->dev->caps.sqp_demux);
  754                 return -ENOENT;
  755         }
  756 
  757         err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
  758         if (err)
  759                 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
  760                          slave, err);
  761         return 0;
  762 }
  763 
  764 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
  765                         const struct ib_wc *in_wc, const struct ib_grh *in_grh,
  766                         const struct ib_mad *in_mad, struct ib_mad *out_mad)
  767 {
  768         u16 slid, prev_lid = 0;
  769         int err;
  770         struct ib_port_attr pattr;
  771 
  772         if (in_wc && in_wc->qp->qp_num) {
  773                 pr_debug("received MAD: slid:%d sqpn:%d "
  774                         "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
  775                         in_wc->slid, in_wc->src_qp,
  776                         in_wc->dlid_path_bits,
  777                         in_wc->qp->qp_num,
  778                         in_wc->wc_flags,
  779                         in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
  780                         be16_to_cpu(in_mad->mad_hdr.attr_id));
  781                 if (in_wc->wc_flags & IB_WC_GRH) {
  782                         pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
  783                                  (unsigned long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
  784                                  (unsigned long long)be64_to_cpu(in_grh->sgid.global.interface_id));
  785                         pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
  786                                  (unsigned long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
  787                                  (unsigned long long)be64_to_cpu(in_grh->dgid.global.interface_id));
  788                 }
  789         }
  790 
  791         slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
  792 
  793         if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
  794                 forward_trap(to_mdev(ibdev), port_num, in_mad);
  795                 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
  796         }
  797 
  798         if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
  799             in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
  800                 if (in_mad->mad_hdr.method   != IB_MGMT_METHOD_GET &&
  801                     in_mad->mad_hdr.method   != IB_MGMT_METHOD_SET &&
  802                     in_mad->mad_hdr.method   != IB_MGMT_METHOD_TRAP_REPRESS)
  803                         return IB_MAD_RESULT_SUCCESS;
  804 
  805                 /*
  806                  * Don't process SMInfo queries -- the SMA can't handle them.
  807                  */
  808                 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
  809                         return IB_MAD_RESULT_SUCCESS;
  810         } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
  811                    in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1   ||
  812                    in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2   ||
  813                    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
  814                 if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
  815                     in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
  816                         return IB_MAD_RESULT_SUCCESS;
  817         } else
  818                 return IB_MAD_RESULT_SUCCESS;
  819 
  820         if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
  821              in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
  822             in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
  823             in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
  824             !ib_query_port(ibdev, port_num, &pattr))
  825                 prev_lid = pattr.lid;
  826 
  827         err = mlx4_MAD_IFC(to_mdev(ibdev),
  828                            (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
  829                            (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
  830                            MLX4_MAD_IFC_NET_VIEW,
  831                            port_num, in_wc, in_grh, in_mad, out_mad);
  832         if (err)
  833                 return IB_MAD_RESULT_FAILURE;
  834 
  835         if (!out_mad->mad_hdr.status) {
  836                 smp_snoop(ibdev, port_num, in_mad, prev_lid);
  837                 /* slaves get node desc from FW */
  838                 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
  839                         node_desc_override(ibdev, out_mad);
  840         }
  841 
  842         /* set return bit in status of directed route responses */
  843         if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
  844                 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
  845 
  846         if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
  847                 /* no response for trap repress */
  848                 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
  849 
  850         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
  851 }
  852 
  853 static void edit_counter(struct mlx4_counter *cnt, void *counters,
  854                          __be16 attr_id)
  855 {
  856         switch (attr_id) {
  857         case IB_PMA_PORT_COUNTERS:
  858         {
  859                 struct ib_pma_portcounters *pma_cnt =
  860                         (struct ib_pma_portcounters *)counters;
  861 
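                      /*
                       * PortXmitData/PortRcvData are defined by the IB PMA in
                       * units of four octets, so the byte counters reported
                       * by the HCA are divided by 4 (the >> 2 here and in the
                       * extended-counters case below).
                       */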
  862                 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
  863                                      (be64_to_cpu(cnt->tx_bytes) >> 2));
  864                 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
  865                                      (be64_to_cpu(cnt->rx_bytes) >> 2));
  866                 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
  867                                      be64_to_cpu(cnt->tx_frames));
  868                 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
  869                                      be64_to_cpu(cnt->rx_frames));
  870                 break;
  871         }
  872         case IB_PMA_PORT_COUNTERS_EXT:
  873         {
  874                 struct ib_pma_portcounters_ext *pma_cnt_ext =
  875                         (struct ib_pma_portcounters_ext *)counters;
  876 
  877                 pma_cnt_ext->port_xmit_data =
  878                         cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
  879                 pma_cnt_ext->port_rcv_data =
  880                         cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
  881                 pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
  882                 pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
  883                 break;
  884         }
  885         default:
  886                 break;
  887         }
  888 }
  889 
  890 static int iboe_process_mad_port_info(void *out_mad)
  891 {
  892         struct ib_class_port_info cpi = {};
  893 
  894         cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
  895         memcpy(out_mad, &cpi, sizeof(cpi));
  896         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
  897 }
  898 
  899 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
  900                         const struct ib_wc *in_wc, const struct ib_grh *in_grh,
  901                         const struct ib_mad *in_mad, struct ib_mad *out_mad)
  902 {
  903         struct mlx4_counter counter_stats;
  904         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  905         struct counter_index *tmp_counter;
  906         int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
  907 
  908         if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
  909                 return -EINVAL;
  910 
  911         if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
  912                 return iboe_process_mad_port_info((void *)(out_mad->data + 40));
  913 
  914         memset(&counter_stats, 0, sizeof(counter_stats));
  915         mutex_lock(&dev->counters_table[port_num - 1].mutex);
  916         list_for_each_entry(tmp_counter,
  917                             &dev->counters_table[port_num - 1].counters_list,
  918                             list) {
  919                 err = mlx4_get_counter_stats(dev->dev,
  920                                              tmp_counter->index,
  921                                              &counter_stats, 0);
  922                 if (err) {
  923                         err = IB_MAD_RESULT_FAILURE;
  924                         stats_avail = 0;
  925                         break;
  926                 }
  927                 stats_avail = 1;
  928         }
  929         mutex_unlock(&dev->counters_table[port_num - 1].mutex);
  930         if (stats_avail) {
  931                 memset(out_mad->data, 0, sizeof out_mad->data);
  932                 switch (counter_stats.counter_mode & 0xf) {
  933                 case 0:
  934                         edit_counter(&counter_stats,
  935                                      (void *)(out_mad->data + 40),
  936                                      in_mad->mad_hdr.attr_id);
  937                         err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
  938                         break;
  939                 default:
  940                         err = IB_MAD_RESULT_FAILURE;
  941                 }
  942         }
  943 
  944         return err;
  945 }
  946 
  947 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
  948                         const struct ib_wc *in_wc, const struct ib_grh *in_grh,
  949                         const struct ib_mad_hdr *in, size_t in_mad_size,
  950                         struct ib_mad_hdr *out, size_t *out_mad_size,
  951                         u16 *out_mad_pkey_index)
  952 {
  953         struct mlx4_ib_dev *dev = to_mdev(ibdev);
  954         const struct ib_mad *in_mad = (const struct ib_mad *)in;
  955         struct ib_mad *out_mad = (struct ib_mad *)out;
  956         enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
  957 
  958         if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
  959                          *out_mad_size != sizeof(*out_mad)))
  960                 return IB_MAD_RESULT_FAILURE;
  961 
  962         /* iboe_process_mad(), which uses the HCA flow counters to implement IB PMA
  963          * queries, should be called only by VFs and only for that specific purpose
  964          */
  965         if (link == IB_LINK_LAYER_INFINIBAND) {
  966                 if (mlx4_is_slave(dev->dev) &&
  967                     (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
  968                      (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
  969                       in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
  970                       in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
  971                         return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
  972                                                 in_grh, in_mad, out_mad);
  973 
  974                 return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
  975                                       in_grh, in_mad, out_mad);
  976         }
  977 
  978         if (link == IB_LINK_LAYER_ETHERNET)
  979                 return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
  980                                         in_grh, in_mad, out_mad);
  981 
  982         return -EINVAL;
  983 }
  984 
  985 static void send_handler(struct ib_mad_agent *agent,
  986                          struct ib_mad_send_wc *mad_send_wc)
  987 {
  988         if (mad_send_wc->send_buf->context[0])
  989                 ib_destroy_ah(mad_send_wc->send_buf->context[0], 0);
  990         ib_free_send_mad(mad_send_wc->send_buf);
  991 }
  992 
  993 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
  994 {
  995         struct ib_mad_agent *agent;
  996         int p, q;
  997         int ret;
  998         enum rdma_link_layer ll;
  999 
 1000         for (p = 0; p < dev->num_ports; ++p) {
 1001                 ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
 1002                 for (q = 0; q <= 1; ++q) {
 1003                         if (ll == IB_LINK_LAYER_INFINIBAND) {
 1004                                 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 1005                                                               q ? IB_QPT_GSI : IB_QPT_SMI,
 1006                                                               NULL, 0, send_handler,
 1007                                                               NULL, NULL, 0);
 1008                                 if (IS_ERR(agent)) {
 1009                                         ret = PTR_ERR(agent);
 1010                                         goto err;
 1011                                 }
 1012                                 dev->send_agent[p][q] = agent;
 1013                         } else
 1014                                 dev->send_agent[p][q] = NULL;
 1015                 }
 1016         }
 1017 
 1018         return 0;
 1019 
 1020 err:
 1021         for (p = 0; p < dev->num_ports; ++p)
 1022                 for (q = 0; q <= 1; ++q)
 1023                         if (dev->send_agent[p][q])
 1024                                 ib_unregister_mad_agent(dev->send_agent[p][q]);
 1025 
 1026         return ret;
 1027 }
 1028 
 1029 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
 1030 {
 1031         struct ib_mad_agent *agent;
 1032         int p, q;
 1033 
 1034         for (p = 0; p < dev->num_ports; ++p) {
 1035                 for (q = 0; q <= 1; ++q) {
 1036                         agent = dev->send_agent[p][q];
 1037                         if (agent) {
 1038                                 dev->send_agent[p][q] = NULL;
 1039                                 ib_unregister_mad_agent(agent);
 1040                         }
 1041                 }
 1042 
 1043                 if (dev->sm_ah[p])
 1044                         ib_destroy_ah(dev->sm_ah[p], 0);
 1045         }
 1046 }
 1047 
 1048 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
 1049 {
 1050         mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
 1051 
 1052         if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
 1053                 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
 1054                                             MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
 1055 }
 1056 
 1057 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
 1058 {
 1059         /* re-configure the alias-guid and mcg's */
 1060         if (mlx4_is_master(dev->dev)) {
 1061                 mlx4_ib_invalidate_all_guid_record(dev, port_num);
 1062 
 1063                 if (!dev->sriov.is_going_down) {
 1064                         mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
 1065                         mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
 1066                                                     MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
 1067                 }
 1068         }
 1069 
 1070         /* Update the sl to vl table from inside client rereg
 1071          * only if in secure-host mode (snooping is not possible)
 1072          * and the sl-to-vl change event is not generated by FW.
 1073          */
 1074         if (!mlx4_is_slave(dev->dev) &&
 1075             dev->dev->flags & MLX4_FLAG_SECURE_HOST &&
 1076             !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) {
 1077                 if (mlx4_is_master(dev->dev))
 1078                         /* already in work queue from mlx4_ib_event queueing
 1079                          * mlx4_handle_port_mgmt_change_event, which calls
 1080                          * this procedure. Therefore, call sl2vl_update directly.
 1081                          */
 1082                         mlx4_ib_sl2vl_update(dev, port_num);
 1083                 else
 1084                         mlx4_sched_ib_sl2vl_update_work(dev, port_num);
 1085         }
 1086         mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
 1087 }
 1088 
 1089 static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
 1090                               struct mlx4_eqe *eqe)
 1091 {
 1092         __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
 1093                             GET_MASK_FROM_EQE(eqe));
 1094 }
 1095 
 1096 static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
 1097                                       u32 guid_tbl_blk_num, u32 change_bitmap)
 1098 {
 1099         struct ib_smp *in_mad  = NULL;
 1100         struct ib_smp *out_mad  = NULL;
 1101         u16 i;
 1102 
 1103         if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
 1104                 return;
 1105 
 1106         in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
 1107         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 1108         if (!in_mad || !out_mad) {
 1109                 mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
 1110                 goto out;
 1111         }
 1112 
 1113         guid_tbl_blk_num  *= 4;
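              /*
               * The 32-bit change_bitmap covers 32 GUID entries, i.e. four
               * 8-entry GUIDInfo blocks (GUID_TBL_BLK_NUM_ENTRIES), so the
               * block number from the EQE is scaled by four and the loop
               * below issues up to four GUIDInfo queries, skipping sub-blocks
               * whose 8 bits of the bitmap are clear.
               */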
 1114 
 1115         for (i = 0; i < 4; i++) {
 1116                 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
 1117                         continue;
 1118                 memset(in_mad, 0, sizeof *in_mad);
 1119                 memset(out_mad, 0, sizeof *out_mad);
 1120 
 1121                 in_mad->base_version  = 1;
 1122                 in_mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 1123                 in_mad->class_version = 1;
 1124                 in_mad->method        = IB_MGMT_METHOD_GET;
 1125                 in_mad->attr_id       = IB_SMP_ATTR_GUID_INFO;
 1126                 in_mad->attr_mod      = cpu_to_be32(guid_tbl_blk_num + i);
 1127 
 1128                 if (mlx4_MAD_IFC(dev,
 1129                                  MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
 1130                                  port_num, NULL, NULL, in_mad, out_mad)) {
 1131                         mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
 1132                         goto out;
 1133                 }
 1134 
 1135                 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
 1136                                                     port_num,
 1137                                                     (u8 *)(&((struct ib_smp *)out_mad)->data));
 1138                 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
 1139                                                      port_num,
 1140                                                      (u8 *)(&((struct ib_smp *)out_mad)->data));
 1141         }
 1142 
 1143 out:
 1144         kfree(in_mad);
 1145         kfree(out_mad);
 1146         return;
 1147 }
 1148 
 1149 void handle_port_mgmt_change_event(struct work_struct *work)
 1150 {
 1151         struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
 1152         struct mlx4_ib_dev *dev = ew->ib_dev;
 1153         struct mlx4_eqe *eqe = &(ew->ib_eqe);
 1154         u8 port = eqe->event.port_mgmt_change.port;
 1155         u32 changed_attr;
 1156         u32 tbl_block;
 1157         u32 change_bitmap;
 1158 
 1159         switch (eqe->subtype) {
 1160         case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
 1161                 changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
 1162 
 1163                 /* Update the SM ah - This should be done before handling
 1164                    the other changed attributes so that MADs can be sent to the SM */
 1165                 if (changed_attr & MSTR_SM_CHANGE_MASK) {
 1166                         u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
 1167                         u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
 1168                         update_sm_ah(dev, port, lid, sl);
 1169                 }
 1170 
 1171                 /* Check if it is a lid change event */
 1172                 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
 1173                         handle_lid_change_event(dev, port);
 1174 
 1175                 /* Generate GUID changed event */
 1176                 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
 1177                         if (mlx4_is_master(dev->dev)) {
 1178                                 union ib_gid gid;
 1179                                 int err = 0;
 1180 
 1181                                 if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
 1182                                         err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
 1183                                 else
 1184                                         gid.global.subnet_prefix =
 1185                                                 eqe->event.port_mgmt_change.params.port_info.gid_prefix;
 1186                                 if (err) {
 1187                                         pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
 1188                                                 port, err);
 1189                                 } else {
 1190                                         pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
 1191                                                  port,
 1192                                                  (long long)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
 1193                                                  (long long)be64_to_cpu(gid.global.subnet_prefix));
 1194                                         atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
 1195                                                      be64_to_cpu(gid.global.subnet_prefix));
 1196                                 }
 1197                         }
 1198                         mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
 1199                         /* if master, notify all slaves */
 1200                         if (mlx4_is_master(dev->dev))
 1201                                 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
 1202                                                             MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
 1203                 }
 1204 
 1205                 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
 1206                         handle_client_rereg_event(dev, port);
 1207                 break;
 1208 
 1209         case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
 1210                 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
 1211                 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
 1212                         propagate_pkey_ev(dev, port, eqe);
 1213                 break;
 1214         case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
 1215                 /* paravirtualized master's guid is guid 0 -- does not change */
 1216                 if (!mlx4_is_master(dev->dev))
 1217                         mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
 1218                 /* if master, notify relevant slaves */
 1219                 else if (!dev->sriov.is_going_down) {
 1220                         tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
 1221                         change_bitmap = GET_MASK_FROM_EQE(eqe);
 1222                         handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
 1223                 }
 1224                 break;
 1225 
 1226         case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP:
 1227                 /* cache sl to vl mapping changes for use in
 1228                  * filling QP1 LRH VL field when sending packets
 1229                  */
 1230                 if (!mlx4_is_slave(dev->dev)) {
 1231                         union sl2vl_tbl_to_u64 sl2vl64;
 1232                         int jj;
 1233 
 1234                         for (jj = 0; jj < 8; jj++) {
 1235                                 sl2vl64.sl8[jj] =
 1236                                         eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj];
 1237                                 pr_debug("sl2vl[%d] = %02x\n", jj, sl2vl64.sl8[jj]);
 1238                         }
 1239                         atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64);
 1240                 }
 1241                 break;
 1242         default:
 1243                 pr_warn("Unsupported subtype 0x%x for "
 1244                         "Port Management Change event\n", eqe->subtype);
 1245         }
 1246 
 1247         kfree(ew);
 1248 }
 1249 
 1250 void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
 1251                             enum ib_event_type type)
 1252 {
 1253         struct ib_event event;
 1254 
 1255         event.device            = &dev->ib_dev;
 1256         event.element.port_num  = port_num;
 1257         event.event             = type;
 1258 
 1259         ib_dispatch_event(&event);
 1260 }
 1261 
 1262 static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
 1263 {
 1264         unsigned long flags;
 1265         struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
 1266         struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
 1267         spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
 1268         if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
 1269                 queue_work(ctx->wq, &ctx->work);
 1270         spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
 1271 }
 1272 
 1273 static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
 1274                                   struct mlx4_ib_demux_pv_qp *tun_qp,
 1275                                   int index)
 1276 {
 1277         struct ib_sge sg_list;
 1278         struct ib_recv_wr recv_wr;
 1279         const struct ib_recv_wr *bad_recv_wr;
 1280         int size;
 1281 
 1282         size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
 1283                 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
 1284 
 1285         sg_list.addr = tun_qp->ring[index].map;
 1286         sg_list.length = size;
 1287         sg_list.lkey = ctx->pd->local_dma_lkey;
 1288 
 1289         recv_wr.next = NULL;
 1290         recv_wr.sg_list = &sg_list;
 1291         recv_wr.num_sge = 1;
 1292         recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
 1293                 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
 1294         ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
 1295                                       size, DMA_FROM_DEVICE);
 1296         return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
 1297 }
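
/*
 * Illustrative sketch, not part of the driver: how a tunnel receive wr_id
 * is composed above and later decoded by the completion workers. Bit 34
 * marks a receive WR, bits 32-33 carry the proxy QP type (0 = SMI,
 * 1 = GSI) and the low bits select the ring slot.
 */
static inline u64 example_tun_recv_wrid(enum ib_qp_type qpt, int index)
{
        return (u64)index | MLX4_TUN_WRID_RECV | MLX4_TUN_SET_WRID_QPN(qpt);
}

static inline void example_tun_decode_wrid(u64 wr_id, int *is_recv, int *qpt,
                                           int *slot)
{
        *is_recv = MLX4_TUN_IS_RECV(wr_id);
        *qpt     = MLX4_TUN_WRID_QPN(wr_id);
        *slot    = wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
}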
 1298 
 1299 static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
 1300                 int slave, struct ib_sa_mad *sa_mad)
 1301 {
 1302         int ret = 0;
 1303 
 1304         /* dispatch to different sa handlers */
 1305         switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
 1306         case IB_SA_ATTR_MC_MEMBER_REC:
 1307                 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
 1308                 break;
 1309         default:
 1310                 break;
 1311         }
 1312         return ret;
 1313 }
 1314 
 1315 static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
 1316 {
 1317         int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
 1318 
 1319         return (qpn >= proxy_start && qpn <= proxy_start + 1);
 1320 }
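
/*
 * Illustrative sketch, not part of the driver: how mlx4_ib_multiplex_mad()
 * below recovers the slave from a proxy source QPN. Each slave owns eight
 * consecutive proxy QPNs starting at base_proxy_sqpn + 8 * slave; bit 0
 * selects the port, bit 1 distinguishes QP0 from QP1 proxies, and a QPN
 * with bit 2 set is rejected there as a bad SQP.
 */
static inline int example_proxy_qpn_to_slave(struct mlx4_ib_dev *dev, int qpn)
{
        return ((qpn & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
}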
 1321 
 1322 
 1323 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 1324                          enum ib_qp_type dest_qpt, u16 pkey_index,
 1325                          u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
 1326                          u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
 1327 {
 1328         struct ib_sge list;
 1329         struct ib_ud_wr wr;
 1330         const struct ib_send_wr *bad_wr;
 1331         struct mlx4_ib_demux_pv_ctx *sqp_ctx;
 1332         struct mlx4_ib_demux_pv_qp *sqp;
 1333         struct mlx4_mad_snd_buf *sqp_mad;
 1334         struct ib_ah *ah;
 1335         struct ib_qp *send_qp = NULL;
 1336         unsigned wire_tx_ix = 0;
 1337         u16 wire_pkey_ix;
 1338         int src_qpnum;
 1339         int ret;
 1340 
 1341         sqp_ctx = dev->sriov.sqps[port-1];
 1342 
 1343         /* check if proxy qp created */
 1344         if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
 1345                 return -EAGAIN;
 1346 
 1347         if (dest_qpt == IB_QPT_SMI) {
 1348                 src_qpnum = 0;
 1349                 sqp = &sqp_ctx->qp[0];
 1350                 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
 1351         } else {
 1352                 src_qpnum = 1;
 1353                 sqp = &sqp_ctx->qp[1];
 1354                 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
 1355         }
 1356 
 1357         send_qp = sqp->qp;
 1358 
 1359         ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah);
 1360         if (!ah)
 1361                 return -ENOMEM;
 1362 
 1363         ah->device = sqp_ctx->pd->device;
 1364         ah->pd = sqp_ctx->pd;
 1365 
 1366         /* create ah */
 1367         ret = mlx4_ib_create_ah_slave(ah, attr,
 1368                                       attr->grh.sgid_index,
 1369                                       s_mac, vlan_id);
 1370         if (ret)
 1371                 goto out;
 1372 
 1373         spin_lock(&sqp->tx_lock);
 1374         if (sqp->tx_ix_head - sqp->tx_ix_tail >=
 1375             (MLX4_NUM_TUNNEL_BUFS - 1))
 1376                 ret = -EAGAIN;
 1377         else
 1378                 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
 1379         spin_unlock(&sqp->tx_lock);
 1380         if (ret)
 1381                 goto out;
 1382 
 1383         sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
 1384         kfree(sqp->tx_ring[wire_tx_ix].ah);
 1385         sqp->tx_ring[wire_tx_ix].ah = ah;
 1386         ib_dma_sync_single_for_cpu(&dev->ib_dev,
 1387                                    sqp->tx_ring[wire_tx_ix].buf.map,
 1388                                    sizeof (struct mlx4_mad_snd_buf),
 1389                                    DMA_TO_DEVICE);
 1390 
 1391         memcpy(&sqp_mad->payload, mad, sizeof *mad);
 1392 
 1393         ib_dma_sync_single_for_device(&dev->ib_dev,
 1394                                       sqp->tx_ring[wire_tx_ix].buf.map,
 1395                                       sizeof (struct mlx4_mad_snd_buf),
 1396                                       DMA_TO_DEVICE);
 1397 
 1398         list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
 1399         list.length = sizeof (struct mlx4_mad_snd_buf);
 1400         list.lkey = sqp_ctx->pd->local_dma_lkey;
 1401 
 1402         wr.ah = ah;
 1403         wr.port_num = port;
 1404         wr.pkey_index = wire_pkey_ix;
 1405         wr.remote_qkey = qkey;
 1406         wr.remote_qpn = remote_qpn;
 1407         wr.wr.next = NULL;
 1408         wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
 1409         wr.wr.sg_list = &list;
 1410         wr.wr.num_sge = 1;
 1411         wr.wr.opcode = IB_WR_SEND;
 1412         wr.wr.send_flags = IB_SEND_SIGNALED;
 1413 
 1414         ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
 1415         if (!ret)
 1416                 return 0;
 1417 
 1418         spin_lock(&sqp->tx_lock);
 1419         sqp->tx_ix_tail++;
 1420         spin_unlock(&sqp->tx_lock);
 1421         sqp->tx_ring[wire_tx_ix].ah = NULL;
 1422 out:
 1423         kfree(ah);
 1424         return ret;
 1425 }
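
/*
 * Illustrative sketch, not part of the driver: the tx ring flow control
 * used in mlx4_ib_send_to_wire() above. The sender claims a slot by
 * advancing tx_ix_head (failing with -EAGAIN when the ring is nearly
 * full) and the completion workers below release it by advancing
 * tx_ix_tail; both counters only grow, and the ring slot is the counter
 * value masked with MLX4_NUM_TUNNEL_BUFS - 1.
 */
static inline int example_claim_tx_slot(struct mlx4_ib_demux_pv_qp *qp,
                                        unsigned int *slot)
{
        int ret = 0;

        spin_lock(&qp->tx_lock);
        if (qp->tx_ix_head - qp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1))
                ret = -EAGAIN;
        else
                *slot = (++qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
        spin_unlock(&qp->tx_lock);
        return ret;
}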
 1426 
 1427 static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
 1428 {
 1429         if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
 1430                 return slave;
 1431         return mlx4_get_base_gid_ix(dev->dev, slave, port);
 1432 }
 1433 
 1434 static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
 1435                                     struct ib_ah_attr *ah_attr)
 1436 {
 1437         if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
 1438                 ah_attr->grh.sgid_index = slave;
 1439         else
 1440                 ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
 1441 }
 1442 
 1443 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
 1444 {
 1445         struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
 1446         struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
 1447         int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
 1448         struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
 1449         struct mlx4_ib_ah ah;
 1450         struct ib_ah_attr ah_attr;
 1451         u8 *slave_id;
 1452         int slave;
 1453         int port;
 1454         u16 vlan_id;
 1455 
 1456         /* Get slave that sent this packet */
 1457         if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
 1458             wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
 1459             (wc->src_qp & 0x1) != ctx->port - 1 ||
 1460             wc->src_qp & 0x4) {
 1461                 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
 1462                 return;
 1463         }
 1464         slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
 1465         if (slave != ctx->slave) {
 1466                 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
 1467                              "belongs to another slave\n", wc->src_qp);
 1468                 return;
 1469         }
 1470 
 1471         /* Map transaction ID */
 1472         ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
 1473                                    sizeof (struct mlx4_tunnel_mad),
 1474                                    DMA_FROM_DEVICE);
 1475         switch (tunnel->mad.mad_hdr.method) {
 1476         case IB_MGMT_METHOD_SET:
 1477         case IB_MGMT_METHOD_GET:
 1478         case IB_MGMT_METHOD_REPORT:
 1479         case IB_SA_METHOD_GET_TABLE:
 1480         case IB_SA_METHOD_DELETE:
 1481         case IB_SA_METHOD_GET_MULTI:
 1482         case IB_SA_METHOD_GET_TRACE_TBL:
 1483                 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
 1484                 if (*slave_id) {
 1485                         mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
 1486                                      "class:%d slave:%d\n", *slave_id,
 1487                                      tunnel->mad.mad_hdr.mgmt_class, slave);
 1488                         return;
 1489                 } else
 1490                         *slave_id = slave;
 1491         default:
 1492                 /* nothing */;
 1493         }
 1494 
 1495         /* Class-specific handling */
 1496         switch (tunnel->mad.mad_hdr.mgmt_class) {
 1497         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
 1498         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
 1499                 if (slave != mlx4_master_func_num(dev->dev) &&
 1500                     !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
 1501                         return;
 1502                 break;
 1503         case IB_MGMT_CLASS_SUBN_ADM:
 1504                 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
 1505                               (struct ib_sa_mad *) &tunnel->mad))
 1506                         return;
 1507                 break;
 1508         case IB_MGMT_CLASS_CM:
 1509                 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
 1510                               (struct ib_mad *) &tunnel->mad))
 1511                         return;
 1512                 break;
 1513         case IB_MGMT_CLASS_DEVICE_MGMT:
 1514                 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
 1515                     tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
 1516                         return;
 1517                 break;
 1518         default:
 1519                 /* Drop unsupported classes for slaves in tunnel mode */
 1520                 if (slave != mlx4_master_func_num(dev->dev)) {
 1521                         mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
 1522                                      "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
 1523                         return;
 1524                 }
 1525         }
 1526 
 1527         /* We are using standard ib_core services to send the mad, so generate a
 1528                  * standard address handle by decoding the tunnelled mlx4_ah fields */
 1529         memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
 1530         ah.ibah.device = ctx->ib_dev;
 1531 
 1532         port = be32_to_cpu(ah.av.ib.port_pd) >> 24;
 1533         port = mlx4_slave_convert_port(dev->dev, slave, port);
 1534         if (port < 0)
 1535                 return;
 1536         ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
 1537 
 1538         mlx4_ib_query_ah(&ah.ibah, &ah_attr);
 1539         if (ah_attr.ah_flags & IB_AH_GRH)
 1540                 fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
 1541 
 1542         memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
 1543         vlan_id = be16_to_cpu(tunnel->hdr.vlan);
 1544         /* if the slave has a default vlan, use it */
 1545         mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
 1546                                     &vlan_id, &ah_attr.sl);
 1547 
 1548         mlx4_ib_send_to_wire(dev, slave, ctx->port,
 1549                              is_proxy_qp0(dev, wc->src_qp, slave) ?
 1550                              IB_QPT_SMI : IB_QPT_GSI,
 1551                              be16_to_cpu(tunnel->hdr.pkey_index),
 1552                              be32_to_cpu(tunnel->hdr.remote_qpn),
 1553                              be32_to_cpu(tunnel->hdr.qkey),
 1554                              &ah_attr, wc->smac, vlan_id, &tunnel->mad);
 1555 }
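
/*
 * Illustrative sketch, not part of the driver: the TID stamping done in
 * mlx4_ib_multiplex_mad() above. For request methods the most significant
 * byte of the 64-bit MAD transaction ID is overwritten with the slave
 * number, so the eventual response from the wire can be demultiplexed
 * back to the right function.
 */
static inline void example_stamp_tid_with_slave(struct ib_mad *mad, u8 slave)
{
        u8 *slave_id = (u8 *)&mad->mad_hdr.tid;

        *slave_id = slave;      /* tid is big-endian, so byte 0 is the MSB */
}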
 1556 
 1557 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
 1558                                  enum ib_qp_type qp_type, int is_tun)
 1559 {
 1560         int i;
 1561         struct mlx4_ib_demux_pv_qp *tun_qp;
 1562         int rx_buf_size, tx_buf_size;
 1563 
 1564         if (qp_type > IB_QPT_GSI)
 1565                 return -EINVAL;
 1566 
 1567         tun_qp = &ctx->qp[qp_type];
 1568 
 1569         tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
 1570                                GFP_KERNEL);
 1571         if (!tun_qp->ring)
 1572                 return -ENOMEM;
 1573 
 1574         tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
 1575                                   sizeof (struct mlx4_ib_tun_tx_buf),
 1576                                   GFP_KERNEL);
 1577         if (!tun_qp->tx_ring) {
 1578                 kfree(tun_qp->ring);
 1579                 tun_qp->ring = NULL;
 1580                 return -ENOMEM;
 1581         }
 1582 
 1583         if (is_tun) {
 1584                 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
 1585                 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
 1586         } else {
 1587                 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
 1588                 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
 1589         }
 1590 
 1591         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
 1592                 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
 1593                 if (!tun_qp->ring[i].addr)
 1594                         goto err;
 1595                 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
 1596                                                         tun_qp->ring[i].addr,
 1597                                                         rx_buf_size,
 1598                                                         DMA_FROM_DEVICE);
 1599                 if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
 1600                         kfree(tun_qp->ring[i].addr);
 1601                         goto err;
 1602                 }
 1603         }
 1604 
 1605         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
 1606                 tun_qp->tx_ring[i].buf.addr =
 1607                         kmalloc(tx_buf_size, GFP_KERNEL);
 1608                 if (!tun_qp->tx_ring[i].buf.addr)
 1609                         goto tx_err;
 1610                 tun_qp->tx_ring[i].buf.map =
 1611                         ib_dma_map_single(ctx->ib_dev,
 1612                                           tun_qp->tx_ring[i].buf.addr,
 1613                                           tx_buf_size,
 1614                                           DMA_TO_DEVICE);
 1615                 if (ib_dma_mapping_error(ctx->ib_dev,
 1616                                          tun_qp->tx_ring[i].buf.map)) {
 1617                         kfree(tun_qp->tx_ring[i].buf.addr);
 1618                         goto tx_err;
 1619                 }
 1620                 tun_qp->tx_ring[i].ah = NULL;
 1621         }
 1622         spin_lock_init(&tun_qp->tx_lock);
 1623         tun_qp->tx_ix_head = 0;
 1624         tun_qp->tx_ix_tail = 0;
 1625         tun_qp->proxy_qpt = qp_type;
 1626 
 1627         return 0;
 1628 
 1629 tx_err:
 1630         while (i > 0) {
 1631                 --i;
 1632                 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
 1633                                     tx_buf_size, DMA_TO_DEVICE);
 1634                 kfree(tun_qp->tx_ring[i].buf.addr);
 1635         }
 1636         kfree(tun_qp->tx_ring);
 1637         tun_qp->tx_ring = NULL;
 1638         i = MLX4_NUM_TUNNEL_BUFS;
 1639 err:
 1640         while (i > 0) {
 1641                 --i;
 1642                 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
 1643                                     rx_buf_size, DMA_FROM_DEVICE);
 1644                 kfree(tun_qp->ring[i].addr);
 1645         }
 1646         kfree(tun_qp->ring);
 1647         tun_qp->ring = NULL;
 1648         return -ENOMEM;
 1649 }
 1650 
 1651 static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
 1652                                      enum ib_qp_type qp_type, int is_tun)
 1653 {
 1654         int i;
 1655         struct mlx4_ib_demux_pv_qp *tun_qp;
 1656         int rx_buf_size, tx_buf_size;
 1657 
 1658         if (qp_type > IB_QPT_GSI)
 1659                 return;
 1660 
 1661         tun_qp = &ctx->qp[qp_type];
 1662         if (is_tun) {
 1663                 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
 1664                 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
 1665         } else {
 1666                 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
 1667                 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
 1668         }
 1669 
 1670 
 1671         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
 1672                 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
 1673                                     rx_buf_size, DMA_FROM_DEVICE);
 1674                 kfree(tun_qp->ring[i].addr);
 1675         }
 1676 
 1677         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
 1678                 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
 1679                                     tx_buf_size, DMA_TO_DEVICE);
 1680                 kfree(tun_qp->tx_ring[i].buf.addr);
 1681                 if (tun_qp->tx_ring[i].ah)
 1682                         ib_destroy_ah(tun_qp->tx_ring[i].ah, 0);
 1683         }
 1684         kfree(tun_qp->tx_ring);
 1685         kfree(tun_qp->ring);
 1686 }
 1687 
 1688 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
 1689 {
 1690         struct mlx4_ib_demux_pv_ctx *ctx;
 1691         struct mlx4_ib_demux_pv_qp *tun_qp;
 1692         struct ib_wc wc;
 1693         int ret;
 1694         ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
 1695         ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
 1696 
 1697         while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
 1698                 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
 1699                 if (wc.status == IB_WC_SUCCESS) {
 1700                         switch (wc.opcode) {
 1701                         case IB_WC_RECV:
 1702                                 mlx4_ib_multiplex_mad(ctx, &wc);
 1703                                 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
 1704                                                              wc.wr_id &
 1705                                                              (MLX4_NUM_TUNNEL_BUFS - 1));
 1706                                 if (ret)
 1707                                         pr_err("Failed reposting tunnel "
 1708                                                "buf:%lld\n", (unsigned long long)wc.wr_id);
 1709                                 break;
 1710                         case IB_WC_SEND:
 1711                                 pr_debug("received tunnel send completion:"
 1712                                          "wrid=0x%llx, status=0x%x\n",
 1713                                          (unsigned long long)wc.wr_id, wc.status);
 1714                                 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
 1715                                               (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
 1716                                 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
 1717                                         = NULL;
 1718                                 spin_lock(&tun_qp->tx_lock);
 1719                                 tun_qp->tx_ix_tail++;
 1720                                 spin_unlock(&tun_qp->tx_lock);
 1721 
 1722                                 break;
 1723                         default:
 1724                                 break;
 1725                         }
 1726                 } else  {
 1727                         pr_debug("mlx4_ib: tunnel completion error: slave %d,"
 1728                                  " status = %d, wrid = 0x%llx\n",
 1729                                  ctx->slave, wc.status, (unsigned long long)wc.wr_id);
 1730                         if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
 1731                                 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
 1732                                               (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
 1733                                 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
 1734                                         = NULL;
 1735                                 spin_lock(&tun_qp->tx_lock);
 1736                                 tun_qp->tx_ix_tail++;
 1737                                 spin_unlock(&tun_qp->tx_lock);
 1738                         }
 1739                 }
 1740         }
 1741 }
 1742 
 1743 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
 1744 {
 1745         struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
 1746 
 1747         /* It's worse than that! He's dead, Jim! */
 1748         pr_err("Fatal error (%d) on a MAD QP on port %d\n",
 1749                event->event, sqp->port);
 1750 }
 1751 
 1752 static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
 1753                             enum ib_qp_type qp_type, int create_tun)
 1754 {
 1755         int i, ret;
 1756         struct mlx4_ib_demux_pv_qp *tun_qp;
 1757         struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
 1758         struct ib_qp_attr attr;
 1759         int qp_attr_mask_INIT;
 1760 
 1761         if (qp_type > IB_QPT_GSI)
 1762                 return -EINVAL;
 1763 
 1764         tun_qp = &ctx->qp[qp_type];
 1765 
 1766         memset(&qp_init_attr, 0, sizeof qp_init_attr);
 1767         qp_init_attr.init_attr.send_cq = ctx->cq;
 1768         qp_init_attr.init_attr.recv_cq = ctx->cq;
 1769         qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
 1770         qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
 1771         qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
 1772         qp_init_attr.init_attr.cap.max_send_sge = 1;
 1773         qp_init_attr.init_attr.cap.max_recv_sge = 1;
 1774         if (create_tun) {
 1775                 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
 1776                 qp_init_attr.init_attr.create_flags =
 1777                     (enum ib_qp_create_flags)MLX4_IB_SRIOV_TUNNEL_QP;
 1778                 qp_init_attr.port = ctx->port;
 1779                 qp_init_attr.slave = ctx->slave;
 1780                 qp_init_attr.proxy_qp_type = qp_type;
 1781                 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
 1782                            IB_QP_QKEY | IB_QP_PORT;
 1783         } else {
 1784                 qp_init_attr.init_attr.qp_type = qp_type;
 1785                 qp_init_attr.init_attr.create_flags =
 1786                     (enum ib_qp_create_flags)MLX4_IB_SRIOV_SQP;
 1787                 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
 1788         }
 1789         qp_init_attr.init_attr.port_num = ctx->port;
 1790         qp_init_attr.init_attr.qp_context = ctx;
 1791         qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
 1792         tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
 1793         if (IS_ERR(tun_qp->qp)) {
 1794                 ret = PTR_ERR(tun_qp->qp);
 1795                 tun_qp->qp = NULL;
 1796                 pr_err("Couldn't create %s QP (%d)\n",
 1797                        create_tun ? "tunnel" : "special", ret);
 1798                 return ret;
 1799         }
 1800 
 1801         memset(&attr, 0, sizeof attr);
 1802         attr.qp_state = IB_QPS_INIT;
 1803         ret = 0;
 1804         if (create_tun)
 1805                 ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
 1806                                               ctx->port, IB_DEFAULT_PKEY_FULL,
 1807                                               &attr.pkey_index);
 1808         if (ret || !create_tun)
 1809                 attr.pkey_index =
 1810                         to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
 1811         attr.qkey = IB_QP1_QKEY;
 1812         attr.port_num = ctx->port;
 1813         ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
 1814         if (ret) {
 1815                 pr_err("Couldn't change %s qp state to INIT (%d)\n",
 1816                        create_tun ? "tunnel" : "special", ret);
 1817                 goto err_qp;
 1818         }
 1819         attr.qp_state = IB_QPS_RTR;
 1820         ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
 1821         if (ret) {
 1822                 pr_err("Couldn't change %s qp state to RTR (%d)\n",
 1823                        create_tun ? "tunnel" : "special", ret);
 1824                 goto err_qp;
 1825         }
 1826         attr.qp_state = IB_QPS_RTS;
 1827         attr.sq_psn = 0;
 1828         ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
 1829         if (ret) {
 1830                 pr_err("Couldn't change %s qp state to RTS (%d)\n",
 1831                        create_tun ? "tunnel" : "special", ret);
 1832                 goto err_qp;
 1833         }
 1834 
 1835         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
 1836                 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
 1837                 if (ret) {
 1838                         pr_err("mlx4_ib_post_pv_qp_buf error (err = %d, i = %d)\n",
 1839                                ret, i);
 1840                         goto err_qp;
 1841                 }
 1842         }
 1843         return 0;
 1844 
 1845 err_qp:
 1846         ib_destroy_qp(tun_qp->qp);
 1847         tun_qp->qp = NULL;
 1848         return ret;
 1849 }
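
/*
 * Illustrative sketch, not part of the driver: the minimal state sequence
 * create_pv_sqp() drives above for the proxy/tunnel UD QPs - INIT with a
 * pkey index, the QP1 qkey and the port, then RTR, then RTS with a
 * starting SQ PSN of zero. (The non-tunnel path above omits IB_QP_PORT
 * from the INIT mask.)
 */
static inline int example_bring_up_mad_qp(struct ib_qp *qp, u16 pkey_index,
                                          u8 port)
{
        struct ib_qp_attr attr = {};
        int ret;

        attr.qp_state = IB_QPS_INIT;
        attr.pkey_index = pkey_index;
        attr.qkey = IB_QP1_QKEY;
        attr.port_num = port;
        ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
                           IB_QP_QKEY | IB_QP_PORT);
        if (ret)
                return ret;

        attr.qp_state = IB_QPS_RTR;
        ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
        if (ret)
                return ret;

        attr.qp_state = IB_QPS_RTS;
        attr.sq_psn = 0;
        return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
}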
 1850 
 1851 /*
 1852  * IB MAD completion callback for real SQPs
 1853  */
 1854 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
 1855 {
 1856         struct mlx4_ib_demux_pv_ctx *ctx;
 1857         struct mlx4_ib_demux_pv_qp *sqp;
 1858         struct ib_wc wc;
 1859         struct ib_grh *grh;
 1860         struct ib_mad *mad;
 1861 
 1862         ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
 1863         ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
 1864 
 1865         while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
 1866                 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
 1867                 if (wc.status == IB_WC_SUCCESS) {
 1868                         switch (wc.opcode) {
 1869                         case IB_WC_SEND:
 1870                                 kfree(sqp->tx_ring[wc.wr_id &
 1871                                       (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
 1872                                 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
 1873                                         = NULL;
 1874                                 spin_lock(&sqp->tx_lock);
 1875                                 sqp->tx_ix_tail++;
 1876                                 spin_unlock(&sqp->tx_lock);
 1877                                 break;
 1878                         case IB_WC_RECV:
 1879                                 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
 1880                                                 (sqp->ring[wc.wr_id &
 1881                                                 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
 1882                                 grh = &(((struct mlx4_mad_rcv_buf *)
 1883                                                 (sqp->ring[wc.wr_id &
 1884                                                 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
 1885                                 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
 1886                                 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
 1887                                                            (MLX4_NUM_TUNNEL_BUFS - 1)))
 1888                                         pr_err("Failed reposting SQP "
 1889                                                "buf:%lld\n", (unsigned long long)wc.wr_id);
 1890                                 break;
 1891                         default:
 1892                                 BUG_ON(1);
 1893                                 break;
 1894                         }
 1895                 } else  {
 1896                         pr_debug("mlx4_ib: real SQP completion error: slave %d,"
 1897                                  " status = %d, wrid = 0x%llx\n",
 1898                                  ctx->slave, wc.status, (unsigned long long)wc.wr_id);
 1899                         if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
 1900                                 kfree(sqp->tx_ring[wc.wr_id &
 1901                                       (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
 1902                                 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
 1903                                         = NULL;
 1904                                 spin_lock(&sqp->tx_lock);
 1905                                 sqp->tx_ix_tail++;
 1906                                 spin_unlock(&sqp->tx_lock);
 1907                         }
 1908                 }
 1909         }
 1910 }
 1911 
 1912 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
 1913                                struct mlx4_ib_demux_pv_ctx **ret_ctx)
 1914 {
 1915         struct mlx4_ib_demux_pv_ctx *ctx;
 1916 
 1917         *ret_ctx = NULL;
 1918         ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
 1919         if (!ctx) {
 1920                 pr_err("failed allocating pv resource context "
 1921                        "for port %d, slave %d\n", port, slave);
 1922                 return -ENOMEM;
 1923         }
 1924 
 1925         ctx->ib_dev = &dev->ib_dev;
 1926         ctx->port = port;
 1927         ctx->slave = slave;
 1928         *ret_ctx = ctx;
 1929         return 0;
 1930 }
 1931 
 1932 static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
 1933 {
 1934         if (dev->sriov.demux[port - 1].tun[slave]) {
 1935                 kfree(dev->sriov.demux[port - 1].tun[slave]);
 1936                 dev->sriov.demux[port - 1].tun[slave] = NULL;
 1937         }
 1938 }
 1939 
 1940 static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
 1941                                int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
 1942 {
 1943         int ret, cq_size;
 1944         struct ib_cq_init_attr cq_attr = {};
 1945 
 1946         if (ctx->state != DEMUX_PV_STATE_DOWN)
 1947                 return -EEXIST;
 1948 
 1949         ctx->state = DEMUX_PV_STATE_STARTING;
 1950         /* have QP0 only if link layer is IB */
 1951         if (rdma_port_get_link_layer(ibdev, ctx->port) ==
 1952             IB_LINK_LAYER_INFINIBAND)
 1953                 ctx->has_smi = 1;
 1954 
 1955         if (ctx->has_smi) {
 1956                 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
 1957                 if (ret) {
 1958                         pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
 1959                         goto err_out;
 1960                 }
 1961         }
 1962 
 1963         ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
 1964         if (ret) {
 1965                 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
 1966                 goto err_out_qp0;
 1967         }
 1968 
 1969         cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
 1970         if (ctx->has_smi)
 1971                 cq_size *= 2;
 1972 
 1973         cq_attr.cqe = cq_size;
 1974         ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
 1975                                NULL, ctx, &cq_attr);
 1976         if (IS_ERR(ctx->cq)) {
 1977                 ret = PTR_ERR(ctx->cq);
 1978                 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
 1979                 goto err_buf;
 1980         }
 1981 
 1982         ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
 1983         if (IS_ERR(ctx->pd)) {
 1984                 ret = PTR_ERR(ctx->pd);
 1985                 pr_err("Couldn't create tunnel PD (%d)\n", ret);
 1986                 goto err_cq;
 1987         }
 1988 
 1989         if (ctx->has_smi) {
 1990                 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
 1991                 if (ret) {
 1992                         pr_err("Couldn't create %s QP0 (%d)\n",
 1993                                create_tun ? "tunnel for" : "",  ret);
 1994                         goto err_pd;
 1995                 }
 1996         }
 1997 
 1998         ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
 1999         if (ret) {
 2000                 pr_err("Couldn't create %s QP1 (%d)\n",
 2001                        create_tun ? "tunnel for" : "",  ret);
 2002                 goto err_qp0;
 2003         }
 2004 
 2005         if (create_tun)
 2006                 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
 2007         else
 2008                 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
 2009 
 2010         ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
 2011 
 2012         ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
 2013         if (ret) {
 2014                 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
 2015                 goto err_wq;
 2016         }
 2017         ctx->state = DEMUX_PV_STATE_ACTIVE;
 2018         return 0;
 2019 
 2020 err_wq:
 2021         ctx->wq = NULL;
 2022         ib_destroy_qp(ctx->qp[1].qp);
 2023         ctx->qp[1].qp = NULL;
 2024 
 2025 
 2026 err_qp0:
 2027         if (ctx->has_smi)
 2028                 ib_destroy_qp(ctx->qp[0].qp);
 2029         ctx->qp[0].qp = NULL;
 2030 
 2031 err_pd:
 2032         ib_dealloc_pd(ctx->pd);
 2033         ctx->pd = NULL;
 2034 
 2035 err_cq:
 2036         ib_destroy_cq(ctx->cq);
 2037         ctx->cq = NULL;
 2038 
 2039 err_buf:
 2040         mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
 2041 
 2042 err_out_qp0:
 2043         if (ctx->has_smi)
 2044                 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
 2045 err_out:
 2046         ctx->state = DEMUX_PV_STATE_DOWN;
 2047         return ret;
 2048 }
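
/*
 * Illustrative note, not part of the driver: the CQ created in
 * create_pv_resources() above is sized for one receive ring and one send
 * ring per QP, i.e. 2 * MLX4_NUM_TUNNEL_BUFS entries when only QP1
 * exists, doubled again when the port also carries a QP0.
 */
static inline int example_pv_cq_size(int has_smi)
{
        return 2 * MLX4_NUM_TUNNEL_BUFS * (has_smi ? 2 : 1);
}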
 2049 
 2050 static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
 2051                                  struct mlx4_ib_demux_pv_ctx *ctx, int flush)
 2052 {
 2053         if (!ctx)
 2054                 return;
 2055         if (ctx->state > DEMUX_PV_STATE_DOWN) {
 2056                 ctx->state = DEMUX_PV_STATE_DOWNING;
 2057                 if (flush)
 2058                         flush_workqueue(ctx->wq);
 2059                 if (ctx->has_smi) {
 2060                         ib_destroy_qp(ctx->qp[0].qp);
 2061                         ctx->qp[0].qp = NULL;
 2062                         mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
 2063                 }
 2064                 ib_destroy_qp(ctx->qp[1].qp);
 2065                 ctx->qp[1].qp = NULL;
 2066                 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
 2067                 ib_dealloc_pd(ctx->pd);
 2068                 ctx->pd = NULL;
 2069                 ib_destroy_cq(ctx->cq);
 2070                 ctx->cq = NULL;
 2071                 ctx->state = DEMUX_PV_STATE_DOWN;
 2072         }
 2073 }
 2074 
 2075 static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
 2076                                   int port, int do_init)
 2077 {
 2078         int ret = 0;
 2079 
 2080         if (!do_init) {
 2081                 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
 2082                 /* for master, destroy real sqp resources */
 2083                 if (slave == mlx4_master_func_num(dev->dev))
 2084                         destroy_pv_resources(dev, slave, port,
 2085                                              dev->sriov.sqps[port - 1], 1);
 2086                 /* destroy the tunnel qp resources */
 2087                 destroy_pv_resources(dev, slave, port,
 2088                                      dev->sriov.demux[port - 1].tun[slave], 1);
 2089                 return 0;
 2090         }
 2091 
 2092         /* create the tunnel qp resources */
 2093         ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
 2094                                   dev->sriov.demux[port - 1].tun[slave]);
 2095 
 2096         /* for master, create the real sqp resources */
 2097         if (!ret && slave == mlx4_master_func_num(dev->dev))
 2098                 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
 2099                                           dev->sriov.sqps[port - 1]);
 2100         return ret;
 2101 }
 2102 
 2103 void mlx4_ib_tunnels_update_work(struct work_struct *work)
 2104 {
 2105         struct mlx4_ib_demux_work *dmxw;
 2106 
 2107         dmxw = container_of(work, struct mlx4_ib_demux_work, work);
 2108         mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
 2109                                dmxw->do_init);
 2110         kfree(dmxw);
 2111         return;
 2112 }
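
/*
 * Illustrative sketch, not part of the driver: how a caller could queue
 * the work item that mlx4_ib_tunnels_update_work() above consumes. The
 * field names follow the handler's usage; the choice of the per-port
 * up/down workqueue is an assumption made for this sketch.
 */
static inline int example_queue_tunnels_update(struct mlx4_ib_dev *dev,
                                               int slave, u8 port, int do_init)
{
        struct mlx4_ib_demux_work *dmxw;

        dmxw = kmalloc(sizeof(*dmxw), GFP_ATOMIC);      /* safe from atomic context */
        if (!dmxw)
                return -ENOMEM;

        INIT_WORK(&dmxw->work, mlx4_ib_tunnels_update_work);
        dmxw->dev = dev;
        dmxw->slave = slave;
        dmxw->port = port;
        dmxw->do_init = do_init;
        queue_work(dev->sriov.demux[port - 1].ud_wq, &dmxw->work);
        return 0;
}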
 2113 
 2114 static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
 2115                                        struct mlx4_ib_demux_ctx *ctx,
 2116                                        int port)
 2117 {
 2118         char name[12];
 2119         int ret = 0;
 2120         int i;
 2121 
 2122         ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
 2123                            sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
 2124         if (!ctx->tun)
 2125                 return -ENOMEM;
 2126 
 2127         ctx->dev = dev;
 2128         ctx->port = port;
 2129         ctx->ib_dev = &dev->ib_dev;
 2130 
 2131         for (i = 0;
 2132              i < min(dev->dev->caps.sqp_demux,
 2133              (u16)(dev->dev->persist->num_vfs + 1));
 2134              i++) {
 2135                 struct mlx4_active_ports actv_ports =
 2136                         mlx4_get_active_ports(dev->dev, i);
 2137 
 2138                 if (!test_bit(port - 1, actv_ports.ports))
 2139                         continue;
 2140 
 2141                 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
 2142                 if (ret) {
 2143                         ret = -ENOMEM;
 2144                         goto err_mcg;
 2145                 }
 2146         }
 2147 
 2148         ret = mlx4_ib_mcg_port_init(ctx);
 2149         if (ret) {
 2150                 pr_err("Failed initializing mcg para-virt (%d)\n", ret);
 2151                 goto err_mcg;
 2152         }
 2153 
 2154         snprintf(name, sizeof name, "mlx4_ibt%d", port);
 2155         ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
 2156         if (!ctx->wq) {
 2157                 pr_err("Failed to create tunnelling WQ for port %d\n", port);
 2158                 ret = -ENOMEM;
 2159                 goto err_wq;
 2160         }
 2161 
 2162         snprintf(name, sizeof name, "mlx4_ibud%d", port);
 2163         ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
 2164         if (!ctx->ud_wq) {
 2165                 pr_err("Failed to create up/down WQ for port %d\n", port);
 2166                 ret = -ENOMEM;
 2167                 goto err_udwq;
 2168         }
 2169 
 2170         return 0;
 2171 
 2172 err_udwq:
 2173         destroy_workqueue(ctx->wq);
 2174         ctx->wq = NULL;
 2175 
 2176 err_wq:
 2177         mlx4_ib_mcg_port_cleanup(ctx, 1);
 2178 err_mcg:
 2179         for (i = 0; i < dev->dev->caps.sqp_demux; i++)
 2180                 free_pv_object(dev, i, port);
 2181         kfree(ctx->tun);
 2182         ctx->tun = NULL;
 2183         return ret;
 2184 }
 2185 
 2186 static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
 2187 {
 2188         if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
 2189                 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
 2190                 flush_workqueue(sqp_ctx->wq);
 2191                 if (sqp_ctx->has_smi) {
 2192                         ib_destroy_qp(sqp_ctx->qp[0].qp);
 2193                         sqp_ctx->qp[0].qp = NULL;
 2194                         mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
 2195                 }
 2196                 ib_destroy_qp(sqp_ctx->qp[1].qp);
 2197                 sqp_ctx->qp[1].qp = NULL;
 2198                 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
 2199                 ib_dealloc_pd(sqp_ctx->pd);
 2200                 sqp_ctx->pd = NULL;
 2201                 ib_destroy_cq(sqp_ctx->cq);
 2202                 sqp_ctx->cq = NULL;
 2203                 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
 2204         }
 2205 }
 2206 
 2207 static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
 2208 {
 2209         int i;
 2210         if (ctx) {
 2211                 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
 2212                 mlx4_ib_mcg_port_cleanup(ctx, 1);
 2213                 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
 2214                         if (!ctx->tun[i])
 2215                                 continue;
 2216                         if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
 2217                                 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
 2218                 }
 2219                 flush_workqueue(ctx->wq);
 2220                 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
 2221                         destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
 2222                         free_pv_object(dev, i, ctx->port);
 2223                 }
 2224                 kfree(ctx->tun);
 2225                 destroy_workqueue(ctx->ud_wq);
 2226                 destroy_workqueue(ctx->wq);
 2227         }
 2228 }
 2229 
 2230 static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
 2231 {
 2232         int i;
 2233 
 2234         if (!mlx4_is_master(dev->dev))
 2235                 return;
 2236         /* initialize or tear down tunnel QPs for the master */
 2237         for (i = 0; i < dev->dev->caps.num_ports; i++)
 2238                 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
 2239         return;
 2240 }
 2241 
 2242 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
 2243 {
 2244         int i = 0;
 2245         int err;
 2246 
 2247         if (!mlx4_is_mfunc(dev->dev))
 2248                 return 0;
 2249 
 2250         dev->sriov.is_going_down = 0;
 2251         spin_lock_init(&dev->sriov.going_down_lock);
 2252         mlx4_ib_cm_paravirt_init(dev);
 2253 
 2254         mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
 2255 
 2256         if (mlx4_is_slave(dev->dev)) {
 2257                 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
 2258                 return 0;
 2259         }
 2260 
 2261         for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
 2262                 if (i == mlx4_master_func_num(dev->dev))
 2263                         mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
 2264                 else
 2265                         mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
 2266         }
 2267 
 2268         err = mlx4_ib_init_alias_guid_service(dev);
 2269         if (err) {
 2270                 mlx4_ib_warn(&dev->ib_dev, "Failed to init the alias GUID service\n");
 2271                 goto paravirt_err;
 2272         }
 2273         err = mlx4_ib_device_register_sysfs(dev);
 2274         if (err) {
 2275                 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
 2276                 goto sysfs_err;
 2277         }
 2278 
 2279         mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
 2280                      dev->dev->caps.sqp_demux);
 2281         for (i = 0; i < dev->num_ports; i++) {
 2282                 union ib_gid gid;
 2283                 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
 2284                 if (err)
 2285                         goto demux_err;
 2286                 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
 2287                 atomic64_set(&dev->sriov.demux[i].subnet_prefix,
 2288                              be64_to_cpu(gid.global.subnet_prefix));
 2289                 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
 2290                                       &dev->sriov.sqps[i]);
 2291                 if (err)
 2292                         goto demux_err;
 2293                 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
 2294                 if (err)
 2295                         goto free_pv;
 2296         }
 2297         mlx4_ib_master_tunnels(dev, 1);
 2298         return 0;
 2299 
 2300 free_pv:
 2301         free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
 2302 demux_err:
 2303         while (--i >= 0) {
 2304                 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
 2305                 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
 2306         }
 2307         mlx4_ib_device_unregister_sysfs(dev);
 2308 
 2309 sysfs_err:
 2310         mlx4_ib_destroy_alias_guid_service(dev);
 2311 
 2312 paravirt_err:
 2313         mlx4_ib_cm_paravirt_clean(dev, -1);
 2314 
 2315         return err;
 2316 }
 2317 
 2318 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
 2319 {
 2320         int i;
 2321         unsigned long flags;
 2322 
 2323         if (!mlx4_is_mfunc(dev->dev))
 2324                 return;
 2325 
 2326         spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
 2327         dev->sriov.is_going_down = 1;
 2328         spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
 2329         if (mlx4_is_master(dev->dev)) {
 2330                 for (i = 0; i < dev->num_ports; i++) {
 2331                         flush_workqueue(dev->sriov.demux[i].ud_wq);
 2332                         mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
 2333                         kfree(dev->sriov.sqps[i]);
 2334                         dev->sriov.sqps[i] = NULL;
 2335                         mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
 2336                 }
 2337 
 2338                 mlx4_ib_cm_paravirt_clean(dev, -1);
 2339                 mlx4_ib_destroy_alias_guid_service(dev);
 2340                 mlx4_ib_device_unregister_sysfs(dev);
 2341         }
 2342 }
