
FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c


    1 /*
    2  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
    3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
    4  *
    5  * This software is available to you under a choice of one of two
    6  * licenses.  You may choose to be licensed under the terms of the GNU
    7  * General Public License (GPL) Version 2, available from the file
    8  * COPYING in the main directory of this source tree, or the
    9  * OpenIB.org BSD license below:
   10  *
   11  *     Redistribution and use in source and binary forms, with or
   12  *     without modification, are permitted provided that the following
   13  *     conditions are met:
   14  *
   15  *      - Redistributions of source code must retain the above
   16  *        copyright notice, this list of conditions and the following
   17  *        disclaimer.
   18  *
   19  *      - Redistributions in binary form must reproduce the above
   20  *        copyright notice, this list of conditions and the following
   21  *        disclaimer in the documentation and/or other materials
   22  *        provided with the distribution.
   23  *
   24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   31  * SOFTWARE.
   32  */
   33 
   34 #include <linux/log2.h>
   35 #include <linux/slab.h>
   36 #include <linux/netdevice.h>
   37 #include <linux/bitops.h>
   38 #include <linux/rcupdate.h>
   39 #include <linux/etherdevice.h>
   40 
   41 #include <rdma/ib_cache.h>
   42 #include <rdma/ib_pack.h>
   43 #include <rdma/ib_addr.h>
   44 #include <rdma/ib_mad.h>
   45 #include <rdma/uverbs_ioctl.h>
   46 
   47 #include <dev/mlx4/cmd.h>
   48 #include <dev/mlx4/qp.h>
   49 #include <dev/mlx4/driver.h>
   50 #include <linux/io.h>
   51 
   52 #include "mlx4_ib.h"
   53 #include <rdma/mlx4-abi.h>
   54 
   55 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
   56                              struct mlx4_ib_cq *recv_cq);
   57 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
   58                                struct mlx4_ib_cq *recv_cq);
   59 
   60 enum {
   61         MLX4_IB_ACK_REQ_FREQ    = 8,
   62 };
   63 
   64 enum {
   65         MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,
   66         MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
   67         MLX4_IB_LINK_TYPE_IB            = 0,
   68         MLX4_IB_LINK_TYPE_ETH           = 1
   69 };
   70 
   71 enum {
   72         /*
   73          * Largest possible UD header: send with GRH and immediate
   74          * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
   75          * tag.  (LRH would only use 8 bytes, so Ethernet is the
   76          * biggest case)
   77          */
   78         MLX4_IB_UD_HEADER_SIZE          = 82,
   79         MLX4_IB_LSO_HEADER_SPARE        = 128,
   80 };
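       /*
        * (Illustrative breakdown of the 82-byte worst case above:
        * Ethernet 14 + VLAN 4 + GRH 40 + BTH 12 + DETH 8 + immediate
        * data 4 = 82.  The IB case trades the 18-byte Ethernet/VLAN
        * header for an 8-byte LRH and so comes out smaller.)
        */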
   81 
   82 enum {
   83         MLX4_IB_IBOE_ETHERTYPE          = 0x8915
   84 };
   85 
   86 struct mlx4_ib_sqp {
   87         struct mlx4_ib_qp       qp;
   88         int                     pkey_index;
   89         u32                     qkey;
   90         u32                     send_psn;
   91         struct ib_ud_header     ud_header;
   92         u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
   93         struct ib_qp            *roce_v2_gsi;
   94 };
   95 
   96 enum {
   97         MLX4_IB_MIN_SQ_STRIDE   = 6,
   98         MLX4_IB_CACHE_LINE_SIZE = 64,
   99 };
  100 
  101 enum {
  102         MLX4_RAW_QP_MTU         = 7,
  103         MLX4_RAW_QP_MSGMAX      = 31,
  104 };
  105 
  106 #ifndef ETH_ALEN
  107 #define ETH_ALEN        6
  108 #endif
  109 
  110 static const __be32 mlx4_ib_opcode[] = {
  111         [IB_WR_SEND]                            = cpu_to_be32(MLX4_OPCODE_SEND),
  112         [IB_WR_LSO]                             = cpu_to_be32(MLX4_OPCODE_LSO),
  113         [IB_WR_SEND_WITH_IMM]                   = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
  114         [IB_WR_RDMA_WRITE]                      = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
  115         [IB_WR_RDMA_WRITE_WITH_IMM]             = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
  116         [IB_WR_RDMA_READ]                       = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
  117         [IB_WR_ATOMIC_CMP_AND_SWP]              = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
  118         [IB_WR_ATOMIC_FETCH_AND_ADD]            = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
  119         [IB_WR_SEND_WITH_INV]                   = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
  120         [IB_WR_LOCAL_INV]                       = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
  121         [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
  122         [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
  123         [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
  124 };
  125 
  126 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
  127 {
  128         return container_of(mqp, struct mlx4_ib_sqp, qp);
  129 }
  130 
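       /*
        * Tunnel QPs occupy a dedicated QPN range: 8 QPNs are set aside per
        * function starting at base_tunnel_sqpn (see the QPN computation in
        * create_qp_common()), hence the 8 * MLX4_MFUNC_MAX span checked
        * below.
        */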
  131 static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
  132 {
  133         if (!mlx4_is_master(dev->dev))
  134                 return 0;
  135 
  136         return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
  137                qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
  138                 8 * MLX4_MFUNC_MAX;
  139 }
  140 
  141 static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
  142 {
  143         int proxy_sqp = 0;
  144         int real_sqp = 0;
  145         int i;
  146         /* PPF or Native -- real SQP */
  147         real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
  148                     qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
  149                     qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
  150         if (real_sqp)
  151                 return 1;
  152         /* VF or PF -- proxy SQP */
  153         if (mlx4_is_mfunc(dev->dev)) {
  154                 for (i = 0; i < dev->dev->caps.num_ports; i++) {
  155                         if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
  156                             qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
  157                                 proxy_sqp = 1;
  158                                 break;
  159                         }
  160                 }
  161         }
  162         if (proxy_sqp)
  163                 return 1;
  164 
  165         return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
  166 }
  167 
  168 /* used for INIT/CLOSE port logic */
  169 static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
  170 {
  171         int proxy_qp0 = 0;
  172         int real_qp0 = 0;
  173         int i;
  174         /* PPF or Native -- real QP0 */
  175         real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
  176                     qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
  177                     qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
  178         if (real_qp0)
  179                 return 1;
  180         /* VF or PF -- proxy QP0 */
  181         if (mlx4_is_mfunc(dev->dev)) {
  182                 for (i = 0; i < dev->dev->caps.num_ports; i++) {
  183                         if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
  184                                 proxy_qp0 = 1;
  185                                 break;
  186                         }
  187                 }
  188         }
  189         return proxy_qp0;
  190 }
  191 
  192 static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
  193 {
  194         return mlx4_buf_offset(&qp->buf, offset);
  195 }
  196 
  197 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
  198 {
  199         return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
  200 }
  201 
  202 static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
  203 {
  204         return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
  205 }
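       /*
        * Illustrative example: with sq.wqe_shift == 6 the send queue is laid
        * out in 64-byte basic blocks, so get_send_wqe(qp, n) resolves to
        * sq.offset + n * 64 within the QP buffer.
        */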
  206 
  207 /*
  208  * Stamp a SQ WQE so that it is invalid if prefetched by marking the
  209  * first four bytes of every 64 byte chunk with
   210  *     0x7FFFFFFF | (invalid_ownership_value << 31).
  211  *
  212  * When the max work request size is less than or equal to the WQE
  213  * basic block size, as an optimization, we can stamp all WQEs with
  214  * 0xffffffff, and skip the very first chunk of each WQE.
  215  */
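       /*
        * Note that (ind & qp->sq.wqe_cnt) below is the ownership parity of
        * the current pass through the ring, so the stamp always carries the
        * opposite of the valid ownership bit (bit 31); a prefetched chunk
        * can therefore never be mistaken for a valid descriptor.
        */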
  216 static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
  217 {
  218         __be32 *wqe;
  219         int i;
  220         int s;
  221         int ind;
  222         void *buf;
  223         __be32 stamp;
  224         struct mlx4_wqe_ctrl_seg *ctrl;
  225 
  226         if (qp->sq_max_wqes_per_wr > 1) {
  227                 s = roundup(size, 1U << qp->sq.wqe_shift);
  228                 for (i = 0; i < s; i += 64) {
  229                         ind = (i >> qp->sq.wqe_shift) + n;
  230                         stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
  231                                                        cpu_to_be32(0xffffffff);
  232                         buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
  233                         wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
  234                         *wqe = stamp;
  235                 }
  236         } else {
  237                 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
  238                 s = (ctrl->fence_size & 0x3f) << 4;
  239                 for (i = 64; i < s; i += 64) {
  240                         wqe = buf + i;
  241                         *wqe = cpu_to_be32(0xffffffff);
  242                 }
  243         }
  244 }
  245 
  246 static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
  247 {
  248         struct mlx4_wqe_ctrl_seg *ctrl;
  249         struct mlx4_wqe_inline_seg *inl;
  250         void *wqe;
  251         int s;
  252 
  253         ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
  254         s = sizeof(struct mlx4_wqe_ctrl_seg);
  255 
  256         if (qp->ibqp.qp_type == IB_QPT_UD) {
  257                 struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
  258                 struct mlx4_av *av = (struct mlx4_av *)dgram->av;
  259                 memset(dgram, 0, sizeof *dgram);
  260                 av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
  261                 s += sizeof(struct mlx4_wqe_datagram_seg);
  262         }
  263 
  264         /* Pad the remainder of the WQE with an inline data segment. */
  265         if (size > s) {
  266                 inl = wqe + s;
  267                 inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl));
  268         }
  269         ctrl->srcrb_flags = 0;
  270         ctrl->fence_size = size / 16;
  271         /*
  272          * Make sure descriptor is fully written before setting ownership bit
  273          * (because HW can start executing as soon as we do).
  274          */
  275         wmb();
  276 
  277         ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
  278                 (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0);
  279 
  280         stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
  281 }
  282 
  283 /* Post NOP WQE to prevent wrap-around in the middle of WR */
  284 static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
  285 {
  286         unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
  287         if (unlikely(s < qp->sq_max_wqes_per_wr)) {
  288                 post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
  289                 ind += s;
  290         }
  291         return ind;
  292 }
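       /*
        * Illustrative example: with sq.wqe_cnt == 256, sq_max_wqes_per_wr ==
        * 4 and ind == 254, only s == 2 basic-block units remain before the
        * end of the ring, so a 2-unit NOP is posted and the returned index
        * advances to 256, i.e. wraps cleanly to the start of the ring.
        */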
  293 
  294 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
  295 {
  296         struct ib_event event;
  297         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
  298 
  299         if (type == MLX4_EVENT_TYPE_PATH_MIG)
  300                 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
  301 
  302         if (ibqp->event_handler) {
  303                 event.device     = ibqp->device;
  304                 event.element.qp = ibqp;
  305                 switch (type) {
  306                 case MLX4_EVENT_TYPE_PATH_MIG:
  307                         event.event = IB_EVENT_PATH_MIG;
  308                         break;
  309                 case MLX4_EVENT_TYPE_COMM_EST:
  310                         event.event = IB_EVENT_COMM_EST;
  311                         break;
  312                 case MLX4_EVENT_TYPE_SQ_DRAINED:
  313                         event.event = IB_EVENT_SQ_DRAINED;
  314                         break;
  315                 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
  316                         event.event = IB_EVENT_QP_LAST_WQE_REACHED;
  317                         break;
  318                 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
  319                         event.event = IB_EVENT_QP_FATAL;
  320                         break;
  321                 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
  322                         event.event = IB_EVENT_PATH_MIG_ERR;
  323                         break;
  324                 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
  325                         event.event = IB_EVENT_QP_REQ_ERR;
  326                         break;
  327                 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
  328                         event.event = IB_EVENT_QP_ACCESS_ERR;
  329                         break;
  330                 default:
  331                         pr_warn("Unexpected event type %d "
  332                                "on QP %06x\n", type, qp->qpn);
  333                         return;
  334                 }
  335 
  336                 ibqp->event_handler(&event, ibqp->qp_context);
  337         }
  338 }
  339 
  340 static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
  341 {
  342         /*
  343          * UD WQEs must have a datagram segment.
  344          * RC and UC WQEs might have a remote address segment.
  345          * MLX WQEs need two extra inline data segments (for the UD
  346          * header and space for the ICRC).
  347          */
  348         switch (type) {
  349         case MLX4_IB_QPT_UD:
  350                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  351                         sizeof (struct mlx4_wqe_datagram_seg) +
  352                         ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
  353         case MLX4_IB_QPT_PROXY_SMI_OWNER:
  354         case MLX4_IB_QPT_PROXY_SMI:
  355         case MLX4_IB_QPT_PROXY_GSI:
  356                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  357                         sizeof (struct mlx4_wqe_datagram_seg) + 64;
  358         case MLX4_IB_QPT_TUN_SMI_OWNER:
  359         case MLX4_IB_QPT_TUN_GSI:
  360                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  361                         sizeof (struct mlx4_wqe_datagram_seg);
  362 
  363         case MLX4_IB_QPT_UC:
  364                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  365                         sizeof (struct mlx4_wqe_raddr_seg);
  366         case MLX4_IB_QPT_RC:
  367                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  368                         sizeof (struct mlx4_wqe_masked_atomic_seg) +
  369                         sizeof (struct mlx4_wqe_raddr_seg);
  370         case MLX4_IB_QPT_SMI:
  371         case MLX4_IB_QPT_GSI:
  372                 return sizeof (struct mlx4_wqe_ctrl_seg) +
  373                         ALIGN(MLX4_IB_UD_HEADER_SIZE +
  374                               DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
  375                                            MLX4_INLINE_ALIGN) *
  376                               sizeof (struct mlx4_wqe_inline_seg),
  377                               sizeof (struct mlx4_wqe_data_seg)) +
  378                         ALIGN(4 +
  379                               sizeof (struct mlx4_wqe_inline_seg),
  380                               sizeof (struct mlx4_wqe_data_seg));
  381         default:
  382                 return sizeof (struct mlx4_wqe_ctrl_seg);
  383         }
  384 }
  385 
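       /*
        * Worked example (illustrative): a request for max_recv_wr = 100 and
        * max_recv_sge = 3 is rounded up to wqe_cnt = 128 and max_gs = 4,
        * giving wqe_shift = ilog2(4 * sizeof(struct mlx4_wqe_data_seg)) =
        * ilog2(4 * 16) = 6, i.e. 64-byte receive WQEs.
        */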
  386 static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
  387                        int is_user, int has_rq, struct mlx4_ib_qp *qp)
  388 {
  389         /* Sanity check RQ size before proceeding */
  390         if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
  391             cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
  392                 return -EINVAL;
  393 
  394         if (!has_rq) {
  395                 if (cap->max_recv_wr)
  396                         return -EINVAL;
  397 
  398                 qp->rq.wqe_cnt = qp->rq.max_gs = 0;
  399         } else {
  400                 /* HW requires >= 1 RQ entry with >= 1 gather entry */
  401                 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
  402                         return -EINVAL;
  403 
  404                 qp->rq.wqe_cnt   = roundup_pow_of_two(max(1U, cap->max_recv_wr));
  405                 qp->rq.max_gs    = roundup_pow_of_two(max(1U, cap->max_recv_sge));
  406                 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
  407         }
  408 
  409         /* leave userspace return values as they were, so as not to break ABI */
  410         if (is_user) {
  411                 cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
  412                 cap->max_recv_sge = qp->rq.max_gs;
  413         } else {
  414                 cap->max_recv_wr  = qp->rq.max_post =
  415                         min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
  416                 cap->max_recv_sge = min(qp->rq.max_gs,
  417                                         min(dev->dev->caps.max_sq_sg,
  418                                             dev->dev->caps.max_rq_sg));
  419         }
  420 
  421         return 0;
  422 }
  423 
  424 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
  425                               enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
  426                               bool shrink_wqe)
  427 {
  428         int s;
  429 
  430         /* Sanity check SQ size before proceeding */
  431         if (cap->max_send_wr  > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
  432             cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
  433             cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
  434             sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
  435                 return -EINVAL;
  436 
  437         /*
  438          * For MLX transport we need 2 extra S/G entries:
  439          * one for the header and one for the checksum at the end
  440          */
  441         if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
  442              type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
  443             cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
  444                 return -EINVAL;
  445 
  446         s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
  447                 cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
  448                 send_wqe_overhead(type, qp->flags);
  449 
  450         if (s > dev->dev->caps.max_sq_desc_sz)
  451                 return -EINVAL;
  452 
  453         /*
  454          * Hermon supports shrinking WQEs, such that a single work
  455          * request can include multiple units of 1 << wqe_shift.  This
  456          * way, work requests can differ in size, and do not have to
  457          * be a power of 2 in size, saving memory and speeding up send
  458          * WR posting.  Unfortunately, if we do this then the
  459          * wqe_index field in CQEs can't be used to look up the WR ID
  460          * anymore, so we do this only if selective signaling is off.
  461          *
  462          * Further, on 32-bit platforms, we can't use vmap() to make
  463          * the QP buffer virtually contiguous.  Thus we have to use
  464          * constant-sized WRs to make sure a WR is always fully within
  465          * a single page-sized chunk.
  466          *
  467          * Finally, we use NOP work requests to pad the end of the
  468          * work queue, to avoid wrap-around in the middle of WR.  We
  469          * set NEC bit to avoid getting completions with error for
  470          * these NOP WRs, but since NEC is only supported starting
  471          * with firmware 2.2.232, we use constant-sized WRs for older
  472          * firmware.
  473          *
  474          * And, since MLX QPs only support SEND, we use constant-sized
  475          * WRs in this case.
  476          *
  477          * We look for the smallest value of wqe_shift such that the
  478          * resulting number of wqes does not exceed device
  479          * capabilities.
  480          *
  481          * We set WQE size to at least 64 bytes, this way stamping
  482          * invalidates each WQE.
  483          */
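               /*
                * In the shrinking-WQE case the loop below therefore starts
                * from a 64-byte stride (wqe_shift = 6) and keeps doubling the
                * stride until the resulting wqe_cnt fits under caps.max_wqes;
                * sq_spare_wqes adds the 2 KB + 1 WR of prefetch headroom
                * described below.
                */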
  484         if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
  485             qp->sq_signal_bits && BITS_PER_LONG == 64 &&
  486             type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
  487             !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
  488                       MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
  489                 qp->sq.wqe_shift = ilog2(64);
  490         else
  491                 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
  492 
  493         for (;;) {
  494                 qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
  495 
  496                 /*
  497                  * We need to leave 2 KB + 1 WR of headroom in the SQ to
  498                  * allow HW to prefetch.
  499                  */
  500                 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
  501                 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
  502                                                     qp->sq_max_wqes_per_wr +
  503                                                     qp->sq_spare_wqes);
  504 
  505                 if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
  506                         break;
  507 
  508                 if (qp->sq_max_wqes_per_wr <= 1)
  509                         return -EINVAL;
  510 
  511                 ++qp->sq.wqe_shift;
  512         }
  513 
  514         qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
  515                              (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
  516                          send_wqe_overhead(type, qp->flags)) /
  517                 sizeof (struct mlx4_wqe_data_seg);
  518 
  519         qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
  520                 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
  521         if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
  522                 qp->rq.offset = 0;
  523                 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
  524         } else {
  525                 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
  526                 qp->sq.offset = 0;
  527         }
  528 
  529         cap->max_send_wr  = qp->sq.max_post =
  530                 (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
  531         cap->max_send_sge = min(qp->sq.max_gs,
  532                                 min(dev->dev->caps.max_sq_sg,
  533                                     dev->dev->caps.max_rq_sg));
  534         /* We don't support inline sends for kernel QPs (yet) */
  535         cap->max_inline_data = 0;
  536 
  537         return 0;
  538 }
  539 
  540 static int set_user_sq_size(struct mlx4_ib_dev *dev,
  541                             struct mlx4_ib_qp *qp,
  542                             struct mlx4_ib_create_qp *ucmd)
  543 {
  544         /* Sanity check SQ size before proceeding */
  545         if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes       ||
  546             ucmd->log_sq_stride >
  547                 ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
  548             ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
  549                 return -EINVAL;
  550 
  551         qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
  552         qp->sq.wqe_shift = ucmd->log_sq_stride;
  553 
  554         qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
  555                 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
  556 
  557         return 0;
  558 }
  559 
  560 static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
  561 {
  562         int i;
  563 
  564         qp->sqp_proxy_rcv =
  565                 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
  566                         GFP_KERNEL);
  567         if (!qp->sqp_proxy_rcv)
  568                 return -ENOMEM;
  569         for (i = 0; i < qp->rq.wqe_cnt; i++) {
  570                 qp->sqp_proxy_rcv[i].addr =
  571                         kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
  572                                 GFP_KERNEL);
  573                 if (!qp->sqp_proxy_rcv[i].addr)
  574                         goto err;
  575                 qp->sqp_proxy_rcv[i].map =
  576                         ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
  577                                           sizeof (struct mlx4_ib_proxy_sqp_hdr),
  578                                           DMA_FROM_DEVICE);
  579                 if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
  580                         kfree(qp->sqp_proxy_rcv[i].addr);
  581                         goto err;
  582                 }
  583         }
  584         return 0;
  585 
  586 err:
  587         while (i > 0) {
  588                 --i;
  589                 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
  590                                     sizeof (struct mlx4_ib_proxy_sqp_hdr),
  591                                     DMA_FROM_DEVICE);
  592                 kfree(qp->sqp_proxy_rcv[i].addr);
  593         }
  594         kfree(qp->sqp_proxy_rcv);
  595         qp->sqp_proxy_rcv = NULL;
  596         return -ENOMEM;
  597 }
  598 
  599 static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
  600 {
  601         int i;
  602 
  603         for (i = 0; i < qp->rq.wqe_cnt; i++) {
  604                 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
  605                                     sizeof (struct mlx4_ib_proxy_sqp_hdr),
  606                                     DMA_FROM_DEVICE);
  607                 kfree(qp->sqp_proxy_rcv[i].addr);
  608         }
  609         kfree(qp->sqp_proxy_rcv);
  610 }
  611 
  612 static int qp_has_rq(struct ib_qp_init_attr *attr)
  613 {
  614         if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
  615                 return 0;
  616 
  617         return !attr->srq;
  618 }
  619 
  620 static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
  621 {
  622         int i;
  623         for (i = 0; i < dev->caps.num_ports; i++) {
  624                 if (qpn == dev->caps.qp0_proxy[i])
  625                         return !!dev->caps.qp0_qkey[i];
  626         }
  627         return 0;
  628 }
  629 
  630 static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
  631                                     struct mlx4_ib_qp *qp)
  632 {
  633         mutex_lock(&dev->counters_table[qp->port - 1].mutex);
  634         mlx4_counter_free(dev->dev, qp->counter_index->index);
  635         list_del(&qp->counter_index->list);
  636         mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
  637 
  638         kfree(qp->counter_index);
  639         qp->counter_index = NULL;
  640 }
  641 
  642 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
  643                             struct ib_qp_init_attr *init_attr,
  644                             struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
  645                             gfp_t gfp)
  646 {
  647         int qpn;
  648         int err;
  649         struct ib_qp_cap backup_cap;
  650         struct mlx4_ib_sqp *sqp;
  651         struct mlx4_ib_qp *qp;
  652         enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
  653         struct mlx4_ib_cq *mcq;
  654         unsigned long flags;
  655 
  656         /* When tunneling special qps, we use a plain UD qp */
  657         if (sqpn) {
  658                 if (mlx4_is_mfunc(dev->dev) &&
  659                     (!mlx4_is_master(dev->dev) ||
  660                      !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
  661                         if (init_attr->qp_type == IB_QPT_GSI)
  662                                 qp_type = MLX4_IB_QPT_PROXY_GSI;
  663                         else {
  664                                 if (mlx4_is_master(dev->dev) ||
  665                                     qp0_enabled_vf(dev->dev, sqpn))
  666                                         qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
  667                                 else
  668                                         qp_type = MLX4_IB_QPT_PROXY_SMI;
  669                         }
  670                 }
  671                 qpn = sqpn;
  672                 /* add extra sg entry for tunneling */
  673                 init_attr->cap.max_recv_sge++;
  674         } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
  675                 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
  676                         container_of(init_attr,
  677                                      struct mlx4_ib_qp_tunnel_init_attr, init_attr);
  678                 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
  679                      tnl_init->proxy_qp_type != IB_QPT_GSI)   ||
  680                     !mlx4_is_master(dev->dev))
  681                         return -EINVAL;
  682                 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
  683                         qp_type = MLX4_IB_QPT_TUN_GSI;
  684                 else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
  685                          mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
  686                                              tnl_init->port))
  687                         qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
  688                 else
  689                         qp_type = MLX4_IB_QPT_TUN_SMI;
  690                 /* we are definitely in the PPF here, since we are creating
  691                  * tunnel QPs. base_tunnel_sqpn is therefore valid. */
  692                 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
  693                         + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
  694                 sqpn = qpn;
  695         }
  696 
  697         if (!*caller_qp) {
  698                 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
  699                     (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
  700                                 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
  701                         sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
  702                         if (!sqp)
  703                                 return -ENOMEM;
  704                         qp = &sqp->qp;
  705                         qp->pri.vid = 0xFFFF;
  706                         qp->alt.vid = 0xFFFF;
  707                 } else {
  708                         qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
  709                         if (!qp)
  710                                 return -ENOMEM;
  711                         qp->pri.vid = 0xFFFF;
  712                         qp->alt.vid = 0xFFFF;
  713                 }
  714         } else
  715                 qp = *caller_qp;
  716 
  717         qp->mlx4_ib_qp_type = qp_type;
  718 
  719         mutex_init(&qp->mutex);
  720         spin_lock_init(&qp->sq.lock);
  721         spin_lock_init(&qp->rq.lock);
  722         INIT_LIST_HEAD(&qp->gid_list);
  723         INIT_LIST_HEAD(&qp->steering_rules);
  724 
  725         qp->state        = IB_QPS_RESET;
  726         if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
  727                 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
  728 
  729         err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
  730         if (err)
  731                 goto err;
  732 
  733         if (pd->uobject) {
  734                 struct mlx4_ib_create_qp ucmd;
  735 
  736                 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
  737                         err = -EFAULT;
  738                         goto err;
  739                 }
  740 
  741                 qp->sq_no_prefetch = ucmd.sq_no_prefetch;
  742 
  743                 err = set_user_sq_size(dev, qp, &ucmd);
  744                 if (err)
  745                         goto err;
  746 
  747                 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
  748                                        qp->buf_size, 0, 0);
  749                 if (IS_ERR(qp->umem)) {
  750                         err = PTR_ERR(qp->umem);
  751                         goto err;
  752                 }
  753 
  754                 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
  755                                     ilog2(qp->umem->page_size), &qp->mtt);
  756                 if (err)
  757                         goto err_buf;
  758 
  759                 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
  760                 if (err)
  761                         goto err_mtt;
  762 
  763                 if (qp_has_rq(init_attr)) {
  764                         err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
  765                                                   ucmd.db_addr, &qp->db);
  766                         if (err)
  767                                 goto err_mtt;
  768                 }
  769         } else {
  770                 qp->sq_no_prefetch = 0;
  771 
  772                 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
  773                         qp->flags |= MLX4_IB_QP_LSO;
  774 
  775                 if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
  776                         if (dev->steering_support ==
  777                             MLX4_STEERING_MODE_DEVICE_MANAGED)
  778                                 qp->flags |= MLX4_IB_QP_NETIF;
  779                         else
  780                                 goto err;
  781                 }
  782 
  783                 memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
  784                 err = set_kernel_sq_size(dev, &init_attr->cap,
  785                                          qp_type, qp, true);
  786                 if (err)
  787                         goto err;
  788 
  789                 if (qp_has_rq(init_attr)) {
  790                         err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
  791                         if (err)
  792                                 goto err;
  793 
  794                         *qp->db.db = 0;
  795                 }
  796 
  797                 if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
  798                                    &qp->buf, gfp)) {
  799                         memcpy(&init_attr->cap, &backup_cap,
  800                                sizeof(backup_cap));
  801                         err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
  802                                                  qp, false);
  803                         if (err)
  804                                 goto err_db;
  805 
  806                         if (mlx4_buf_alloc(dev->dev, qp->buf_size,
  807                                            PAGE_SIZE * 2, &qp->buf, gfp)) {
  808                                 err = -ENOMEM;
  809                                 goto err_db;
  810                         }
  811                 }
  812 
  813                 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
  814                                     &qp->mtt);
  815                 if (err)
  816                         goto err_buf;
  817 
  818                 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
  819                 if (err)
  820                         goto err_mtt;
  821 
  822                 qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
  823                                         gfp | __GFP_NOWARN);
  824                 if (!qp->sq.wrid)
  825                         qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
  826                                                 gfp, 0 /*PAGE_KERNEL*/);
  827                 qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
  828                                         gfp | __GFP_NOWARN);
  829                 if (!qp->rq.wrid)
  830                         qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
  831                                                 gfp, 0 /*PAGE_KERNEL*/);
  832                 if (!qp->sq.wrid || !qp->rq.wrid) {
  833                         err = -ENOMEM;
  834                         goto err_wrid;
  835                 }
  836         }
  837 
  838         if (sqpn) {
  839                 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
  840                     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
  841                         if (alloc_proxy_bufs(pd->device, qp)) {
  842                                 err = -ENOMEM;
  843                                 goto err_wrid;
  844                         }
  845                 }
  846         } else {
  847                 /* Raw packet QPNs may not have bits 6,7 set in their qp_num;
  848                  * otherwise, the WQE BlueFlame setup flow wrongly causes
  849                  * VLAN insertion. */
  850                 if (init_attr->qp_type == IB_QPT_RAW_PACKET)
  851                         err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn,
  852                                                     (init_attr->cap.max_send_wr ?
  853                                                      MLX4_RESERVE_ETH_BF_QP : 0) |
  854                                                     (init_attr->cap.max_recv_wr ?
  855                                                      MLX4_RESERVE_A0_QP : 0));
  856                 else
  857                         if (qp->flags & MLX4_IB_QP_NETIF)
  858                                 err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
  859                         else
  860                                 err = mlx4_qp_reserve_range(dev->dev, 1, 1,
  861                                                             &qpn, 0);
  862                 if (err)
  863                         goto err_proxy;
  864         }
  865 
  866         if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
  867                 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
  868 
  869         err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
  870         if (err)
  871                 goto err_qpn;
  872 
  873         if (init_attr->qp_type == IB_QPT_XRC_TGT)
  874                 qp->mqp.qpn |= (1 << 23);
  875 
  876         /*
  877          * Hardware wants QPN written in big-endian order (after
  878          * shifting) for send doorbell.  Precompute this value to save
  879          * a little bit when posting sends.
  880          */
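               /*
                * For illustration, on a little-endian host: qpn 0x000123
                * gives swab32(0x00012300) == 0x00230100, whose in-memory
                * bytes are 00 01 23 00 -- the big-endian form of the shifted
                * QPN, ready to be written to the doorbell without a swap on
                * the hot path.
                */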
  881         qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
  882 
  883         qp->mqp.event = mlx4_ib_qp_event;
  884         if (!*caller_qp)
  885                 *caller_qp = qp;
  886 
  887         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
  888         mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
  889                          to_mcq(init_attr->recv_cq));
  890         /* Maintain device to QPs access, needed for further handling
  891          * via reset flow
  892          */
  893         list_add_tail(&qp->qps_list, &dev->qp_list);
  894         /* Maintain CQ to QPs access, needed for further handling
  895          * via reset flow
  896          */
  897         mcq = to_mcq(init_attr->send_cq);
  898         list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
  899         mcq = to_mcq(init_attr->recv_cq);
  900         list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
  901         mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
  902                            to_mcq(init_attr->recv_cq));
  903         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
  904         return 0;
  905 
  906 err_qpn:
  907         if (!sqpn) {
  908                 if (qp->flags & MLX4_IB_QP_NETIF)
  909                         mlx4_ib_steer_qp_free(dev, qpn, 1);
  910                 else
  911                         mlx4_qp_release_range(dev->dev, qpn, 1);
  912         }
  913 err_proxy:
  914         if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
  915                 free_proxy_bufs(pd->device, qp);
  916 err_wrid:
  917         if (pd->uobject) {
  918                 if (qp_has_rq(init_attr))
  919                         mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
  920         } else {
  921                 kvfree(qp->sq.wrid);
  922                 kvfree(qp->rq.wrid);
  923         }
  924 
  925 err_mtt:
  926         mlx4_mtt_cleanup(dev->dev, &qp->mtt);
  927 
  928 err_buf:
  929         if (pd->uobject)
  930                 ib_umem_release(qp->umem);
  931         else
  932                 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
  933 
  934 err_db:
  935         if (!pd->uobject && qp_has_rq(init_attr))
  936                 mlx4_db_free(dev->dev, &qp->db);
  937 
  938 err:
  939         if (!*caller_qp)
  940                 kfree(qp);
  941         return err;
  942 }
  943 
  944 static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
  945 {
  946         switch (state) {
  947         case IB_QPS_RESET:      return MLX4_QP_STATE_RST;
  948         case IB_QPS_INIT:       return MLX4_QP_STATE_INIT;
  949         case IB_QPS_RTR:        return MLX4_QP_STATE_RTR;
  950         case IB_QPS_RTS:        return MLX4_QP_STATE_RTS;
  951         case IB_QPS_SQD:        return MLX4_QP_STATE_SQD;
  952         case IB_QPS_SQE:        return MLX4_QP_STATE_SQER;
  953         case IB_QPS_ERR:        return MLX4_QP_STATE_ERR;
  954         default:                return -1;
  955         }
  956 }
  957 
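       /*
        * Both CQ locks are taken in a fixed order (lower cqn first) so that
        * two paths operating on the same send/recv CQ pair can never acquire
        * them in opposite orders and deadlock.
        */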
  958 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
  959         __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
  960 {
  961         if (send_cq == recv_cq) {
  962                 spin_lock(&send_cq->lock);
  963                 __acquire(&recv_cq->lock);
  964         } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
  965                 spin_lock(&send_cq->lock);
  966                 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
  967         } else {
  968                 spin_lock(&recv_cq->lock);
  969                 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
  970         }
  971 }
  972 
  973 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
  974         __releases(&send_cq->lock) __releases(&recv_cq->lock)
  975 {
  976         if (send_cq == recv_cq) {
  977                 __release(&recv_cq->lock);
  978                 spin_unlock(&send_cq->lock);
  979         } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
  980                 spin_unlock(&recv_cq->lock);
  981                 spin_unlock(&send_cq->lock);
  982         } else {
  983                 spin_unlock(&send_cq->lock);
  984                 spin_unlock(&recv_cq->lock);
  985         }
  986 }
  987 
  988 static void del_gid_entries(struct mlx4_ib_qp *qp)
  989 {
  990         struct mlx4_ib_gid_entry *ge, *tmp;
  991 
  992         list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
  993                 list_del(&ge->list);
  994                 kfree(ge);
  995         }
  996 }
  997 
  998 static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
  999 {
 1000         if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
 1001                 return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
 1002         else
 1003                 return to_mpd(qp->ibqp.pd);
 1004 }
 1005 
 1006 static void get_cqs(struct mlx4_ib_qp *qp,
 1007                     struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
 1008 {
 1009         switch (qp->ibqp.qp_type) {
 1010         case IB_QPT_XRC_TGT:
 1011                 *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
 1012                 *recv_cq = *send_cq;
 1013                 break;
 1014         case IB_QPT_XRC_INI:
 1015                 *send_cq = to_mcq(qp->ibqp.send_cq);
 1016                 *recv_cq = *send_cq;
 1017                 break;
 1018         default:
 1019                 *send_cq = to_mcq(qp->ibqp.send_cq);
 1020                 *recv_cq = to_mcq(qp->ibqp.recv_cq);
 1021                 break;
 1022         }
 1023 }
 1024 
 1025 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 1026                               struct ib_udata *udata)
 1027 {
 1028         struct mlx4_ib_cq *send_cq, *recv_cq;
 1029         unsigned long flags;
 1030 
 1031         if (qp->state != IB_QPS_RESET) {
 1032                 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
 1033                                    MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
 1034                         pr_warn("modify QP %06x to RESET failed.\n",
 1035                                qp->mqp.qpn);
 1036                 if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
 1037                         mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
 1038                         qp->pri.smac = 0;
 1039                         qp->pri.smac_port = 0;
 1040                 }
 1041                 if (qp->alt.smac) {
 1042                         mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
 1043                         qp->alt.smac = 0;
 1044                 }
 1045                 if (qp->pri.vid < 0x1000) {
 1046                         mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
 1047                         qp->pri.vid = 0xFFFF;
 1048                         qp->pri.candidate_vid = 0xFFFF;
 1049                         qp->pri.update_vid = 0;
 1050                 }
 1051                 if (qp->alt.vid < 0x1000) {
 1052                         mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
 1053                         qp->alt.vid = 0xFFFF;
 1054                         qp->alt.candidate_vid = 0xFFFF;
 1055                         qp->alt.update_vid = 0;
 1056                 }
 1057         }
 1058 
 1059         get_cqs(qp, &send_cq, &recv_cq);
 1060 
 1061         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
 1062         mlx4_ib_lock_cqs(send_cq, recv_cq);
 1063 
 1064         /* del from lists under both locks above to protect reset flow paths */
 1065         list_del(&qp->qps_list);
 1066         list_del(&qp->cq_send_list);
 1067         list_del(&qp->cq_recv_list);
 1068         if (!udata) {
 1069                 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
 1070                                  qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
 1071                 if (send_cq != recv_cq)
 1072                         __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
 1073         }
 1074 
 1075         mlx4_qp_remove(dev->dev, &qp->mqp);
 1076 
 1077         mlx4_ib_unlock_cqs(send_cq, recv_cq);
 1078         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
 1079 
 1080         mlx4_qp_free(dev->dev, &qp->mqp);
 1081 
 1082         if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
 1083                 if (qp->flags & MLX4_IB_QP_NETIF)
 1084                         mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
 1085                 else
 1086                         mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
 1087         }
 1088 
 1089         mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 1090 
 1091         if (udata) {
 1092                 if (qp->rq.wqe_cnt) {
 1093                         struct mlx4_ib_ucontext *mcontext =
 1094                                 rdma_udata_to_drv_context(
 1095                                         udata,
 1096                                         struct mlx4_ib_ucontext,
 1097                                         ibucontext);
 1098 
 1099                         mlx4_ib_db_unmap_user(mcontext, &qp->db);
 1100                 }
 1101         } else {
 1102                 kvfree(qp->sq.wrid);
 1103                 kvfree(qp->rq.wrid);
 1104                 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
 1105                     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
 1106                         free_proxy_bufs(&dev->ib_dev, qp);
 1107                 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
 1108                 if (qp->rq.wqe_cnt)
 1109                         mlx4_db_free(dev->dev, &qp->db);
 1110         }
 1111         ib_umem_release(qp->umem);
 1112 
 1113         del_gid_entries(qp);
 1114 }
 1115 
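       /*
        * Native/PPF special QPNs are laid out as base_sqpn + 0/1 for QP0 on
        * ports 1/2 and base_sqpn + 2/3 for QP1 (GSI), which is what the
        * (SMI ? 0 : 2) + port_num - 1 arithmetic below selects.
        */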
 1116 static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
 1117 {
 1118         /* Native or PPF */
 1119         if (!mlx4_is_mfunc(dev->dev) ||
 1120             (mlx4_is_master(dev->dev) &&
 1121              attr->create_flags & MLX4_IB_SRIOV_SQP)) {
 1122                 return  dev->dev->phys_caps.base_sqpn +
 1123                         (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
 1124                         attr->port_num - 1;
 1125         }
 1126         /* PF or VF -- creating proxies */
 1127         if (attr->qp_type == IB_QPT_SMI)
 1128                 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
 1129         else
 1130                 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
 1131 }
 1132 
 1133 static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 1134                                         struct ib_qp_init_attr *init_attr,
 1135                                         struct ib_udata *udata)
 1136 {
 1137         struct mlx4_ib_qp *qp = NULL;
 1138         int err;
 1139         int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
 1140         u16 xrcdn = 0;
 1141         gfp_t gfp;
 1142 
 1143         gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
 1144                 GFP_NOIO : GFP_KERNEL;
 1145         /*
 1146          * We only support LSO, vendor flag1, and multicast loopback blocking,
 1147          * and only for kernel UD QPs.
 1148          */
 1149         if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
 1150                                         MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
 1151                                         MLX4_IB_SRIOV_TUNNEL_QP |
 1152                                         MLX4_IB_SRIOV_SQP |
 1153                                         MLX4_IB_QP_NETIF |
 1154                                         MLX4_IB_QP_CREATE_ROCE_V2_GSI |
 1155                                         MLX4_IB_QP_CREATE_USE_GFP_NOIO))
 1156                 return ERR_PTR(-EINVAL);
 1157 
 1158         if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
 1159                 if (init_attr->qp_type != IB_QPT_UD)
 1160                         return ERR_PTR(-EINVAL);
 1161         }
 1162 
 1163         if (init_attr->create_flags) {
 1164                 if (udata && init_attr->create_flags & ~(sup_u_create_flags))
 1165                         return ERR_PTR(-EINVAL);
 1166 
 1167                 if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
 1168                                                  MLX4_IB_QP_CREATE_USE_GFP_NOIO |
 1169                                                  MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
 1170                                                  MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
 1171                      init_attr->qp_type != IB_QPT_UD) ||
 1172                     (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
 1173                      init_attr->qp_type > IB_QPT_GSI) ||
 1174                     (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
 1175                      init_attr->qp_type != IB_QPT_GSI))
 1176                         return ERR_PTR(-EINVAL);
 1177         }
 1178 
 1179         switch (init_attr->qp_type) {
 1180         case IB_QPT_XRC_TGT:
 1181                 pd = to_mxrcd(init_attr->xrcd)->pd;
 1182                 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
 1183                 init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
 1184                 /* fall through */
 1185         case IB_QPT_XRC_INI:
 1186                 if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
 1187                         return ERR_PTR(-ENOSYS);
 1188                 init_attr->recv_cq = init_attr->send_cq;
 1189                 /* fall through */
 1190         case IB_QPT_RC:
 1191         case IB_QPT_UC:
 1192         case IB_QPT_RAW_PACKET:
 1193                 qp = kzalloc(sizeof *qp, gfp);
 1194                 if (!qp)
 1195                         return ERR_PTR(-ENOMEM);
 1196                 qp->pri.vid = 0xFFFF;
 1197                 qp->alt.vid = 0xFFFF;
 1198                 /* fall through */
 1199         case IB_QPT_UD:
 1200         {
 1201                 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
 1202                                        udata, 0, &qp, gfp);
 1203                 if (err) {
 1204                         kfree(qp);
 1205                         return ERR_PTR(err);
 1206                 }
 1207 
 1208                 qp->ibqp.qp_num = qp->mqp.qpn;
 1209                 qp->xrcdn = xrcdn;
 1210 
 1211                 break;
 1212         }
 1213         case IB_QPT_SMI:
 1214         case IB_QPT_GSI:
 1215         {
 1216                 int sqpn;
 1217 
 1218                 /* Userspace is not allowed to create special QPs: */
 1219                 if (udata)
 1220                         return ERR_PTR(-EINVAL);
 1221                 if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
 1222                         int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
 1223 
 1224                         if (res)
 1225                                 return ERR_PTR(res);
 1226                 } else {
 1227                         sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
 1228                 }
 1229 
 1230                 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
 1231                                        sqpn,
 1232                                        &qp, gfp);
 1233                 if (err)
 1234                         return ERR_PTR(err);
 1235 
 1236                 qp->port        = init_attr->port_num;
 1237                 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
 1238                         init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
 1239                 break;
 1240         }
 1241         default:
 1242                 /* All other QP types (including legacy raw QPs) are not supported */
 1243                 return ERR_PTR(-EINVAL);
 1244         }
 1245 
 1246         return &qp->ibqp;
 1247 }
 1248 
 1249 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 1250                                 struct ib_qp_init_attr *init_attr,
 1251                                 struct ib_udata *udata) {
 1252         struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
 1253         struct ib_qp *ibqp;
 1254         struct mlx4_ib_dev *dev = to_mdev(device);
 1255 
 1256         ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
 1257 
 1258         if (!IS_ERR(ibqp) &&
 1259             (init_attr->qp_type == IB_QPT_GSI) &&
 1260             !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
 1261                 struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
 1262                 int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
 1263 
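                      /*
                       * On an Ethernet port with RoCE v1/v2 support, create a
                       * companion GSI QP for RoCE v2 by re-entering
                       * ib_create_qp() with MLX4_IB_QP_CREATE_ROCE_V2_GSI set;
                       * the flag is cleared again after the call returns.
                       */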
 1264                 if (is_eth &&
 1265                     dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
 1266                         init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
 1267                         sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
 1268 
 1269                         if (IS_ERR(sqp->roce_v2_gsi)) {
 1270                                 pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
 1271                                 sqp->roce_v2_gsi = NULL;
 1272                         } else {
 1273                                 sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
 1274                                 sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
 1275                         }
 1276 
 1277                         init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
 1278                 }
 1279         }
 1280         return ibqp;
 1281 }
 1282 
 1283 static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 1284 {
 1285         struct mlx4_ib_dev *dev = to_mdev(qp->device);
 1286         struct mlx4_ib_qp *mqp = to_mqp(qp);
 1287 
 1288         if (is_qp0(dev, mqp))
 1289                 mlx4_CLOSE_PORT(dev->dev, mqp->port);
 1290 
 1291         if (dev->qp1_proxy[mqp->port - 1] == mqp) {
 1292                 mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
 1293                 dev->qp1_proxy[mqp->port - 1] = NULL;
 1294                 mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
 1295         }
 1296 
 1297         if (mqp->counter_index)
 1298                 mlx4_ib_free_qp_counter(dev, mqp);
 1299 
 1300         destroy_qp_common(dev, mqp, udata);
 1301 
 1302         if (is_sqp(dev, mqp))
 1303                 kfree(to_msqp(mqp));
 1304         else
 1305                 kfree(mqp);
 1306 
 1307         return 0;
 1308 }
 1309 
 1310 int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 1311 {
 1312         struct mlx4_ib_qp *mqp = to_mqp(qp);
 1313 
 1314         if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
 1315                 struct mlx4_ib_sqp *sqp = to_msqp(mqp);
 1316 
 1317                 if (sqp->roce_v2_gsi)
 1318                         ib_destroy_qp(sqp->roce_v2_gsi);
 1319         }
 1320 
 1321         return _mlx4_ib_destroy_qp(qp, udata);
 1322 }
 1323 
 1324 static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
 1325 {
 1326         switch (type) {
 1327         case MLX4_IB_QPT_RC:            return MLX4_QP_ST_RC;
 1328         case MLX4_IB_QPT_UC:            return MLX4_QP_ST_UC;
 1329         case MLX4_IB_QPT_UD:            return MLX4_QP_ST_UD;
 1330         case MLX4_IB_QPT_XRC_INI:
 1331         case MLX4_IB_QPT_XRC_TGT:       return MLX4_QP_ST_XRC;
 1332         case MLX4_IB_QPT_SMI:
 1333         case MLX4_IB_QPT_GSI:
 1334         case MLX4_IB_QPT_RAW_PACKET:    return MLX4_QP_ST_MLX;
 1335 
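              /*
               * Proxy and tunnel QP types only exist on multi-function
               * (SR-IOV) devices; on a single-function device there is no
               * valid hardware service type for them, so -1 is returned.
               */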
 1336         case MLX4_IB_QPT_PROXY_SMI_OWNER:
 1337         case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
 1338                                                 MLX4_QP_ST_MLX : -1);
 1339         case MLX4_IB_QPT_PROXY_SMI:
 1340         case MLX4_IB_QPT_TUN_SMI:
 1341         case MLX4_IB_QPT_PROXY_GSI:
 1342         case MLX4_IB_QPT_TUN_GSI:       return (mlx4_is_mfunc(dev->dev) ?
 1343                                                 MLX4_QP_ST_UD : -1);
 1344         default:                        return -1;
 1345         }
 1346 }
 1347 
 1348 static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
 1349                                    int attr_mask)
 1350 {
 1351         u8 dest_rd_atomic;
 1352         u32 access_flags;
 1353         u32 hw_access_flags = 0;
 1354 
 1355         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 1356                 dest_rd_atomic = attr->max_dest_rd_atomic;
 1357         else
 1358                 dest_rd_atomic = qp->resp_depth;
 1359 
 1360         if (attr_mask & IB_QP_ACCESS_FLAGS)
 1361                 access_flags = attr->qp_access_flags;
 1362         else
 1363                 access_flags = qp->atomic_rd_en;
 1364 
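              /*
               * A responder depth of zero means remote reads and atomics are
               * not allowed, so mask the flags down to (at most) remote write
               * before translating them into QPC bits.
               */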
 1365         if (!dest_rd_atomic)
 1366                 access_flags &= IB_ACCESS_REMOTE_WRITE;
 1367 
 1368         if (access_flags & IB_ACCESS_REMOTE_READ)
 1369                 hw_access_flags |= MLX4_QP_BIT_RRE;
 1370         if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
 1371                 hw_access_flags |= MLX4_QP_BIT_RAE;
 1372         if (access_flags & IB_ACCESS_REMOTE_WRITE)
 1373                 hw_access_flags |= MLX4_QP_BIT_RWE;
 1374 
 1375         return cpu_to_be32(hw_access_flags);
 1376 }
 1377 
 1378 static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
 1379                             int attr_mask)
 1380 {
 1381         if (attr_mask & IB_QP_PKEY_INDEX)
 1382                 sqp->pkey_index = attr->pkey_index;
 1383         if (attr_mask & IB_QP_QKEY)
 1384                 sqp->qkey = attr->qkey;
 1385         if (attr_mask & IB_QP_SQ_PSN)
 1386                 sqp->send_psn = attr->sq_psn;
 1387 }
 1388 
 1389 static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
 1390 {
 1391         path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
 1392 }
 1393 
 1394 static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 1395                           u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
 1396                           struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
 1397 {
 1398         int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
 1399                 IB_LINK_LAYER_ETHERNET;
 1400         int vidx;
 1401         int smac_index;
 1402         int err;
 1403 
 1404 
 1405         path->grh_mylmc     = ah->src_path_bits & 0x7f;
 1406         path->rlid          = cpu_to_be16(ah->dlid);
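              /*
               * Clamp the requested static rate down to the nearest lower rate
               * the device reports in caps.stat_rate_support, bottoming out at
               * 2.5 Gb/s.
               */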
 1407         if (ah->static_rate) {
 1408                 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
 1409                 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
 1410                        !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
 1411                         --path->static_rate;
 1412         } else
 1413                 path->static_rate = 0;
 1414 
 1415         if (ah->ah_flags & IB_AH_GRH) {
 1416                 int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
 1417                                                                       port,
 1418                                                                       ah->grh.sgid_index);
 1419 
 1420                 if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
 1421                         pr_err("sgid_index (%u) too large. max is %d\n",
 1422                                real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
 1423                         return -1;
 1424                 }
 1425 
 1426                 path->grh_mylmc |= 1 << 7;
 1427                 path->mgid_index = real_sgid_index;
 1428                 path->hop_limit  = ah->grh.hop_limit;
 1429                 path->tclass_flowlabel =
 1430                         cpu_to_be32((ah->grh.traffic_class << 20) |
 1431                                     (ah->grh.flow_label));
 1432                 memcpy(path->rgid, ah->grh.dgid.raw, 16);
 1433         }
 1434 
 1435         if (is_eth) {
 1436                 if (!(ah->ah_flags & IB_AH_GRH))
 1437                         return -1;
 1438 
 1439                 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
 1440                         ((port - 1) << 6) | ((ah->sl & 7) << 3);
 1441 
 1442                 path->feup |= MLX4_FEUP_FORCE_ETH_UP;
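                      /*
                       * A vlan_tag below 0x1000 is a valid VID.  Any new VLAN
                       * registration is recorded here only as a "candidate"
                       * and is committed (or rolled back) after the modify-qp
                       * command completes; see the end of __mlx4_ib_modify_qp().
                       */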
 1443                 if (vlan_tag < 0x1000) {
 1444                         if (smac_info->vid < 0x1000) {
 1445                                 /* both valid vlan ids */
 1446                                 if (smac_info->vid != vlan_tag) {
 1447                                         /* different VIDs: unregister the old VLAN and register the new one */
 1448                                         err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
 1449                                         if (err)
 1450                                                 return err;
 1451                                         smac_info->candidate_vid = vlan_tag;
 1452                                         smac_info->candidate_vlan_index = vidx;
 1453                                         smac_info->candidate_vlan_port = port;
 1454                                         smac_info->update_vid = 1;
 1455                                         path->vlan_index = vidx;
 1456                                 } else {
 1457                                         path->vlan_index = smac_info->vlan_index;
 1458                                 }
 1459                         } else {
 1460                                 /* no current vlan tag in qp */
 1461                                 err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
 1462                                 if (err)
 1463                                         return err;
 1464                                 smac_info->candidate_vid = vlan_tag;
 1465                                 smac_info->candidate_vlan_index = vidx;
 1466                                 smac_info->candidate_vlan_port = port;
 1467                                 smac_info->update_vid = 1;
 1468                                 path->vlan_index = vidx;
 1469                         }
 1470                         path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
 1471                         path->fl = 1 << 6;
 1472                 } else {
 1473                         /* QP has a current VLAN tag; unregister it when modify-qp succeeds */
 1474                         if (smac_info->vid < 0x1000) {
 1475                                 smac_info->candidate_vid = 0xFFFF;
 1476                                 smac_info->update_vid = 1;
 1477                         }
 1478                 }
 1479 
 1480                 /* Get the smac_index for RoCE use.
 1481                  * If no smac has been assigned yet, register one.
 1482                  * If one was already assigned but the new MAC differs,
 1483                  * unregister the old one and register the new one.
 1484                  */
 1485                 if ((!smac_info->smac && !smac_info->smac_port) ||
 1486                     smac_info->smac != smac) {
 1487                         /* register candidate now, unreg if needed, after success */
 1488                         smac_index = mlx4_register_mac(dev->dev, port, smac);
 1489                         if (smac_index >= 0) {
 1490                                 smac_info->candidate_smac_index = smac_index;
 1491                                 smac_info->candidate_smac = smac;
 1492                                 smac_info->candidate_smac_port = port;
 1493                         } else {
 1494                                 return -EINVAL;
 1495                         }
 1496                 } else {
 1497                         smac_index = smac_info->smac_index;
 1498                 }
 1499 
 1500                 memcpy(path->dmac, ah->dmac, 6);
 1501                 path->ackto = MLX4_IB_LINK_TYPE_ETH;
 1502                 /* put MAC table smac index for IBoE */
 1503                 path->grh_mylmc = (u8) (smac_index) | 0x80;
 1504         } else {
 1505                 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
 1506                         ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
 1507         }
 1508 
 1509         return 0;
 1510 }
 1511 
 1512 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
 1513                          enum ib_qp_attr_mask qp_attr_mask,
 1514                          struct mlx4_ib_qp *mqp,
 1515                          struct mlx4_qp_path *path, u8 port,
 1516                          u16 vlan_id, u8 *smac)
 1517 {
 1518         return _mlx4_set_path(dev, &qp->ah_attr,
 1519                               mlx4_mac_to_u64(smac),
 1520                               vlan_id,
 1521                               path, &mqp->pri, port);
 1522 }
 1523 
 1524 static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
 1525                              const struct ib_qp_attr *qp,
 1526                              enum ib_qp_attr_mask qp_attr_mask,
 1527                              struct mlx4_ib_qp *mqp,
 1528                              struct mlx4_qp_path *path, u8 port)
 1529 {
 1530         return _mlx4_set_path(dev, &qp->alt_ah_attr,
 1531                               0,
 1532                               0xffff,
 1533                               path, &mqp->alt, port);
 1534 }
 1535 
 1536 static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 1537 {
 1538         struct mlx4_ib_gid_entry *ge, *tmp;
 1539 
 1540         list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
 1541                 if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
 1542                         ge->added = 1;
 1543                         ge->port = qp->port;
 1544                 }
 1545         }
 1546 }
 1547 
 1548 static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
 1549                                     struct mlx4_ib_qp *qp,
 1550                                     struct mlx4_qp_context *context)
 1551 {
 1552         u64 u64_mac;
 1553         int smac_index;
 1554 
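              /*
               * Read the current port MAC and, if this QP has no source MAC
               * yet, register it as a candidate smac_index; the candidate is
               * committed or unregistered once the modify-qp outcome is known.
               */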
 1555         u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);
 1556 
 1557         context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
 1558         if (!qp->pri.smac && !qp->pri.smac_port) {
 1559                 smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
 1560                 if (smac_index >= 0) {
 1561                         qp->pri.candidate_smac_index = smac_index;
 1562                         qp->pri.candidate_smac = u64_mac;
 1563                         qp->pri.candidate_smac_port = qp->port;
 1564                         context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
 1565                 } else {
 1566                         return -ENOENT;
 1567                 }
 1568         }
 1569         return 0;
 1570 }
 1571 
 1572 static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 1573 {
 1574         struct counter_index *new_counter_index;
 1575         int err;
 1576         u32 tmp_idx;
 1577 
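              /*
               * A loopback-check counter is only needed for Ethernet QPs that
               * block multicast loopback, and only when the device supports
               * loopback source checking.
               */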
 1578         if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
 1579             IB_LINK_LAYER_ETHERNET ||
 1580             !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
 1581             !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
 1582                 return 0;
 1583 
 1584         err = mlx4_counter_alloc(dev->dev, &tmp_idx);
 1585         if (err)
 1586                 return err;
 1587 
 1588         new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
 1589         if (!new_counter_index) {
 1590                 mlx4_counter_free(dev->dev, tmp_idx);
 1591                 return -ENOMEM;
 1592         }
 1593 
 1594         new_counter_index->index = tmp_idx;
 1595         new_counter_index->allocated = 1;
 1596         qp->counter_index = new_counter_index;
 1597 
 1598         mutex_lock(&dev->counters_table[qp->port - 1].mutex);
 1599         list_add_tail(&new_counter_index->list,
 1600                       &dev->counters_table[qp->port - 1].counters_list);
 1601         mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
 1602 
 1603         return 0;
 1604 }
 1605 
 1606 enum {
 1607         MLX4_QPC_ROCE_MODE_1 = 0,
 1608         MLX4_QPC_ROCE_MODE_2 = 2,
 1609         MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
 1610 };
 1611 
 1612 static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
 1613 {
 1614         switch (gid_type) {
 1615         case IB_GID_TYPE_ROCE:
 1616                 return MLX4_QPC_ROCE_MODE_1;
 1617         case IB_GID_TYPE_ROCE_UDP_ENCAP:
 1618                 return MLX4_QPC_ROCE_MODE_2;
 1619         default:
 1620                 return MLX4_QPC_ROCE_MODE_UNDEFINED;
 1621         }
 1622 }
 1623 
 1624 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 1625                                const struct ib_qp_attr *attr, int attr_mask,
 1626                                enum ib_qp_state cur_state,
 1627                                enum ib_qp_state new_state,
 1628                                struct ib_udata *udata)
 1629 {
 1630         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 1631         struct mlx4_ib_qp *qp = to_mqp(ibqp);
 1632         struct mlx4_ib_pd *pd;
 1633         struct mlx4_ib_cq *send_cq, *recv_cq;
 1634         struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
 1635                 udata, struct mlx4_ib_ucontext, ibucontext);
 1636         struct mlx4_qp_context *context;
 1637         enum mlx4_qp_optpar optpar = 0;
 1638         int sqd_event;
 1639         int steer_qp = 0;
 1640         int err = -EINVAL;
 1641         int counter_index;
 1642 
 1643         /* APM is not supported under RoCE */
 1644         if (attr_mask & IB_QP_ALT_PATH &&
 1645             rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
 1646             IB_LINK_LAYER_ETHERNET)
 1647                 return -ENOTSUPP;
 1648 
 1649         context = kzalloc(sizeof *context, GFP_KERNEL);
 1650         if (!context)
 1651                 return -ENOMEM;
 1652 
 1653         context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
 1654                                      (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
 1655 
 1656         if (!(attr_mask & IB_QP_PATH_MIG_STATE))
 1657                 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
 1658         else {
 1659                 optpar |= MLX4_QP_OPTPAR_PM_STATE;
 1660                 switch (attr->path_mig_state) {
 1661                 case IB_MIG_MIGRATED:
 1662                         context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
 1663                         break;
 1664                 case IB_MIG_REARM:
 1665                         context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
 1666                         break;
 1667                 case IB_MIG_ARMED:
 1668                         context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
 1669                         break;
 1670                 }
 1671         }
 1672 
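              /*
               * mtu_msgmax packs the path MTU enum into bits 7:5 and log2 of
               * the maximum message size into bits 4:0.
               */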
 1673         if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
 1674                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
 1675         else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
 1676                 context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
 1677         else if (ibqp->qp_type == IB_QPT_UD) {
 1678                 if (qp->flags & MLX4_IB_QP_LSO)
 1679                         context->mtu_msgmax = (IB_MTU_4096 << 5) |
 1680                                               ilog2(dev->dev->caps.max_gso_sz);
 1681                 else
 1682                         context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
 1683         } else if (attr_mask & IB_QP_PATH_MTU) {
 1684                 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
 1685                         pr_err("path MTU (%u) is invalid\n",
 1686                                attr->path_mtu);
 1687                         goto out;
 1688                 }
 1689                 context->mtu_msgmax = (attr->path_mtu << 5) |
 1690                         ilog2(dev->dev->caps.max_msg_sz);
 1691         }
 1692 
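              /*
               * rq/sq_size_stride: the upper bits hold log2 of the WQE count,
               * the low 3 bits hold log2 of the WQE stride minus 4 (i.e. the
               * stride in 16-byte units).
               */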
 1693         if (qp->rq.wqe_cnt)
 1694                 context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
 1695         context->rq_size_stride |= qp->rq.wqe_shift - 4;
 1696 
 1697         if (qp->sq.wqe_cnt)
 1698                 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
 1699         context->sq_size_stride |= qp->sq.wqe_shift - 4;
 1700 
 1701         if (new_state == IB_QPS_RESET && qp->counter_index)
 1702                 mlx4_ib_free_qp_counter(dev, qp);
 1703 
 1704         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 1705                 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
 1706                 context->xrcd = cpu_to_be32((u32) qp->xrcdn);
 1707                 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
 1708                         context->param3 |= cpu_to_be32(1 << 30);
 1709         }
 1710 
 1711         if (ucontext)
 1712                 context->usr_page = cpu_to_be32(
 1713                         mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index));
 1714         else
 1715                 context->usr_page = cpu_to_be32(
 1716                         mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
 1717 
 1718         if (attr_mask & IB_QP_DEST_QPN)
 1719                 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
 1720 
 1721         if (attr_mask & IB_QP_PORT) {
 1722                 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
 1723                     !(attr_mask & IB_QP_AV)) {
 1724                         mlx4_set_sched(&context->pri_path, attr->port_num);
 1725                         optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
 1726                 }
 1727         }
 1728 
 1729         if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
 1730                 err = create_qp_lb_counter(dev, qp);
 1731                 if (err)
 1732                         goto out;
 1733 
 1734                 counter_index =
 1735                         dev->counters_table[qp->port - 1].default_counter;
 1736                 if (qp->counter_index)
 1737                         counter_index = qp->counter_index->index;
 1738 
 1739                 if (counter_index != -1) {
 1740                         context->pri_path.counter_index = counter_index;
 1741                         optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
 1742                         if (qp->counter_index) {
 1743                                 context->pri_path.fl |=
 1744                                         MLX4_FL_ETH_SRC_CHECK_MC_LB;
 1745                                 context->pri_path.vlan_control |=
 1746                                         MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
 1747                         }
 1748                 } else
 1749                         context->pri_path.counter_index =
 1750                                 MLX4_SINK_COUNTER_INDEX(dev->dev);
 1751 
 1752                 if (qp->flags & MLX4_IB_QP_NETIF) {
 1753                         mlx4_ib_steer_qp_reg(dev, qp, 1);
 1754                         steer_qp = 1;
 1755                 }
 1756 
 1757                 if (ibqp->qp_type == IB_QPT_GSI) {
 1758                         enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
 1759                                 IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
 1760                         u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
 1761 
 1762                         context->rlkey_roce_mode |= (qpc_roce_mode << 6);
 1763                 }
 1764         }
 1765 
 1766         if (attr_mask & IB_QP_PKEY_INDEX) {
 1767                 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
 1768                         context->pri_path.disable_pkey_check = 0x40;
 1769                 context->pri_path.pkey_index = attr->pkey_index;
 1770                 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
 1771         }
 1772 
 1773         if (attr_mask & IB_QP_AV) {
 1774                 u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
 1775                         attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 1776                 union ib_gid gid;
 1777                 struct ib_gid_attr gid_attr;
 1778                 u16 vlan = 0xffff;
 1779                 u8 smac[ETH_ALEN];
 1780                 int status = 0;
 1781                 int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
 1782                         attr->ah_attr.ah_flags & IB_AH_GRH;
 1783 
 1784                 if (is_eth) {
 1785                         int index = attr->ah_attr.grh.sgid_index;
 1786 
 1787                         status = ib_get_cached_gid(ibqp->device, port_num,
 1788                                                    index, &gid, &gid_attr);
 1789                         if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
 1790                                 status = -ENOENT;
 1791                         if (!status && gid_attr.ndev) {
 1792                                 vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
 1793                                 memcpy(smac, IF_LLADDR(gid_attr.ndev), ETH_ALEN);
 1794                                 if_rele(gid_attr.ndev);
 1795                         }
 1796                 }
 1797                 if (status)
 1798                         goto out;
 1799 
 1800                 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
 1801                                   port_num, vlan, smac))
 1802                         goto out;
 1803 
 1804                 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
 1805                            MLX4_QP_OPTPAR_SCHED_QUEUE);
 1806 
 1807                 if (is_eth &&
 1808                     (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
 1809                         u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
 1810 
 1811                         if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
 1812                                 err = -EINVAL;
 1813                                 goto out;
 1814                         }
 1815                         context->rlkey_roce_mode |= (qpc_roce_mode << 6);
 1816                 }
 1817 
 1818         }
 1819 
 1820         if (attr_mask & IB_QP_TIMEOUT) {
 1821                 context->pri_path.ackto |= attr->timeout << 3;
 1822                 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
 1823         }
 1824 
 1825         if (attr_mask & IB_QP_ALT_PATH) {
 1826                 if (attr->alt_port_num == 0 ||
 1827                     attr->alt_port_num > dev->dev->caps.num_ports)
 1828                         goto out;
 1829 
 1830                 if (attr->alt_pkey_index >=
 1831                     dev->dev->caps.pkey_table_len[attr->alt_port_num])
 1832                         goto out;
 1833 
 1834                 if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
 1835                                       &context->alt_path,
 1836                                       attr->alt_port_num))
 1837                         goto out;
 1838 
 1839                 context->alt_path.pkey_index = attr->alt_pkey_index;
 1840                 context->alt_path.ackto = attr->alt_timeout << 3;
 1841                 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
 1842         }
 1843 
 1844         pd = get_pd(qp);
 1845         get_cqs(qp, &send_cq, &recv_cq);
 1846         context->pd       = cpu_to_be32(pd->pdn);
 1847         context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
 1848         context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
 1849         context->params1  = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 1850 
 1851         /* Set "fast registration enabled" for all kernel QPs */
 1852         if (!qp->ibqp.uobject)
 1853                 context->params1 |= cpu_to_be32(1 << 11);
 1854 
 1855         if (attr_mask & IB_QP_RNR_RETRY) {
 1856                 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
 1857                 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
 1858         }
 1859 
 1860         if (attr_mask & IB_QP_RETRY_CNT) {
 1861                 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
 1862                 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
 1863         }
 1864 
 1865         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
 1866                 if (attr->max_rd_atomic)
 1867                         context->params1 |=
 1868                                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
 1869                 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
 1870         }
 1871 
 1872         if (attr_mask & IB_QP_SQ_PSN)
 1873                 context->next_send_psn = cpu_to_be32(attr->sq_psn);
 1874 
 1875         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
 1876                 if (attr->max_dest_rd_atomic)
 1877                         context->params2 |=
 1878                                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
 1879                 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
 1880         }
 1881 
 1882         if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
 1883                 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
 1884                 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
 1885         }
 1886 
 1887         if (ibqp->srq)
 1888                 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
 1889 
 1890         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
 1891                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
 1892                 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
 1893         }
 1894         if (attr_mask & IB_QP_RQ_PSN)
 1895                 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
 1896 
 1897         /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
 1898         if (attr_mask & IB_QP_QKEY) {
 1899                 if (qp->mlx4_ib_qp_type &
 1900                     (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
 1901                         context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 1902                 else {
 1903                         if (mlx4_is_mfunc(dev->dev) &&
 1904                             !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
 1905                             (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
 1906                             MLX4_RESERVED_QKEY_BASE) {
 1907                                 pr_err("Cannot use reserved QKEY"
 1908                                        " 0x%x (range 0xffff0000..0xffffffff"
 1909                                        " is reserved)\n", attr->qkey);
 1910                                 err = -EINVAL;
 1911                                 goto out;
 1912                         }
 1913                         context->qkey = cpu_to_be32(attr->qkey);
 1914                 }
 1915                 optpar |= MLX4_QP_OPTPAR_Q_KEY;
 1916         }
 1917 
 1918         if (ibqp->srq)
 1919                 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
 1920 
 1921         if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 1922                 context->db_rec_addr = cpu_to_be64(qp->db.dma);
 1923 
 1924         if (cur_state == IB_QPS_INIT &&
 1925             new_state == IB_QPS_RTR  &&
 1926             (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
 1927              ibqp->qp_type == IB_QPT_UD ||
 1928              ibqp->qp_type == IB_QPT_RAW_PACKET)) {
 1929                 context->pri_path.sched_queue = (qp->port - 1) << 6;
 1930                 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
 1931                     qp->mlx4_ib_qp_type &
 1932                     (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
 1933                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
 1934                         if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
 1935                                 context->pri_path.fl = 0x80;
 1936                 } else {
 1937                         if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
 1938                                 context->pri_path.fl = 0x80;
 1939                         context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
 1940                 }
 1941                 if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
 1942                     IB_LINK_LAYER_ETHERNET) {
 1943                         if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
 1944                             qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
 1945                                 context->pri_path.feup = 1 << 7; /* don't fsm */
 1946                         /* handle smac_index */
 1947                         if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
 1948                             qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
 1949                             qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
 1950                                 err = handle_eth_ud_smac_index(dev, qp, context);
 1951                                 if (err) {
 1952                                         err = -EINVAL;
 1953                                         goto out;
 1954                                 }
 1955                                 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
 1956                                         dev->qp1_proxy[qp->port - 1] = qp;
 1957                         }
 1958                 }
 1959         }
 1960 
 1961         if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
 1962                 context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
 1963                                         MLX4_IB_LINK_TYPE_ETH;
 1964                 if (dev->dev->caps.tunnel_offload_mode ==  MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
 1965                         /* set QP to receive both tunneled & non-tunneled packets */
 1966                         if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET)))
 1967                                 context->srqn = cpu_to_be32(7 << 28);
 1968                 }
 1969         }
 1970 
 1971         if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
 1972                 int is_eth = rdma_port_get_link_layer(
 1973                                 &dev->ib_dev, qp->port) ==
 1974                                 IB_LINK_LAYER_ETHERNET;
 1975                 if (is_eth) {
 1976                         context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
 1977                         optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
 1978                 }
 1979         }
 1980 
 1981 
 1982         if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
 1983             attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
 1984                 sqd_event = 1;
 1985         else
 1986                 sqd_event = 0;
 1987 
 1988         if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 1989                 context->rlkey_roce_mode |= (1 << 4);
 1990 
 1991         /*
 1992          * Before passing a kernel QP to the HW, make sure that the
 1993          * ownership bits of the send queue are set and the SQ
 1994          * headroom is stamped so that the hardware doesn't start
 1995          * processing stale work requests.
 1996          */
 1997         if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 1998                 struct mlx4_wqe_ctrl_seg *ctrl;
 1999                 int i;
 2000 
 2001                 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
 2002                         ctrl = get_send_wqe(qp, i);
 2003                         ctrl->owner_opcode = cpu_to_be32(1U << 31);
 2004                         if (qp->sq_max_wqes_per_wr == 1)
 2005                                 ctrl->fence_size =
 2006                                                 1 << (qp->sq.wqe_shift - 4);
 2007 
 2008                         stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
 2009                 }
 2010         }
 2011 
 2012         err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
 2013                              to_mlx4_state(new_state), context, optpar,
 2014                              sqd_event, &qp->mqp);
 2015         if (err)
 2016                 goto out;
 2017 
 2018         qp->state = new_state;
 2019 
 2020         if (attr_mask & IB_QP_ACCESS_FLAGS)
 2021                 qp->atomic_rd_en = attr->qp_access_flags;
 2022         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 2023                 qp->resp_depth = attr->max_dest_rd_atomic;
 2024         if (attr_mask & IB_QP_PORT) {
 2025                 qp->port = attr->port_num;
 2026                 update_mcg_macs(dev, qp);
 2027         }
 2028         if (attr_mask & IB_QP_ALT_PATH)
 2029                 qp->alt_port = attr->alt_port_num;
 2030 
 2031         if (is_sqp(dev, qp))
 2032                 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
 2033 
 2034         /*
 2035          * If we moved QP0 to RTR, bring the IB link up; if we moved
 2036          * QP0 to RESET or ERROR, bring the link back down.
 2037          */
 2038         if (is_qp0(dev, qp)) {
 2039                 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
 2040                         if (mlx4_INIT_PORT(dev->dev, qp->port))
 2041                                 pr_warn("INIT_PORT failed for port %d\n",
 2042                                        qp->port);
 2043 
 2044                 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
 2045                     (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
 2046                         mlx4_CLOSE_PORT(dev->dev, qp->port);
 2047         }
 2048 
 2049         /*
 2050          * If we moved a kernel QP to RESET, clean up all old CQ
 2051          * entries and reinitialize the QP.
 2052          */
 2053         if (new_state == IB_QPS_RESET) {
 2054                 if (!ibqp->uobject) {
 2055                         mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
 2056                                          ibqp->srq ? to_msrq(ibqp->srq) : NULL);
 2057                         if (send_cq != recv_cq)
 2058                                 mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
 2059 
 2060                         qp->rq.head = 0;
 2061                         qp->rq.tail = 0;
 2062                         qp->sq.head = 0;
 2063                         qp->sq.tail = 0;
 2064                         qp->sq_next_wqe = 0;
 2065                         if (qp->rq.wqe_cnt)
 2066                                 *qp->db.db  = 0;
 2067 
 2068                         if (qp->flags & MLX4_IB_QP_NETIF)
 2069                                 mlx4_ib_steer_qp_reg(dev, qp, 0);
 2070                 }
 2071                 if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
 2072                         mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
 2073                         qp->pri.smac = 0;
 2074                         qp->pri.smac_port = 0;
 2075                 }
 2076                 if (qp->alt.smac) {
 2077                         mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
 2078                         qp->alt.smac = 0;
 2079                 }
 2080                 if (qp->pri.vid < 0x1000) {
 2081                         mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
 2082                         qp->pri.vid = 0xFFFF;
 2083                         qp->pri.candidate_vid = 0xFFFF;
 2084                         qp->pri.update_vid = 0;
 2085                 }
 2086 
 2087                 if (qp->alt.vid < 0x1000) {
 2088                         mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
 2089                         qp->alt.vid = 0xFFFF;
 2090                         qp->alt.candidate_vid = 0xFFFF;
 2091                         qp->alt.update_vid = 0;
 2092                 }
 2093         }
 2094 out:
 2095         if (err && qp->counter_index)
 2096                 mlx4_ib_free_qp_counter(dev, qp);
 2097         if (err && steer_qp)
 2098                 mlx4_ib_steer_qp_reg(dev, qp, 0);
 2099         kfree(context);
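              /*
               * Commit or roll back the candidate SMAC/VLAN registrations made
               * by mlx4_set_path(): on failure the candidates are unregistered,
               * on success they replace the previous values, whose
               * registrations are released.
               */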
 2100         if (qp->pri.candidate_smac ||
 2101             (!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) {
 2102                 if (err) {
 2103                         mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
 2104                 } else {
 2105                         if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port))
 2106                                 mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
 2107                         qp->pri.smac = qp->pri.candidate_smac;
 2108                         qp->pri.smac_index = qp->pri.candidate_smac_index;
 2109                         qp->pri.smac_port = qp->pri.candidate_smac_port;
 2110                 }
 2111                 qp->pri.candidate_smac = 0;
 2112                 qp->pri.candidate_smac_index = 0;
 2113                 qp->pri.candidate_smac_port = 0;
 2114         }
 2115         if (qp->alt.candidate_smac) {
 2116                 if (err) {
 2117                         mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
 2118                 } else {
 2119                         if (qp->alt.smac)
 2120                                 mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
 2121                         qp->alt.smac = qp->alt.candidate_smac;
 2122                         qp->alt.smac_index = qp->alt.candidate_smac_index;
 2123                         qp->alt.smac_port = qp->alt.candidate_smac_port;
 2124                 }
 2125                 qp->alt.candidate_smac = 0;
 2126                 qp->alt.candidate_smac_index = 0;
 2127                 qp->alt.candidate_smac_port = 0;
 2128         }
 2129 
 2130         if (qp->pri.update_vid) {
 2131                 if (err) {
 2132                         if (qp->pri.candidate_vid < 0x1000)
 2133                                 mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
 2134                                                      qp->pri.candidate_vid);
 2135                 } else {
 2136                         if (qp->pri.vid < 0x1000)
 2137                                 mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
 2138                                                      qp->pri.vid);
 2139                         qp->pri.vid = qp->pri.candidate_vid;
 2140                         qp->pri.vlan_port = qp->pri.candidate_vlan_port;
 2141                         qp->pri.vlan_index =  qp->pri.candidate_vlan_index;
 2142                 }
 2143                 qp->pri.candidate_vid = 0xFFFF;
 2144                 qp->pri.update_vid = 0;
 2145         }
 2146 
 2147         if (qp->alt.update_vid) {
 2148                 if (err) {
 2149                         if (qp->alt.candidate_vid < 0x1000)
 2150                                 mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
 2151                                                      qp->alt.candidate_vid);
 2152                 } else {
 2153                         if (qp->alt.vid < 0x1000)
 2154                                 mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
 2155                                                      qp->alt.vid);
 2156                         qp->alt.vid = qp->alt.candidate_vid;
 2157                         qp->alt.vlan_port = qp->alt.candidate_vlan_port;
 2158                         qp->alt.vlan_index =  qp->alt.candidate_vlan_index;
 2159                 }
 2160                 qp->alt.candidate_vid = 0xFFFF;
 2161                 qp->alt.update_vid = 0;
 2162         }
 2163 
 2164         return err;
 2165 }
 2166 
 2167 static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 2168                               int attr_mask, struct ib_udata *udata)
 2169 {
 2170         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 2171         struct mlx4_ib_qp *qp = to_mqp(ibqp);
 2172         enum ib_qp_state cur_state, new_state;
 2173         int err = -EINVAL;
 2174         mutex_lock(&qp->mutex);
 2175 
 2176         cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
 2177         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 2178 
 2179         if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
 2180                                 attr_mask)) {
 2181                 pr_debug("qpn 0x%x: invalid attribute mask specified "
 2182                          "for transition %d to %d. qp_type %d,"
 2183                          " attr_mask 0x%x\n",
 2184                          ibqp->qp_num, cur_state, new_state,
 2185                          ibqp->qp_type, attr_mask);
 2186                 goto out;
 2187         }
 2188 
 2189         if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
 2190                 if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
 2191                         if ((ibqp->qp_type == IB_QPT_RC) ||
 2192                             (ibqp->qp_type == IB_QPT_UD) ||
 2193                             (ibqp->qp_type == IB_QPT_UC) ||
 2194                             (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
 2195                             (ibqp->qp_type == IB_QPT_XRC_INI)) {
 2196                                 attr->port_num = mlx4_ib_bond_next_port(dev);
 2197                         }
 2198                 } else {
 2199                         /* no sense in changing port_num
 2200                          * when ports are bonded */
 2201                         attr_mask &= ~IB_QP_PORT;
 2202                 }
 2203         }
 2204 
 2205         if ((attr_mask & IB_QP_PORT) &&
 2206             (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
 2207                 pr_debug("qpn 0x%x: invalid port number (%d) specified "
 2208                          "for transition %d to %d. qp_type %d\n",
 2209                          ibqp->qp_num, attr->port_num, cur_state,
 2210                          new_state, ibqp->qp_type);
 2211                 goto out;
 2212         }
 2213 
 2214         if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
 2215             (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
 2216              IB_LINK_LAYER_ETHERNET))
 2217                 goto out;
 2218 
 2219         if (attr_mask & IB_QP_PKEY_INDEX) {
 2220                 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 2221                 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
 2222                         pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
 2223                                  "for transition %d to %d. qp_type %d\n",
 2224                                  ibqp->qp_num, attr->pkey_index, cur_state,
 2225                                  new_state, ibqp->qp_type);
 2226                         goto out;
 2227                 }
 2228         }
 2229 
 2230         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
 2231             attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
 2232                 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
 2233                          "Transition %d to %d. qp_type %d\n",
 2234                          ibqp->qp_num, attr->max_rd_atomic, cur_state,
 2235                          new_state, ibqp->qp_type);
 2236                 goto out;
 2237         }
 2238 
 2239         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
 2240             attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
 2241                 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
 2242                          "Transition %d to %d. qp_type %d\n",
 2243                          ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
 2244                          new_state, ibqp->qp_type);
 2245                 goto out;
 2246         }
 2247 
 2248         if (cur_state == new_state && cur_state == IB_QPS_RESET) {
 2249                 err = 0;
 2250                 goto out;
 2251         }
 2252 
 2253         err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state, udata);
 2254 
 2255         if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
 2256                 attr->port_num = 1;
 2257 
 2258 out:
 2259         mutex_unlock(&qp->mutex);
 2260         return err;
 2261 }
 2262 
 2263 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 2264                       int attr_mask, struct ib_udata *udata)
 2265 {
 2266         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 2267         int ret;
 2268 
 2269         ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
 2270 
 2271         if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
 2272                 struct mlx4_ib_sqp *sqp = to_msqp(mqp);
 2273                 int err = 0;
 2274 
 2275                 if (sqp->roce_v2_gsi)
 2276                         err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
 2277                 if (err)
 2278                         pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
 2279                                err);
 2280         }
 2281         return ret;
 2282 }
 2283 
 2284 static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 2285 {
 2286         int i;
 2287         for (i = 0; i < dev->caps.num_ports; i++) {
 2288                 if (qpn == dev->caps.qp0_proxy[i] ||
 2289                     qpn == dev->caps.qp0_tunnel[i]) {
 2290                         *qkey = dev->caps.qp0_qkey[i];
 2291                         return 0;
 2292                 }
 2293         }
 2294         return -EINVAL;
 2295 }
 2296 
 2297 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 2298                                   const struct ib_ud_wr *wr,
 2299                                   void *wqe, unsigned *mlx_seg_len)
 2300 {
 2301         struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
 2302         struct ib_device *ib_dev = &mdev->ib_dev;
 2303         struct mlx4_wqe_mlx_seg *mlx = wqe;
 2304         struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
 2305         struct mlx4_ib_ah *ah = to_mah(wr->ah);
 2306         u16 pkey;
 2307         u32 qkey;
 2308         int send_size;
 2309         int header_size;
 2310         int spc;
 2311         int i;
 2312 
 2313         if (wr->wr.opcode != IB_WR_SEND)
 2314                 return -EINVAL;
 2315 
 2316         send_size = 0;
 2317 
 2318         for (i = 0; i < wr->wr.num_sge; ++i)
 2319                 send_size += wr->wr.sg_list[i].length;
 2320 
 2321         /* for proxy-qp0 sends, need to add in size of tunnel header */
 2322         /* for tunnel-qp0 sends, tunnel header is already in s/g list */
 2323         if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
 2324                 send_size += sizeof (struct mlx4_ib_tunnel_header);
 2325 
 2326         ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
 2327 
 2328         if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
 2329                 sqp->ud_header.lrh.service_level =
 2330                         be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
 2331                 sqp->ud_header.lrh.destination_lid =
 2332                         cpu_to_be16(ah->av.ib.g_slid & 0x7f);
 2333                 sqp->ud_header.lrh.source_lid =
 2334                         cpu_to_be16(ah->av.ib.g_slid & 0x7f);
 2335         }
 2336 
 2337         mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 2338 
 2339         /* force loopback */
 2340         mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
 2341         mlx->rlid = sqp->ud_header.lrh.destination_lid;
 2342 
 2343         sqp->ud_header.lrh.virtual_lane    = 0;
 2344         sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
 2345         ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
 2346         sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 2347         if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
 2348                 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
 2349         else
 2350                 sqp->ud_header.bth.destination_qpn =
 2351                         cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 2352 
 2353         sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
 2354         if (mlx4_is_master(mdev->dev)) {
 2355                 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
 2356                         return -EINVAL;
 2357         } else {
 2358                 if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
 2359                         return -EINVAL;
 2360         }
 2361         sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
 2362         sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
 2363 
 2364         sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
 2365         sqp->ud_header.immediate_present = 0;
 2366 
 2367         header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
 2368 
 2369         /*
 2370          * Inline data segments may not cross a 64 byte boundary.  If
 2371          * our UD header is bigger than the space available up to the
 2372          * next 64 byte boundary in the WQE, use two inline data
 2373          * segments to hold the UD header.
 2374          */
 2375         spc = MLX4_INLINE_ALIGN -
 2376               ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
 2377         if (header_size <= spc) {
 2378                 inl->byte_count = cpu_to_be32((1U << 31) | header_size);
 2379                 memcpy(inl + 1, sqp->header_buf, header_size);
 2380                 i = 1;
 2381         } else {
 2382                 inl->byte_count = cpu_to_be32((1U << 31) | spc);
 2383                 memcpy(inl + 1, sqp->header_buf, spc);
 2384 
 2385                 inl = (void *) (inl + 1) + spc;
 2386                 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
 2387                 /*
 2388                  * Need a barrier here to make sure all the data is
 2389                  * visible before the byte_count field is set.
 2390                  * Otherwise the HCA prefetcher could grab the 64-byte
 2391                  * chunk with this inline segment and get a valid (!=
 2392                  * 0xffffffff) byte count but stale data, and end up
 2393                  * generating a packet with bad headers.
 2394                  *
 2395                  * The first inline segment's byte_count field doesn't
 2396                  * need a barrier, because it comes after a
 2397                  * control/MLX segment and therefore is at an offset
 2398                  * of 16 mod 64.
 2399                  */
 2400                 wmb();
 2401                 inl->byte_count = cpu_to_be32((1U << 31) | (header_size - spc));
 2402                 i = 2;
 2403         }
 2404 
 2405         *mlx_seg_len =
 2406         ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
 2407         return 0;
 2408 }
 2409 
 2410 static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
 2411 {
 2412         union sl2vl_tbl_to_u64 tmp_vltab;
 2413         u8 vl;
 2414 
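              /*
               * The cached SL-to-VL table packs two 4-bit VL entries per byte:
               * even SLs use the high nibble, odd SLs the low nibble.
               */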
 2415         if (sl > 15)
 2416                 return 0xf;
 2417         tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]);
 2418         vl = tmp_vltab.sl8[sl >> 1];
 2419         if (sl & 1)
 2420                 vl &= 0x0f;
 2421         else
 2422                 vl >>= 4;
 2423         return vl;
 2424 }
 2425 
 2426 #define MLX4_ROCEV2_QP1_SPORT 0xC000
 2427 static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
 2428                             void *wqe, unsigned *mlx_seg_len)
 2429 {
 2430         struct ib_device *ib_dev = sqp->qp.ibqp.device;
 2431         struct mlx4_wqe_mlx_seg *mlx = wqe;
 2432         struct mlx4_wqe_ctrl_seg *ctrl = wqe;
 2433         struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
 2434         struct mlx4_ib_ah *ah = to_mah(wr->ah);
 2435         union ib_gid sgid;
 2436         u16 pkey;
 2437         int send_size;
 2438         int header_size;
 2439         int spc;
 2440         int i;
 2441         int err = 0;
 2442         u16 vlan = 0xffff;
 2443         bool is_eth;
 2444         bool is_vlan = false;
 2445         bool is_grh;
 2446         bool is_udp = false;
 2447         int ip_version = 0;
 2448 
 2449         send_size = 0;
 2450         for (i = 0; i < wr->wr.num_sge; ++i)
 2451                 send_size += wr->wr.sg_list[i].length;
 2452 
 2453         is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
 2454         is_grh = mlx4_ib_ah_grh_present(ah);
 2455         if (is_eth) {
 2456                 struct ib_gid_attr gid_attr;
 2457 
 2458                 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 2459                         /* When multi-function is enabled, the ib_core gid
 2460                          * indexes don't necessarily match the hw ones, so
 2461                          * we must use our own cache */
 2462                         err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
 2463                                                            be32_to_cpu(ah->av.ib.port_pd) >> 24,
 2464                                                            ah->av.ib.gid_index, &sgid.raw[0]);
 2465                         if (err)
 2466                                 return err;
 2467                 } else  {
 2468                         err = ib_get_cached_gid(ib_dev,
 2469                                                 be32_to_cpu(ah->av.ib.port_pd) >> 24,
 2470                                                 ah->av.ib.gid_index, &sgid,
 2471                                                 &gid_attr);
 2472                         if (!err) {
 2473                                 if (gid_attr.ndev)
 2474                                         if_rele(gid_attr.ndev);
 2475                                 if (!memcmp(&sgid, &zgid, sizeof(sgid)))
 2476                                         err = -ENOENT;
 2477                         }
 2478                         if (!err) {
 2479                                 is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
 2480                                 if (is_udp) {
 2481                                         if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
 2482                                                 ip_version = 4;
 2483                                         else
 2484                                                 ip_version = 6;
 2485                                         is_grh = false;
 2486                                 }
 2487                         } else {
 2488                                 return err;
 2489                         }
 2490                 }
 2491                 if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
 2492                         vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
 2493                         is_vlan = true;
 2494                 }
 2495         }
 2496         err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
 2497                           ip_version, is_udp, 0, &sqp->ud_header);
 2498         if (err)
 2499                 return err;
 2500 
 2501         if (!is_eth) {
 2502                 sqp->ud_header.lrh.service_level =
 2503                         be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
 2504                 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
 2505                 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
 2506         }
 2507 
 2508         if (is_grh || (ip_version == 6)) {
 2509                 sqp->ud_header.grh.traffic_class =
 2510                         (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
 2511                 sqp->ud_header.grh.flow_label    =
 2512                         ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
 2513                 sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
 2514                 if (is_eth) {
 2515                         memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
 2516                 } else {
 2517                         if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 2518                                 /* When multi-function is enabled, the ib_core gid
 2519                                  * indexes don't necessarily match the hw ones, so
 2520                                  * we must use our own cache
 2521                                  */
 2522                                 sqp->ud_header.grh.source_gid.global.subnet_prefix =
 2523                                         cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
 2524                                                                     demux[sqp->qp.port - 1].
 2525                                                                     subnet_prefix)));
 2526                                 sqp->ud_header.grh.source_gid.global.interface_id =
 2527                                         to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 2528                                                        guid_cache[ah->av.ib.gid_index];
 2529                         } else {
 2530                                 ib_get_cached_gid(ib_dev,
 2531                                                   be32_to_cpu(ah->av.ib.port_pd) >> 24,
 2532                                                   ah->av.ib.gid_index,
 2533                                                   &sqp->ud_header.grh.source_gid, NULL);
 2534                         }
 2535                 }
 2536                 memcpy(sqp->ud_header.grh.destination_gid.raw,
 2537                        ah->av.ib.dgid, 16);
 2538         }
 2539 
 2540         if (ip_version == 4) {
 2541                 sqp->ud_header.ip4.tos =
 2542                         (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
 2543                 sqp->ud_header.ip4.id = 0;
 2544                 sqp->ud_header.ip4.frag_off = htons(IP_DF);
 2545                 sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
 2546 
 2547                 memcpy(&sqp->ud_header.ip4.saddr,
 2548                        sgid.raw + 12, 4);
 2549                 memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
 2550                 sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
 2551         }
 2552 
 2553         if (is_udp) {
 2554                 sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
 2555                 sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
 2556                 sqp->ud_header.udp.csum = 0;
 2557         }
 2558 
 2559         mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 2560 
 2561         if (!is_eth) {
 2562                 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
 2563                                           (sqp->ud_header.lrh.destination_lid ==
 2564                                            IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
 2565                                           (sqp->ud_header.lrh.service_level << 8));
 2566                 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
 2567                         mlx->flags |= cpu_to_be32(0x1); /* force loopback */
 2568                 mlx->rlid = sqp->ud_header.lrh.destination_lid;
 2569         }
 2570 
 2571         switch (wr->wr.opcode) {
 2572         case IB_WR_SEND:
 2573                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
 2574                 sqp->ud_header.immediate_present = 0;
 2575                 break;
 2576         case IB_WR_SEND_WITH_IMM:
 2577                 sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
 2578                 sqp->ud_header.immediate_present = 1;
 2579                 sqp->ud_header.immediate_data    = wr->wr.ex.imm_data;
 2580                 break;
 2581         default:
 2582                 return -EINVAL;
 2583         }
 2584 
 2585         if (is_eth) {
 2586                 struct in6_addr in6;
 2587                 u16 ether_type;
 2588                 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
 2589 
 2590                 ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
 2591                         (ip_version == 4 ? ETHERTYPE_IP : ETHERTYPE_IPV6);
 2592 
 2593                 mlx->sched_prio = cpu_to_be16(pcp);
 2594 
 2595                 ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
 2596                 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
 2597                 memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
 2598                 memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
 2599                 memcpy(&in6, sgid.raw, sizeof(in6));
 2600 
 2601 
 2602                 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
 2603                         mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
 2604                 if (!is_vlan) {
 2605                         sqp->ud_header.eth.type = cpu_to_be16(ether_type);
 2606                 } else {
 2607                         sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
 2608                         sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
 2609                 }
 2610         } else {
 2611                 sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 :
 2612                                                         sl_to_vl(to_mdev(ib_dev),
 2613                                                                  sqp->ud_header.lrh.service_level,
 2614                                                                  sqp->qp.port);
 2615                 if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
 2616                         return -EINVAL;
 2617                 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
 2618                         sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
 2619         }
 2620         sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
 2621         if (!sqp->qp.ibqp.qp_num)
 2622                 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
 2623         else
 2624                 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
 2625         sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 2626         sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
 2627         sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
 2628         sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
 2629                                                sqp->qkey : wr->remote_qkey);
 2630         sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
 2631 
 2632         header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
 2633 
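              /* Compiled-out debug dump of the freshly packed UD header. */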
 2634         if (0) {
 2635                 pr_err("built UD header of size %d:\n", header_size);
 2636                 for (i = 0; i < header_size / 4; ++i) {
 2637                         if (i % 8 == 0)
 2638                                 pr_err("  [%02x] ", i * 4);
 2639                         pr_cont(" %08x",
 2640                                 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
 2641                         if ((i + 1) % 8 == 0)
 2642                                 pr_cont("\n");
 2643                 }
 2644                 pr_err("\n");
 2645         }
 2646 
 2647         /*
 2648          * Inline data segments may not cross a 64 byte boundary.  If
 2649          * our UD header is bigger than the space available up to the
 2650          * next 64 byte boundary in the WQE, use two inline data
 2651          * segments to hold the UD header.
 2652          */
 2653         spc = MLX4_INLINE_ALIGN -
 2654                 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
 2655         if (header_size <= spc) {
 2656                 inl->byte_count = cpu_to_be32(1U << 31 | header_size);
 2657                 memcpy(inl + 1, sqp->header_buf, header_size);
 2658                 i = 1;
 2659         } else {
 2660                 inl->byte_count = cpu_to_be32(1U << 31 | spc);
 2661                 memcpy(inl + 1, sqp->header_buf, spc);
 2662 
 2663                 inl = (void *) (inl + 1) + spc;
 2664                 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
 2665                 /*
 2666                  * Need a barrier here to make sure all the data is
 2667                  * visible before the byte_count field is set.
 2668                  * Otherwise the HCA prefetcher could grab the 64-byte
 2669                  * chunk with this inline segment and get a valid (!=
 2670                  * 0xffffffff) byte count but stale data, and end up
 2671                  * generating a packet with bad headers.
 2672                  *
 2673                  * The first inline segment's byte_count field doesn't
 2674                  * need a barrier, because it comes after a
 2675                  * control/MLX segment and therefore is at an offset
 2676                  * of 16 mod 64.
 2677                  */
 2678                 wmb();
 2679                 inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
 2680                 i = 2;
 2681         }
 2682 
 2683         *mlx_seg_len =
 2684                 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
 2685         return 0;
 2686 }
 2687 
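      /*
       * Check whether posting nreq more WQEs would overflow the work
       * queue.  The lockless head/tail snapshot may be stale, so on
       * apparent overflow it is re-read under the CQ lock before the
       * post is actually refused.
       */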
 2688 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
 2689 {
 2690         unsigned cur;
 2691         struct mlx4_ib_cq *cq;
 2692 
 2693         cur = wq->head - wq->tail;
 2694         if (likely(cur + nreq < wq->max_post))
 2695                 return 0;
 2696 
 2697         cq = to_mcq(ib_cq);
 2698         spin_lock(&cq->lock);
 2699         cur = wq->head - wq->tail;
 2700         spin_unlock(&cq->lock);
 2701 
 2702         return cur + nreq >= wq->max_post;
 2703 }
 2704 
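      /* Translate IB access flags into mlx4 FMR/bind permission bits. */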
 2705 static __be32 convert_access(int acc)
 2706 {
 2707         return (acc & IB_ACCESS_REMOTE_ATOMIC ?
 2708                 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC)       : 0) |
 2709                (acc & IB_ACCESS_REMOTE_WRITE  ?
 2710                 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
 2711                (acc & IB_ACCESS_REMOTE_READ   ?
 2712                 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ)  : 0) |
 2713                (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
 2714                 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 2715 }
 2716 
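      /* Fill a fast-register (FMR) segment from an IB_WR_REG_MR request. */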
 2717 static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
 2718                         const struct ib_reg_wr *wr)
 2719 {
 2720         struct mlx4_ib_mr *mr = to_mmr(wr->mr);
 2721 
 2722         fseg->flags             = convert_access(wr->access);
 2723         fseg->mem_key           = cpu_to_be32(wr->key);
 2724         fseg->buf_list          = cpu_to_be64(mr->page_map);
 2725         fseg->start_addr        = cpu_to_be64(mr->ibmr.iova);
 2726         fseg->reg_len           = cpu_to_be64(mr->ibmr.length);
 2727         fseg->offset            = 0; /* XXX -- is this just for ZBVA? */
 2728         fseg->page_size         = cpu_to_be32(ilog2(mr->ibmr.page_size));
 2729         fseg->reserved[0]       = 0;
 2730         fseg->reserved[1]       = 0;
 2731 }
 2732 
 2733 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 2734 {
 2735         memset(iseg, 0, sizeof(*iseg));
 2736         iseg->mem_key = cpu_to_be32(rkey);
 2737 }
 2738 
 2739 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
 2740                                           u64 remote_addr, u32 rkey)
 2741 {
 2742         rseg->raddr    = cpu_to_be64(remote_addr);
 2743         rseg->rkey     = cpu_to_be32(rkey);
 2744         rseg->reserved = 0;
 2745 }
 2746 
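      /*
       * Fill an atomic segment: compare-and-swap carries the swap and
       * compare values, while (masked) fetch-and-add reuses the same
       * slots for the add operand and its mask.
       */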
 2747 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
 2748                 const struct ib_atomic_wr *wr)
 2749 {
 2750         if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 2751                 aseg->swap_add = cpu_to_be64(wr->swap);
 2752                 aseg->compare  = cpu_to_be64(wr->compare_add);
 2753         } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
 2754                 aseg->swap_add = cpu_to_be64(wr->compare_add);
 2755                 aseg->compare  = cpu_to_be64(wr->compare_add_mask);
 2756         } else {
 2757                 aseg->swap_add = cpu_to_be64(wr->compare_add);
 2758                 aseg->compare  = 0;
 2759         }
 2760 
 2761 }
 2762 
 2763 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
 2764                                   const struct ib_atomic_wr *wr)
 2765 {
 2766         aseg->swap_add          = cpu_to_be64(wr->swap);
 2767         aseg->swap_add_mask     = cpu_to_be64(wr->swap_mask);
 2768         aseg->compare           = cpu_to_be64(wr->compare_add);
 2769         aseg->compare_mask      = cpu_to_be64(wr->compare_add_mask);
 2770 }
 2771 
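      /*
       * Copy the address vector, remote QPN and Q_Key (plus VLAN/MAC for
       * RoCE) into a UD datagram segment.
       */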
 2772 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 2773                              const struct ib_ud_wr *wr)
 2774 {
 2775         memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
 2776         dseg->dqpn = cpu_to_be32(wr->remote_qpn);
 2777         dseg->qkey = cpu_to_be32(wr->remote_qkey);
 2778         dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
 2779         memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
 2780 }
 2781 
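      /*
       * Build the datagram segment used by SR-IOV proxy special QPs:
       * force loopback, drop the GRH bit and target the master's qp0/qp1
       * tunnel QP for the given port.
       */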
 2782 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 2783                                     struct mlx4_wqe_datagram_seg *dseg,
 2784                                     const struct ib_ud_wr *wr,
 2785                                     enum mlx4_ib_qp_type qpt)
 2786 {
 2787         union mlx4_ext_av *av = &to_mah(wr->ah)->av;
 2788         struct mlx4_av sqp_av = {0};
 2789         int port = *((u8 *) &av->ib.port_pd) & 0x3;
 2790 
 2791         /* force loopback */
 2792         sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
 2793         sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
 2794         sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
 2795                         cpu_to_be32(0xf0000000);
 2796 
 2797         memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
 2798         if (qpt == MLX4_IB_QPT_PROXY_GSI)
 2799                 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
 2800         else
 2801                 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
 2802         /* Use QKEY from the QP context, which is set by master */
 2803         dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 2804 }
 2805 
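      /*
       * Append the mlx4_ib tunnel header (address vector, remote QPN,
       * P_Key index, Q_Key, MAC and VLAN) as one or two inline segments,
       * honoring the 64-byte inline-segment boundary.
       */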
 2806 static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
 2807 {
 2808         struct mlx4_wqe_inline_seg *inl = wqe;
 2809         struct mlx4_ib_tunnel_header hdr;
 2810         struct mlx4_ib_ah *ah = to_mah(wr->ah);
 2811         int spc;
 2812         int i;
 2813 
 2814         memcpy(&hdr.av, &ah->av, sizeof hdr.av);
 2815         hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
 2816         hdr.pkey_index = cpu_to_be16(wr->pkey_index);
 2817         hdr.qkey = cpu_to_be32(wr->remote_qkey);
 2818         memcpy(hdr.mac, ah->av.eth.mac, 6);
 2819         hdr.vlan = ah->av.eth.vlan;
 2820 
 2821         spc = MLX4_INLINE_ALIGN -
 2822                 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
 2823         if (sizeof (hdr) <= spc) {
 2824                 memcpy(inl + 1, &hdr, sizeof (hdr));
 2825                 wmb();
 2826                 inl->byte_count = cpu_to_be32((1U << 31) | (u32)sizeof(hdr));
 2827                 i = 1;
 2828         } else {
 2829                 memcpy(inl + 1, &hdr, spc);
 2830                 wmb();
 2831                 inl->byte_count = cpu_to_be32((1U << 31) | spc);
 2832 
 2833                 inl = (void *) (inl + 1) + spc;
 2834                 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
 2835                 wmb();
 2836                 inl->byte_count = cpu_to_be32((1U << 31) | (u32)(sizeof (hdr) - spc));
 2837                 i = 2;
 2838         }
 2839 
 2840         *mlx_seg_len =
 2841                 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
 2842 }
 2843 
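      /*
       * Append the extra 4-byte inline segment that carries the ICRC
       * placeholder for MLX (SMI/GSI) sends.
       */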
 2844 static void set_mlx_icrc_seg(void *dseg)
 2845 {
 2846         u32 *t = dseg;
 2847         struct mlx4_wqe_inline_seg *iseg = dseg;
 2848 
 2849         t[1] = 0;
 2850 
 2851         /*
 2852          * Need a barrier here before writing the byte_count field to
 2853          * make sure that all the data is visible before the
 2854          * byte_count field is set.  Otherwise, if the segment begins
 2855          * a new cacheline, the HCA prefetcher could grab the 64-byte
 2856          * chunk and get a valid (!= 0xffffffff) byte count but
 2857          * stale data, and end up sending the wrong data.
 2858          */
 2859         wmb();
 2860 
 2861         iseg->byte_count = cpu_to_be32((1U << 31) | 4);
 2862 }
 2863 
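      /*
       * Write a send-queue scatter/gather entry.  byte_count is set last,
       * after a barrier, so the HCA never sees a valid count paired with
       * a stale address/lkey; __set_data_seg() below is the unordered
       * variant used on the receive path.
       */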
 2864 static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 2865 {
 2866         dseg->lkey       = cpu_to_be32(sg->lkey);
 2867         dseg->addr       = cpu_to_be64(sg->addr);
 2868 
 2869         /*
 2870          * Need a barrier here before writing the byte_count field to
 2871          * make sure that all the data is visible before the
 2872          * byte_count field is set.  Otherwise, if the segment begins
 2873          * a new cacheline, the HCA prefetcher could grab the 64-byte
 2874          * chunk and get a valid (!= 0xffffffff) byte count but
 2875          * stale data, and end up sending the wrong data.
 2876          */
 2877         wmb();
 2878 
 2879         dseg->byte_count = cpu_to_be32(sg->length);
 2880 }
 2881 
 2882 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 2883 {
 2884         dseg->byte_count = cpu_to_be32(sg->length);
 2885         dseg->lkey       = cpu_to_be32(sg->lkey);
 2886         dseg->addr       = cpu_to_be64(sg->addr);
 2887 }
 2888 
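      /*
       * Copy the LSO headers into the WQE, flag oversized (larger than a
       * cache line) headers via *blh, and report the combined MSS and
       * header-length word through *lso_hdr_sz.
       */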
 2889 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, const struct ib_ud_wr *wr,
 2890                          struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
 2891                          __be32 *lso_hdr_sz, __be32 *blh)
 2892 {
 2893         unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
 2894 
 2895         if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
 2896                 *blh = cpu_to_be32(1 << 6);
 2897 
 2898         if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
 2899                      wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
 2900                 return -EINVAL;
 2901 
 2902         memcpy(wqe->header, wr->header, wr->hlen);
 2903 
 2904         *lso_hdr_sz  = cpu_to_be32(wr->mss << 16 | wr->hlen);
 2905         *lso_seg_len = halign;
 2906         return 0;
 2907 }
 2908 
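      /*
       * Return the immediate data or invalidate rkey word for the control
       * segment, depending on the opcode.
       */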
 2909 static __be32 send_ieth(const struct ib_send_wr *wr)
 2910 {
 2911         switch (wr->opcode) {
 2912         case IB_WR_SEND_WITH_IMM:
 2913         case IB_WR_RDMA_WRITE_WITH_IMM:
 2914                 return wr->ex.imm_data;
 2915 
 2916         case IB_WR_SEND_WITH_INV:
 2917                 return cpu_to_be32(wr->ex.invalidate_rkey);
 2918 
 2919         default:
 2920                 return 0;
 2921         }
 2922 }
 2923 
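      /*
       * Write a zero-length inline segment used as padding so that the
       * tunnel header that follows starts on a cache-line boundary.
       */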
 2924 static void add_zero_len_inline(void *wqe)
 2925 {
 2926         struct mlx4_wqe_inline_seg *inl = wqe;
 2927         memset(wqe, 0, 16);
 2928         inl->byte_count = cpu_to_be32(1U << 31);
 2929 }
 2930 
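      /*
       * Post a list of send work requests.  Each WR gets a control
       * segment, any opcode- and QP-type-specific segments and its data
       * segments (written in reverse order); the ownership bit is set
       * last, and the doorbell is rung once after the whole list has
       * been written.
       */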
 2931 int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 2932                       const struct ib_send_wr **bad_wr)
 2933 {
 2934         struct mlx4_ib_qp *qp = to_mqp(ibqp);
 2935         void *wqe;
 2936         struct mlx4_wqe_ctrl_seg *ctrl;
 2937         struct mlx4_wqe_data_seg *dseg;
 2938         unsigned long flags;
 2939         int nreq;
 2940         int err = 0;
 2941         unsigned ind;
 2942         int uninitialized_var(stamp);
 2943         int uninitialized_var(size);
 2944         unsigned uninitialized_var(seglen);
 2945         __be32 dummy;
 2946         __be32 *lso_wqe;
 2947         __be32 lso_hdr_sz = 0;
 2948         __be32 blh;
 2949         int i;
 2950         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 2951 
 2952         if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
 2953                 struct mlx4_ib_sqp *sqp = to_msqp(qp);
 2954 
 2955                 if (sqp->roce_v2_gsi) {
 2956                         struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
 2957                         struct ib_gid_attr gid_attr;
 2958                         union ib_gid gid;
 2959 
 2960                         if (!ib_get_cached_gid(ibqp->device,
 2961                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
 2962                                                ah->av.ib.gid_index, &gid,
 2963                                                &gid_attr)) {
 2964                                 if (gid_attr.ndev)
 2965                                         if_rele(gid_attr.ndev);
 2966                                 qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
 2967                                         to_mqp(sqp->roce_v2_gsi) : qp;
 2968                         } else {
 2969                                 pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
 2970                                        ah->av.ib.gid_index);
 2971                         }
 2972                 }
 2973         }
 2974 
 2975         spin_lock_irqsave(&qp->sq.lock, flags);
 2976         if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
 2977                 err = -EIO;
 2978                 *bad_wr = wr;
 2979                 nreq = 0;
 2980                 goto out;
 2981         }
 2982 
 2983         ind = qp->sq_next_wqe;
 2984 
 2985         for (nreq = 0; wr; ++nreq, wr = wr->next) {
 2986                 lso_wqe = &dummy;
 2987                 blh = 0;
 2988 
 2989                 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 2990                         err = -ENOMEM;
 2991                         *bad_wr = wr;
 2992                         goto out;
 2993                 }
 2994 
 2995                 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
 2996                         err = -EINVAL;
 2997                         *bad_wr = wr;
 2998                         goto out;
 2999                 }
 3000 
 3001                 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
 3002                 qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
 3003 
 3004                 ctrl->srcrb_flags =
 3005                         (wr->send_flags & IB_SEND_SIGNALED ?
 3006                          cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
 3007                         (wr->send_flags & IB_SEND_SOLICITED ?
 3008                          cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
 3009                         ((wr->send_flags & IB_SEND_IP_CSUM) ?
 3010                          cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
 3011                                      MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 3012                         qp->sq_signal_bits;
 3013 
 3014                 ctrl->imm = send_ieth(wr);
 3015 
 3016                 wqe += sizeof *ctrl;
 3017                 size = sizeof *ctrl / 16;
 3018 
 3019                 switch (qp->mlx4_ib_qp_type) {
 3020                 case MLX4_IB_QPT_RC:
 3021                 case MLX4_IB_QPT_UC:
 3022                         switch (wr->opcode) {
 3023                         case IB_WR_ATOMIC_CMP_AND_SWP:
 3024                         case IB_WR_ATOMIC_FETCH_AND_ADD:
 3025                         case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
 3026                                 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
 3027                                               atomic_wr(wr)->rkey);
 3028                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 3029 
 3030                                 set_atomic_seg(wqe, atomic_wr(wr));
 3031                                 wqe  += sizeof (struct mlx4_wqe_atomic_seg);
 3032 
 3033                                 size += (sizeof (struct mlx4_wqe_raddr_seg) +
 3034                                          sizeof (struct mlx4_wqe_atomic_seg)) / 16;
 3035 
 3036                                 break;
 3037 
 3038                         case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
 3039                                 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
 3040                                               atomic_wr(wr)->rkey);
 3041                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 3042 
 3043                                 set_masked_atomic_seg(wqe, atomic_wr(wr));
 3044                                 wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
 3045 
 3046                                 size += (sizeof (struct mlx4_wqe_raddr_seg) +
 3047                                          sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
 3048 
 3049                                 break;
 3050 
 3051                         case IB_WR_RDMA_READ:
 3052                         case IB_WR_RDMA_WRITE:
 3053                         case IB_WR_RDMA_WRITE_WITH_IMM:
 3054                                 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
 3055                                               rdma_wr(wr)->rkey);
 3056                                 wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 3057                                 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
 3058                                 break;
 3059 
 3060                         case IB_WR_LOCAL_INV:
 3061                                 ctrl->srcrb_flags |=
 3062                                         cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 3063                                 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
 3064                                 wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
 3065                                 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
 3066                                 break;
 3067 
 3068                         case IB_WR_REG_MR:
 3069                                 ctrl->srcrb_flags |=
 3070                                         cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 3071                                 set_reg_seg(wqe, reg_wr(wr));
 3072                                 wqe  += sizeof(struct mlx4_wqe_fmr_seg);
 3073                                 size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
 3074                                 break;
 3075 
 3076                         default:
 3077                                 /* No extra segments required for sends */
 3078                                 break;
 3079                         }
 3080                         break;
 3081 
 3082                 case MLX4_IB_QPT_TUN_SMI_OWNER:
 3083                         err =  build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
 3084                                         ctrl, &seglen);
 3085                         if (unlikely(err)) {
 3086                                 *bad_wr = wr;
 3087                                 goto out;
 3088                         }
 3089                         wqe  += seglen;
 3090                         size += seglen / 16;
 3091                         break;
 3092                 case MLX4_IB_QPT_TUN_SMI:
 3093                 case MLX4_IB_QPT_TUN_GSI:
 3094                         /* this is a UD qp used in MAD responses to slaves. */
 3095                         set_datagram_seg(wqe, ud_wr(wr));
 3096                         /* set the forced-loopback bit in the data seg av */
 3097                         *(__be32 *) wqe |= cpu_to_be32(0x80000000);
 3098                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 3099                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 3100                         break;
 3101                 case MLX4_IB_QPT_UD:
 3102                         set_datagram_seg(wqe, ud_wr(wr));
 3103                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 3104                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 3105 
 3106                         if (wr->opcode == IB_WR_LSO) {
 3107                                 err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
 3108                                                 &lso_hdr_sz, &blh);
 3109                                 if (unlikely(err)) {
 3110                                         *bad_wr = wr;
 3111                                         goto out;
 3112                                 }
 3113                                 lso_wqe = (__be32 *) wqe;
 3114                                 wqe  += seglen;
 3115                                 size += seglen / 16;
 3116                         }
 3117                         break;
 3118 
 3119                 case MLX4_IB_QPT_PROXY_SMI_OWNER:
 3120                         err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
 3121                                         ctrl, &seglen);
 3122                         if (unlikely(err)) {
 3123                                 *bad_wr = wr;
 3124                                 goto out;
 3125                         }
 3126                         wqe  += seglen;
 3127                         size += seglen / 16;
 3128                         /* to start tunnel header on a cache-line boundary */
 3129                         add_zero_len_inline(wqe);
 3130                         wqe += 16;
 3131                         size++;
 3132                         build_tunnel_header(ud_wr(wr), wqe, &seglen);
 3133                         wqe  += seglen;
 3134                         size += seglen / 16;
 3135                         break;
 3136                 case MLX4_IB_QPT_PROXY_SMI:
 3137                 case MLX4_IB_QPT_PROXY_GSI:
 3138                         /* If we are tunneling special qps, this is a UD qp.
 3139                          * In this case we first add a UD segment targeting
 3140                          * the tunnel qp, and then add a header with address
 3141                          * information */
 3142                         set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
 3143                                                 ud_wr(wr),
 3144                                                 qp->mlx4_ib_qp_type);
 3145                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 3146                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 3147                         build_tunnel_header(ud_wr(wr), wqe, &seglen);
 3148                         wqe  += seglen;
 3149                         size += seglen / 16;
 3150                         break;
 3151 
 3152                 case MLX4_IB_QPT_SMI:
 3153                 case MLX4_IB_QPT_GSI:
 3154                         err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
 3155                                         &seglen);
 3156                         if (unlikely(err)) {
 3157                                 *bad_wr = wr;
 3158                                 goto out;
 3159                         }
 3160                         wqe  += seglen;
 3161                         size += seglen / 16;
 3162                         break;
 3163 
 3164                 default:
 3165                         break;
 3166                 }
 3167 
 3168                 /*
 3169                  * Write data segments in reverse order, so as to
 3170                  * overwrite cacheline stamp last within each
 3171                  * cacheline.  This avoids issues with WQE
 3172                  * prefetching.
 3173                  */
 3174 
 3175                 dseg = wqe;
 3176                 dseg += wr->num_sge - 1;
 3177                 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
 3178 
 3179                 /* Add one more inline data segment for ICRC for MLX sends */
 3180                 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
 3181                              qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
 3182                              qp->mlx4_ib_qp_type &
 3183                              (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
 3184                         set_mlx_icrc_seg(dseg + 1);
 3185                         size += sizeof (struct mlx4_wqe_data_seg) / 16;
 3186                 }
 3187 
 3188                 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
 3189                         set_data_seg(dseg, wr->sg_list + i);
 3190 
 3191                 /*
 3192                  * Possibly overwrite stamping in cacheline with LSO
 3193                  * segment only after making sure all data segments
 3194                  * are written.
 3195                  */
 3196                 wmb();
 3197                 *lso_wqe = lso_hdr_sz;
 3198 
 3199                 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
 3200                                              MLX4_WQE_CTRL_FENCE : 0) | size;
 3201 
 3202                 /*
 3203                  * Make sure descriptor is fully written before
 3204                  * setting ownership bit (because HW can start
 3205                  * executing as soon as we do).
 3206                  */
 3207                 wmb();
 3208 
 3209                 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
 3210                         *bad_wr = wr;
 3211                         err = -EINVAL;
 3212                         goto out;
 3213                 }
 3214 
 3215                 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
 3216                         (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh;
 3217 
 3218                 stamp = ind + qp->sq_spare_wqes;
 3219                 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
 3220 
 3221                 /*
 3222                  * We can improve latency by not stamping the last
 3223                  * send queue WQE until after ringing the doorbell, so
 3224                  * only stamp here if there are still more WQEs to post.
 3225                  *
 3226                  * Same optimization applies to padding with NOP wqe
 3227                  * in case of WQE shrinking (used to prevent wrap-around
 3228                  * in the middle of WR).
 3229                  */
 3230                 if (wr->next) {
 3231                         stamp_send_wqe(qp, stamp, size * 16);
 3232                         ind = pad_wraparound(qp, ind);
 3233                 }
 3234         }
 3235 
 3236 out:
 3237         if (likely(nreq)) {
 3238                 qp->sq.head += nreq;
 3239 
 3240                 /*
 3241                  * Make sure that descriptors are written before
 3242                  * doorbell record.
 3243                  */
 3244                 wmb();
 3245 
 3246                 writel(qp->doorbell_qpn,
 3247                        to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
 3248 
 3249                 /*
 3250                  * Make sure doorbells don't leak out of SQ spinlock
 3251                  * and reach the HCA out of order.
 3252                  */
 3253                 mmiowb();
 3254 
 3255                 stamp_send_wqe(qp, stamp, size * 16);
 3256 
 3257                 ind = pad_wraparound(qp, ind);
 3258                 qp->sq_next_wqe = ind;
 3259         }
 3260 
 3261         spin_unlock_irqrestore(&qp->sq.lock, flags);
 3262 
 3263         return err;
 3264 }
 3265 
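      /*
       * Post a list of receive work requests.  Proxy special QPs reserve
       * the first scatter entry for the tunnel header; shorter S/G lists
       * are terminated with an invalid-lkey entry, and the doorbell
       * record is updated once at the end.
       */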
 3266 int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 3267                       const struct ib_recv_wr **bad_wr)
 3268 {
 3269         struct mlx4_ib_qp *qp = to_mqp(ibqp);
 3270         struct mlx4_wqe_data_seg *scat;
 3271         unsigned long flags;
 3272         int err = 0;
 3273         int nreq;
 3274         int ind;
 3275         int max_gs;
 3276         int i;
 3277         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 3278 
 3279         max_gs = qp->rq.max_gs;
 3280         spin_lock_irqsave(&qp->rq.lock, flags);
 3281 
 3282         if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
 3283                 err = -EIO;
 3284                 *bad_wr = wr;
 3285                 nreq = 0;
 3286                 goto out;
 3287         }
 3288 
 3289         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 3290 
 3291         for (nreq = 0; wr; ++nreq, wr = wr->next) {
 3292                 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
 3293                         err = -ENOMEM;
 3294                         *bad_wr = wr;
 3295                         goto out;
 3296                 }
 3297 
 3298                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
 3299                         err = -EINVAL;
 3300                         *bad_wr = wr;
 3301                         goto out;
 3302                 }
 3303 
 3304                 scat = get_recv_wqe(qp, ind);
 3305 
 3306                 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
 3307                     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
 3308                         ib_dma_sync_single_for_device(ibqp->device,
 3309                                                       qp->sqp_proxy_rcv[ind].map,
 3310                                                       sizeof (struct mlx4_ib_proxy_sqp_hdr),
 3311                                                       DMA_FROM_DEVICE);
 3312                         scat->byte_count =
 3313                                 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
 3314                         /* use dma lkey from upper layer entry */
 3315                         scat->lkey = cpu_to_be32(wr->sg_list->lkey);
 3316                         scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
 3317                         scat++;
 3318                         max_gs--;
 3319                 }
 3320 
 3321                 for (i = 0; i < wr->num_sge; ++i)
 3322                         __set_data_seg(scat + i, wr->sg_list + i);
 3323 
 3324                 if (i < max_gs) {
 3325                         scat[i].byte_count = 0;
 3326                         scat[i].lkey       = cpu_to_be32(MLX4_INVALID_LKEY);
 3327                         scat[i].addr       = 0;
 3328                 }
 3329 
 3330                 qp->rq.wrid[ind] = wr->wr_id;
 3331 
 3332                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
 3333         }
 3334 
 3335 out:
 3336         if (likely(nreq)) {
 3337                 qp->rq.head += nreq;
 3338 
 3339                 /*
 3340                  * Make sure that descriptors are written before
 3341                  * doorbell record.
 3342                  */
 3343                 wmb();
 3344 
 3345                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
 3346         }
 3347 
 3348         spin_unlock_irqrestore(&qp->rq.lock, flags);
 3349 
 3350         return err;
 3351 }
 3352 
 3353 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
 3354 {
 3355         switch (mlx4_state) {
 3356         case MLX4_QP_STATE_RST:      return IB_QPS_RESET;
 3357         case MLX4_QP_STATE_INIT:     return IB_QPS_INIT;
 3358         case MLX4_QP_STATE_RTR:      return IB_QPS_RTR;
 3359         case MLX4_QP_STATE_RTS:      return IB_QPS_RTS;
 3360         case MLX4_QP_STATE_SQ_DRAINING:
 3361         case MLX4_QP_STATE_SQD:      return IB_QPS_SQD;
 3362         case MLX4_QP_STATE_SQER:     return IB_QPS_SQE;
 3363         case MLX4_QP_STATE_ERR:      return IB_QPS_ERR;
 3364         default:                     return -1;
 3365         }
 3366 }
 3367 
 3368 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
 3369 {
 3370         switch (mlx4_mig_state) {
 3371         case MLX4_QP_PM_ARMED:          return IB_MIG_ARMED;
 3372         case MLX4_QP_PM_REARM:          return IB_MIG_REARM;
 3373         case MLX4_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
 3374         default: return -1;
 3375         }
 3376 }
 3377 
 3378 static int to_ib_qp_access_flags(int mlx4_flags)
 3379 {
 3380         int ib_flags = 0;
 3381 
 3382         if (mlx4_flags & MLX4_QP_BIT_RRE)
 3383                 ib_flags |= IB_ACCESS_REMOTE_READ;
 3384         if (mlx4_flags & MLX4_QP_BIT_RWE)
 3385                 ib_flags |= IB_ACCESS_REMOTE_WRITE;
 3386         if (mlx4_flags & MLX4_QP_BIT_RAE)
 3387                 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
 3388 
 3389         return ib_flags;
 3390 }
 3391 
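      /* Convert a hardware QP path (mlx4_qp_path) back into an ib_ah_attr. */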
 3392 static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
 3393                                 struct mlx4_qp_path *path)
 3394 {
 3395         struct mlx4_dev *dev = ibdev->dev;
 3396         int is_eth;
 3397 
 3398         memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
 3399         ib_ah_attr->port_num      = path->sched_queue & 0x40 ? 2 : 1;
 3400 
 3401         if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
 3402                 return;
 3403 
 3404         is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
 3405                 IB_LINK_LAYER_ETHERNET;
 3406         if (is_eth)
 3407                 ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
 3408                 ((path->sched_queue & 4) << 1);
 3409         else
 3410                 ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
 3411 
 3412         ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
 3413         ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
 3414         ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
 3415         ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 3416         if (ib_ah_attr->ah_flags) {
 3417                 ib_ah_attr->grh.sgid_index = path->mgid_index;
 3418                 ib_ah_attr->grh.hop_limit  = path->hop_limit;
 3419                 ib_ah_attr->grh.traffic_class =
 3420                         (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
 3421                 ib_ah_attr->grh.flow_label =
 3422                         be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
 3423                 memcpy(ib_ah_attr->grh.dgid.raw,
 3424                         path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
 3425         }
 3426 }
 3427 
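      /*
       * Query the firmware for the QP context and translate it back into
       * ib_qp_attr / ib_qp_init_attr.  A QP in the RESET state is
       * reported without touching the hardware.
       */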
 3428 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 3429                      struct ib_qp_init_attr *qp_init_attr)
 3430 {
 3431         struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 3432         struct mlx4_ib_qp *qp = to_mqp(ibqp);
 3433         struct mlx4_qp_context context;
 3434         int mlx4_state;
 3435         int err = 0;
 3436 
 3437         mutex_lock(&qp->mutex);
 3438 
 3439         if (qp->state == IB_QPS_RESET) {
 3440                 qp_attr->qp_state = IB_QPS_RESET;
 3441                 goto done;
 3442         }
 3443 
 3444         err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
 3445         if (err) {
 3446                 err = -EINVAL;
 3447                 goto out;
 3448         }
 3449 
 3450         mlx4_state = be32_to_cpu(context.flags) >> 28;
 3451 
 3452         qp->state                    = to_ib_qp_state(mlx4_state);
 3453         qp_attr->qp_state            = qp->state;
 3454         qp_attr->path_mtu            = context.mtu_msgmax >> 5;
 3455         qp_attr->path_mig_state      =
 3456                 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
 3457         qp_attr->qkey                = be32_to_cpu(context.qkey);
 3458         qp_attr->rq_psn              = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
 3459         qp_attr->sq_psn              = be32_to_cpu(context.next_send_psn) & 0xffffff;
 3460         qp_attr->dest_qp_num         = be32_to_cpu(context.remote_qpn) & 0xffffff;
 3461         qp_attr->qp_access_flags     =
 3462                 to_ib_qp_access_flags(be32_to_cpu(context.params2));
 3463 
 3464         if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
 3465                 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
 3466                 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
 3467                 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
 3468                 qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;
 3469         }
 3470 
 3471         qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
 3472         if (qp_attr->qp_state == IB_QPS_INIT)
 3473                 qp_attr->port_num = qp->port;
 3474         else
 3475                 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
 3476 
 3477         /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 3478         qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
 3479 
 3480         qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
 3481 
 3482         qp_attr->max_dest_rd_atomic =
 3483                 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
 3484         qp_attr->min_rnr_timer      =
 3485                 (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
 3486         qp_attr->timeout            = context.pri_path.ackto >> 3;
 3487         qp_attr->retry_cnt          = (be32_to_cpu(context.params1) >> 16) & 0x7;
 3488         qp_attr->rnr_retry          = (be32_to_cpu(context.params1) >> 13) & 0x7;
 3489         qp_attr->alt_timeout        = context.alt_path.ackto >> 3;
 3490 
 3491 done:
 3492         qp_attr->cur_qp_state        = qp_attr->qp_state;
 3493         qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
 3494         qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
 3495 
 3496         if (!ibqp->uobject) {
 3497                 qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
 3498                 qp_attr->cap.max_send_sge = qp->sq.max_gs;
 3499         } else {
 3500                 qp_attr->cap.max_send_wr  = 0;
 3501                 qp_attr->cap.max_send_sge = 0;
 3502         }
 3503 
 3504         /*
 3505          * We don't support inline sends for kernel QPs (yet), and we
 3506          * don't know what userspace's value should be.
 3507          */
 3508         qp_attr->cap.max_inline_data = 0;
 3509 
 3510         qp_init_attr->cap            = qp_attr->cap;
 3511 
 3512         qp_init_attr->create_flags = 0;
 3513         if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
 3514                 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 3515 
 3516         if (qp->flags & MLX4_IB_QP_LSO)
 3517                 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
 3518 
 3519         if (qp->flags & MLX4_IB_QP_NETIF)
 3520                 qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
 3521 
 3522         qp_init_attr->sq_sig_type =
 3523                 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
 3524                 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 3525 
 3526 out:
 3527         mutex_unlock(&qp->mutex);
 3528         return err;
 3529 }
 3530 
