
FreeBSD/Linux Kernel Cross Reference
sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c


    1 /*-
    2  * Copyright (c) 2013-2020, Mellanox Technologies.  All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD$
   26  */
   27 
   28 #include "opt_rss.h"
   29 #include "opt_ratelimit.h"
   30 
   31 #include <linux/kref.h>
   32 #include <rdma/ib_umem.h>
   33 #include <rdma/ib_user_verbs.h>
   34 #include <rdma/ib_cache.h>
   35 #include <rdma/uverbs_ioctl.h>
   36 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
   37 
   38 static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe __unused)
   39 {
   40         struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
   41 
   42         ibcq->comp_handler(ibcq, ibcq->cq_context);
   43 }
   44 
   45 static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, int type)
   46 {
   47         struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
   48         struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
   49         struct ib_cq *ibcq = &cq->ibcq;
   50         struct ib_event event;
   51 
   52         if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
   53                 mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
   54                              type, mcq->cqn);
   55                 return;
   56         }
   57 
   58         if (ibcq->event_handler) {
   59                 event.device     = &dev->ib_dev;
   60                 event.event      = IB_EVENT_CQ_ERR;
   61                 event.element.cq = ibcq;
   62                 ibcq->event_handler(&event, ibcq->cq_context);
   63         }
   64 }
   65 
   66 static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
   67 {
   68         return mlx5_buf_offset(&buf->buf, n * size);
   69 }
   70 
   71 static void *get_cqe(struct mlx5_ib_cq *cq, int n)
   72 {
   73         return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
   74 }
   75 
   76 static u8 sw_ownership_bit(int n, int nent)
   77 {
   78         return (n & nent) ? 1 : 0;
   79 }
   80 
   81 static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
   82 {
   83         void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
   84         struct mlx5_cqe64 *cqe64;
   85 
   86         cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
   87 
   88         if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
   89             !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
   90                 return cqe;
   91         } else {
   92                 return NULL;
   93         }
   94 }
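/*
 * A minimal restatement of the validity test above, for illustration only;
 * cqe_is_sw_owned() is a hypothetical helper, not part of this driver, and
 * is kept under #if 0.  Hardware writes the opcode into the high nibble of
 * op_own and toggles MLX5_CQE_OWNER_MASK each time it wraps the ring, so the
 * value expected at free-running consumer index n on a ring of
 * nent = cq->ibcq.cqe + 1 entries is sw_ownership_bit(n, nent) = !!(n & nent).
 */
#if 0
static bool cqe_is_sw_owned(const struct mlx5_cqe64 *cqe64, int n, int nent)
{
	u8 hw_owner = cqe64->op_own & MLX5_CQE_OWNER_MASK;	/* written by HW */
	u8 expected = !!(n & nent);				/* flips on each wrap */

	return (cqe64->op_own >> 4) != MLX5_CQE_INVALID && hw_owner == expected;
}
#endif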
   95 
   96 static void *next_cqe_sw(struct mlx5_ib_cq *cq)
   97 {
   98         return get_sw_cqe(cq, cq->mcq.cons_index);
   99 }
  100 
  101 static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
  102 {
  103         switch (wq->wr_data[idx]) {
  104         case MLX5_IB_WR_UMR:
  105                 return 0;
  106 
  107         case IB_WR_LOCAL_INV:
  108                 return IB_WC_LOCAL_INV;
  109 
  110         case IB_WR_REG_MR:
  111                 return IB_WC_REG_MR;
  112 
  113         default:
  114                 pr_warn("unknown completion status\n");
  115                 return 0;
  116         }
  117 }
  118 
  119 static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
  120                             struct mlx5_ib_wq *wq, int idx)
  121 {
  122         wc->wc_flags = 0;
  123         switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
  124         case MLX5_OPCODE_RDMA_WRITE_IMM:
  125                 wc->wc_flags |= IB_WC_WITH_IMM;      /* FALLTHROUGH */
  126         case MLX5_OPCODE_RDMA_WRITE:
  127                 wc->opcode    = IB_WC_RDMA_WRITE;
  128                 break;
  129         case MLX5_OPCODE_SEND_IMM:
  130                 wc->wc_flags |= IB_WC_WITH_IMM;      /* FALLTHROUGH */
  131         case MLX5_OPCODE_SEND:
  132         case MLX5_OPCODE_SEND_INVAL:
  133                 wc->opcode    = IB_WC_SEND;
  134                 break;
  135         case MLX5_OPCODE_RDMA_READ:
  136                 wc->opcode    = IB_WC_RDMA_READ;
  137                 wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
  138                 break;
  139         case MLX5_OPCODE_ATOMIC_CS:
  140                 wc->opcode    = IB_WC_COMP_SWAP;
  141                 wc->byte_len  = 8;
  142                 break;
  143         case MLX5_OPCODE_ATOMIC_FA:
  144                 wc->opcode    = IB_WC_FETCH_ADD;
  145                 wc->byte_len  = 8;
  146                 break;
  147         case MLX5_OPCODE_ATOMIC_MASKED_CS:
  148                 wc->opcode    = IB_WC_MASKED_COMP_SWAP;
  149                 wc->byte_len  = 8;
  150                 break;
  151         case MLX5_OPCODE_ATOMIC_MASKED_FA:
  152                 wc->opcode    = IB_WC_MASKED_FETCH_ADD;
  153                 wc->byte_len  = 8;
  154                 break;
  155         case MLX5_OPCODE_UMR:
  156                 wc->opcode = get_umr_comp(wq, idx);
  157                 break;
  158         }
  159 }
  160 
  161 enum {
  162         MLX5_GRH_IN_BUFFER = 1,
  163         MLX5_GRH_IN_CQE    = 2,
  164 };
  165 
  166 static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
  167                              struct mlx5_ib_qp *qp)
  168 {
  169         enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
  170         struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
  171         struct mlx5_ib_srq *srq;
  172         struct mlx5_ib_wq *wq;
  173         u16 wqe_ctr;
  174         u8  roce_packet_type;
  175         bool vlan_present;
  176         u8 g;
  177 
  178         if (qp->ibqp.srq || qp->ibqp.xrcd) {
  179                 struct mlx5_core_srq *msrq = NULL;
  180 
  181                 if (qp->ibqp.xrcd) {
  182                         msrq = mlx5_core_get_srq(dev->mdev,
  183                                                  be32_to_cpu(cqe->srqn));
  184                         srq = to_mibsrq(msrq);
  185                 } else {
  186                         srq = to_msrq(qp->ibqp.srq);
  187                 }
  188                 if (srq) {
  189                         wqe_ctr = be16_to_cpu(cqe->wqe_counter);
  190                         wc->wr_id = srq->wrid[wqe_ctr];
  191                         mlx5_ib_free_srq_wqe(srq, wqe_ctr);
  192                         if (msrq && atomic_dec_and_test(&msrq->refcount))
  193                                 complete(&msrq->free);
  194                 }
  195         } else {
  196                 wq        = &qp->rq;
  197                 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
  198                 ++wq->tail;
  199         }
  200         wc->byte_len = be32_to_cpu(cqe->byte_cnt);
  201 
  202         switch (cqe->op_own >> 4) {
  203         case MLX5_CQE_RESP_WR_IMM:
  204                 wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
  205                 wc->wc_flags    = IB_WC_WITH_IMM;
  206                 wc->ex.imm_data = cqe->imm_inval_pkey;
  207                 break;
  208         case MLX5_CQE_RESP_SEND:
  209                 wc->opcode   = IB_WC_RECV;
  210                 wc->wc_flags = IB_WC_IP_CSUM_OK;
  211                 if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
  212                                (cqe->hds_ip_ext & CQE_L4_OK))))
  213                         wc->wc_flags = 0;
  214                 break;
  215         case MLX5_CQE_RESP_SEND_IMM:
  216                 wc->opcode      = IB_WC_RECV;
  217                 wc->wc_flags    = IB_WC_WITH_IMM;
  218                 wc->ex.imm_data = cqe->imm_inval_pkey;
  219                 break;
  220         case MLX5_CQE_RESP_SEND_INV:
  221                 wc->opcode      = IB_WC_RECV;
  222                 wc->wc_flags    = IB_WC_WITH_INVALIDATE;
  223                 wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
  224                 break;
  225         }
  226         wc->src_qp         = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
  227         wc->dlid_path_bits = cqe->ml_path;
  228         g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
  229         wc->wc_flags |= g ? IB_WC_GRH : 0;
  230         if (unlikely(is_qp1(qp->ibqp.qp_type))) {
  231                 u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
  232 
  233                 ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
  234                                     &wc->pkey_index);
  235         } else {
  236                 wc->pkey_index = 0;
  237         }
  238 
  239         if (ll != IB_LINK_LAYER_ETHERNET) {
  240                 wc->slid = be16_to_cpu(cqe->slid);
  241                 wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
  242                 return;
  243         }
  244 
  245         wc->slid = 0;
  246         vlan_present = cqe_has_vlan(cqe);
  247         roce_packet_type   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
  248         if (vlan_present) {
  249                 wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
  250                 wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
  251                 wc->wc_flags |= IB_WC_WITH_VLAN;
  252         } else {
  253                 wc->sl = 0;
  254         }
  255 
  256         switch (roce_packet_type) {
  257         case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
  258                 wc->network_hdr_type = RDMA_NETWORK_IB;
  259                 break;
  260         case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
  261                 wc->network_hdr_type = RDMA_NETWORK_IPV6;
  262                 break;
  263         case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
  264                 wc->network_hdr_type = RDMA_NETWORK_IPV4;
  265                 break;
  266         }
  267         wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
  268 }
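/*
 * Layout of cqe->flags_rqpn as consumed above (after be32_to_cpu): bits
 * [23:0] carry the source QP number, bits [27:24] the service level on IB
 * links (with bits [25:24] reused as the RoCE L3 header type on Ethernet
 * links), and bits [29:28] the GRH-present indicator.
 */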
  269 
  270 static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
  271 {
  272         __be32 *p = (__be32 *)cqe;
  273         int i;
  274 
  275         mlx5_ib_warn(dev, "dump error cqe\n");
  276         for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
  277                 pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
  278                         be32_to_cpu(p[1]), be32_to_cpu(p[2]),
  279                         be32_to_cpu(p[3]));
  280 }
  281 
  282 static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
  283                                   struct mlx5_err_cqe *cqe,
  284                                   struct ib_wc *wc)
  285 {
  286         int dump = 1;
  287 
  288         switch (cqe->syndrome) {
  289         case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
  290                 wc->status = IB_WC_LOC_LEN_ERR;
  291                 break;
  292         case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
  293                 wc->status = IB_WC_LOC_QP_OP_ERR;
  294                 break;
  295         case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
  296                 wc->status = IB_WC_LOC_PROT_ERR;
  297                 break;
  298         case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
  299                 dump = 0;
  300                 wc->status = IB_WC_WR_FLUSH_ERR;
  301                 break;
  302         case MLX5_CQE_SYNDROME_MW_BIND_ERR:
  303                 wc->status = IB_WC_MW_BIND_ERR;
  304                 break;
  305         case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
  306                 wc->status = IB_WC_BAD_RESP_ERR;
  307                 break;
  308         case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
  309                 wc->status = IB_WC_LOC_ACCESS_ERR;
  310                 break;
  311         case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
  312                 wc->status = IB_WC_REM_INV_REQ_ERR;
  313                 break;
  314         case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
  315                 wc->status = IB_WC_REM_ACCESS_ERR;
  316                 break;
  317         case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
  318                 wc->status = IB_WC_REM_OP_ERR;
  319                 break;
  320         case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
  321                 wc->status = IB_WC_RETRY_EXC_ERR;
  322                 dump = 0;
  323                 break;
  324         case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
  325                 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
  326                 dump = 0;
  327                 break;
  328         case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
  329                 wc->status = IB_WC_REM_ABORT_ERR;
  330                 break;
  331         default:
  332                 wc->status = IB_WC_GENERAL_ERR;
  333                 break;
  334         }
  335 
  336         wc->vendor_err = cqe->vendor_err_synd;
  337         if (dump)
  338                 dump_cqe(dev, cqe);
  339 }
  340 
  341 static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
  342 {
  343         /* TBD: waiting decision
  344         */
  345         return 0;
  346 }
  347 
  348 static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
  349 {
  350         struct mlx5_wqe_data_seg *dpseg;
  351         void *addr;
  352 
  353         dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
  354                 sizeof(struct mlx5_wqe_raddr_seg) +
  355                 sizeof(struct mlx5_wqe_atomic_seg);
  356         addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
  357         return addr;
  358 }
  359 
  360 static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
  361                           uint16_t idx)
  362 {
  363         void *addr;
  364         int byte_count;
  365         int i;
  366 
  367         if (!is_atomic_response(qp, idx))
  368                 return;
  369 
  370         byte_count = be32_to_cpu(cqe64->byte_cnt);
  371         addr = mlx5_get_atomic_laddr(qp, idx);
  372 
  373         if (byte_count == 4) {
  374                 *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
  375         } else {
  376                 for (i = 0; i < byte_count; i += 8) {
  377                         *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
  378                         addr += 8;
  379                 }
  380         }
  381 
  382         return;
  383 }
  384 
  385 static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
  386                            u16 tail, u16 head)
  387 {
  388         u16 idx;
  389 
  390         do {
  391                 idx = tail & (qp->sq.wqe_cnt - 1);
  392                 handle_atomic(qp, cqe64, idx);
  393                 if (idx == head)
  394                         break;
  395 
  396                 tail = qp->sq.w_list[idx].next;
  397         } while (1);
  398         tail = qp->sq.w_list[idx].next;
  399         qp->sq.last_poll = tail;
  400 }
  401 
  402 static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
  403 {
  404         mlx5_buf_free(dev->mdev, &buf->buf);
  405 }
  406 
  407 static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
  408                              struct ib_sig_err *item)
  409 {
  410         u16 syndrome = be16_to_cpu(cqe->syndrome);
  411 
  412 #define GUARD_ERR   (1 << 13)
  413 #define APPTAG_ERR  (1 << 12)
  414 #define REFTAG_ERR  (1 << 11)
  415 
  416         if (syndrome & GUARD_ERR) {
  417                 item->err_type = IB_SIG_BAD_GUARD;
  418                 item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
  419                 item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
  420         } else
  421         if (syndrome & REFTAG_ERR) {
  422                 item->err_type = IB_SIG_BAD_REFTAG;
  423                 item->expected = be32_to_cpu(cqe->expected_reftag);
  424                 item->actual = be32_to_cpu(cqe->actual_reftag);
  425         } else
  426         if (syndrome & APPTAG_ERR) {
  427                 item->err_type = IB_SIG_BAD_APPTAG;
  428                 item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
  429                 item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
  430         } else {
  431                 pr_err("Got signature completion error with bad syndrome %04x\n",
  432                        syndrome);
  433         }
  434 
  435         item->sig_err_offset = be64_to_cpu(cqe->err_offset);
  436         item->key = be32_to_cpu(cqe->mkey);
  437 }
  438 
  439 static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
  440                          struct ib_wc *wc, int *npolled)
  441 {
  442         struct mlx5_ib_wq *wq;
  443         unsigned int cur;
  444         unsigned int idx;
  445         int np;
  446         int i;
  447 
  448         wq = &qp->sq;
  449         cur = wq->head - wq->tail;
  450         np = *npolled;
  451 
  452         if (cur == 0)
  453                 return;
  454 
  455         for (i = 0;  i < cur && np < num_entries; i++) {
  456                 idx = wq->last_poll & (wq->wqe_cnt - 1);
  457                 wc->wr_id = wq->wrid[idx];
  458                 wc->status = IB_WC_WR_FLUSH_ERR;
  459                 wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
  460                 wq->tail++;
  461                 np++;
  462                 wc->qp = &qp->ibqp;
  463                 wc++;
  464                 wq->last_poll = wq->w_list[idx].next;
  465         }
  466         *npolled = np;
  467 }
  468 
  469 static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
  470                          struct ib_wc *wc, int *npolled)
  471 {
  472         struct mlx5_ib_wq *wq;
  473         unsigned int cur;
  474         int np;
  475         int i;
  476 
  477         wq = &qp->rq;
  478         cur = wq->head - wq->tail;
  479         np = *npolled;
  480 
  481         if (cur == 0)
  482                 return;
  483 
  484         for (i = 0;  i < cur && np < num_entries; i++) {
  485                 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
  486                 wc->status = IB_WC_WR_FLUSH_ERR;
  487                 wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
  488                 wq->tail++;
  489                 np++;
  490                 wc->qp = &qp->ibqp;
  491                 wc++;
  492         }
  493         *npolled = np;
  494 }
  495 
  496 static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
  497                                  struct ib_wc *wc, int *npolled)
  498 {
  499         struct mlx5_ib_qp *qp;
  500 
  501         *npolled = 0;
  502         /* Find uncompleted WQEs belonging to that CQ and return flush-error completions that mimic them */
  503         list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
  504                 sw_send_comp(qp, num_entries, wc + *npolled, npolled);
  505                 if (*npolled >= num_entries)
  506                         return;
  507         }
  508 
  509         list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
  510                 sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
  511                 if (*npolled >= num_entries)
  512                         return;
  513         }
  514 }
  515 
  516 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
  517                          struct mlx5_ib_qp **cur_qp,
  518                          struct ib_wc *wc)
  519 {
  520         struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
  521         struct mlx5_err_cqe *err_cqe;
  522         struct mlx5_cqe64 *cqe64;
  523         struct mlx5_core_qp *mqp;
  524         struct mlx5_ib_wq *wq;
  525         struct mlx5_sig_err_cqe *sig_err_cqe;
  526         struct mlx5_core_mkey *mmkey;
  527         struct mlx5_ib_mr *mr;
  528         unsigned long flags;
  529         uint8_t opcode;
  530         uint32_t qpn;
  531         u16 wqe_ctr;
  532         void *cqe;
  533         int idx;
  534 
  535 repoll:
  536         cqe = next_cqe_sw(cq);
  537         if (!cqe)
  538                 return -EAGAIN;
  539 
  540         cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
  541 
  542         ++cq->mcq.cons_index;
  543 
  544         /* Make sure we read CQ entry contents after we've checked the
  545          * ownership bit.
  546          */
  547         rmb();
  548 
  549         opcode = cqe64->op_own >> 4;
  550         if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
  551                 if (likely(cq->resize_buf)) {
  552                         free_cq_buf(dev, &cq->buf);
  553                         cq->buf = *cq->resize_buf;
  554                         kfree(cq->resize_buf);
  555                         cq->resize_buf = NULL;
  556                         goto repoll;
  557                 } else {
  558                         mlx5_ib_warn(dev, "unexpected resize cqe\n");
  559                 }
  560         }
  561 
  562         qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
  563         if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
  564                 /* We do not have to take the QP table lock here,
  565                  * because CQs will be locked while QPs are removed
  566                  * from the table.
  567                  */
  568                 mqp = __mlx5_qp_lookup(dev->mdev, qpn);
  569                 *cur_qp = to_mibqp(mqp);
  570         }
  571 
  572         wc->qp  = &(*cur_qp)->ibqp;
  573         switch (opcode) {
  574         case MLX5_CQE_REQ:
  575                 wq = &(*cur_qp)->sq;
  576                 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
  577                 idx = wqe_ctr & (wq->wqe_cnt - 1);
  578                 handle_good_req(wc, cqe64, wq, idx);
  579                 handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
  580                 wc->wr_id = wq->wrid[idx];
  581                 wq->tail = wq->wqe_head[idx] + 1;
  582                 wc->status = IB_WC_SUCCESS;
  583                 break;
  584         case MLX5_CQE_RESP_WR_IMM:
  585         case MLX5_CQE_RESP_SEND:
  586         case MLX5_CQE_RESP_SEND_IMM:
  587         case MLX5_CQE_RESP_SEND_INV:
  588                 handle_responder(wc, cqe64, *cur_qp);
  589                 wc->status = IB_WC_SUCCESS;
  590                 break;
  591         case MLX5_CQE_RESIZE_CQ:
  592                 break;
  593         case MLX5_CQE_REQ_ERR:
  594         case MLX5_CQE_RESP_ERR:
  595                 err_cqe = (struct mlx5_err_cqe *)cqe64;
  596                 mlx5_handle_error_cqe(dev, err_cqe, wc);
  597                 mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
  598                             opcode == MLX5_CQE_REQ_ERR ?
  599                             "Requestor" : "Responder", cq->mcq.cqn);
  600                 mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
  601                             err_cqe->syndrome, err_cqe->vendor_err_synd);
  602                 if (opcode == MLX5_CQE_REQ_ERR) {
  603                         wq = &(*cur_qp)->sq;
  604                         wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
  605                         idx = wqe_ctr & (wq->wqe_cnt - 1);
  606                         wc->wr_id = wq->wrid[idx];
  607                         wq->tail = wq->wqe_head[idx] + 1;
  608                 } else {
  609                         struct mlx5_ib_srq *srq;
  610 
  611                         if ((*cur_qp)->ibqp.srq) {
  612                                 srq = to_msrq((*cur_qp)->ibqp.srq);
  613                                 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
  614                                 wc->wr_id = srq->wrid[wqe_ctr];
  615                                 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
  616                         } else {
  617                                 wq = &(*cur_qp)->rq;
  618                                 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
  619                                 ++wq->tail;
  620                         }
  621                 }
  622                 break;
  623         case MLX5_CQE_SIG_ERR:
  624                 sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
  625 
  626                 spin_lock_irqsave(&dev->mdev->priv.mr_table.lock, flags);
  627                 mmkey = __mlx5_mr_lookup(dev->mdev,
  628                                          mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
  629                 mr = to_mibmr(mmkey);
  630                 get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
  631                 mr->sig->sig_err_exists = true;
  632                 mr->sig->sigerr_count++;
  633 
  634                 mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
  635                              cq->mcq.cqn, mr->sig->err_item.key,
  636                              mr->sig->err_item.err_type,
  637                              (long long)mr->sig->err_item.sig_err_offset,
  638                              mr->sig->err_item.expected,
  639                              mr->sig->err_item.actual);
  640 
  641                 spin_unlock_irqrestore(&dev->mdev->priv.mr_table.lock, flags);
  642                 goto repoll;
  643         }
  644 
  645         return 0;
  646 }
  647 
  648 static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
  649                         struct ib_wc *wc)
  650 {
  651         struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
  652         struct mlx5_ib_wc *soft_wc, *next;
  653         int npolled = 0;
  654 
  655         list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
  656                 if (npolled >= num_entries)
  657                         break;
  658 
  659                 mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
  660                             cq->mcq.cqn);
  661 
  662                 wc[npolled++] = soft_wc->wc;
  663                 list_del(&soft_wc->list);
  664                 kfree(soft_wc);
  665         }
  666 
  667         return npolled;
  668 }
  669 
  670 int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
  671 {
  672         struct mlx5_ib_cq *cq = to_mcq(ibcq);
  673         struct mlx5_ib_qp *cur_qp = NULL;
  674         struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
  675         struct mlx5_core_dev *mdev = dev->mdev;
  676         unsigned long flags;
  677         int soft_polled = 0;
  678         int npolled;
  679 
  680         spin_lock_irqsave(&cq->lock, flags);
  681         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
  682                 mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
  683                 goto out;
  684         }
  685 
  686         if (unlikely(!list_empty(&cq->wc_list)))
  687                 soft_polled = poll_soft_wc(cq, num_entries, wc);
  688 
  689         for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
  690                 if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
  691                         break;
  692         }
  693 
  694         if (npolled)
  695                 mlx5_cq_set_ci(&cq->mcq);
  696 out:
  697         spin_unlock_irqrestore(&cq->lock, flags);
  698 
  699         return soft_polled + npolled;
  700 }
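/*
 * An illustrative sketch (kept under #if 0) of how a kernel verbs consumer
 * would drive the poll path above: ib_poll_cq() dispatches to
 * mlx5_ib_poll_cq() for this device and returns the number of work
 * completions written, combining any software completions queued on
 * cq->wc_list with hardware CQEs.  The drain loop and array size are
 * illustrative, not taken from this file.
 */
#if 0
static void drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc[8];
	int n, i;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IB_WC_SUCCESS)
				pr_warn("wr_id %llu completed with status %d\n",
					(unsigned long long)wc[i].wr_id,
					wc[i].status);
		}
	}
}
#endif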
  701 
  702 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
  703 {
  704         struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
  705         struct mlx5_ib_cq *cq = to_mcq(ibcq);
  706         void __iomem *uar_page = mdev->priv.uar->map;
  707         unsigned long irq_flags;
  708         int ret = 0;
  709 
  710         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
  711                 return -1;
  712 
  713         spin_lock_irqsave(&cq->lock, irq_flags);
  714         if (cq->notify_flags != IB_CQ_NEXT_COMP)
  715                 cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
  716 
  717         if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
  718                 ret = 1;
  719         spin_unlock_irqrestore(&cq->lock, irq_flags);
  720 
  721         mlx5_cq_arm(&cq->mcq,
  722                     (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
  723                     MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
  724                     uar_page,
  725                     MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
  726                     cq->mcq.cons_index);
  727 
  728         return ret;
  729 }
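/*
 * An illustrative sketch (kept under #if 0) of the arm-then-repoll pattern
 * the return value above supports: ib_req_notify_cq() lands in
 * mlx5_ib_arm_cq(), and a positive return combined with
 * IB_CQ_REPORT_MISSED_EVENTS tells the caller to poll once more before
 * sleeping.  handle_one_wc() is a hypothetical consumer hook.
 */
#if 0
static void poll_and_arm(struct ib_cq *cq)
{
	struct ib_wc wc;
	int ret;

	for (;;) {
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_one_wc(&wc);		/* hypothetical hook */

		ret = ib_req_notify_cq(cq,
		    IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
		if (ret <= 0)
			break;		/* armed (0) or error (< 0) */
		/* ret > 0: completions arrived while arming; poll again */
	}
}
#endif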
  730 
  731 static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
  732                         int nent, int cqe_size)
  733 {
  734         int err;
  735 
  736         err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
  737             2 * PAGE_SIZE, &buf->buf);
  738         if (err)
  739                 return err;
  740 
  741         buf->cqe_size = cqe_size;
  742         buf->nent = nent;
  743 
  744         return 0;
  745 }
  746 
  747 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
  748                           struct mlx5_ib_cq *cq, int entries, u32 **cqb,
  749                           int *cqe_size, int *index, int *inlen)
  750 {
  751         struct mlx5_ib_create_cq ucmd = {};
  752         size_t ucmdlen;
  753         int page_shift;
  754         __be64 *pas;
  755         int npages;
  756         int ncont;
  757         void *cqc;
  758         int err;
  759         struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
  760                 udata, struct mlx5_ib_ucontext, ibucontext);
  761 
  762         ucmdlen = min(udata->inlen, sizeof(ucmd));
  763         if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
  764                 return -EINVAL;
  765 
  766         if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
  767                 return -EFAULT;
  768 
  769         if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
  770                 return -EINVAL;
  771 
  772         if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
  773                 return -EINVAL;
  774 
  775         *cqe_size = ucmd.cqe_size;
  776 
  777         cq->buf.umem = ib_umem_get(&context->ibucontext, ucmd.buf_addr,
  778                                    entries * ucmd.cqe_size,
  779                                    IB_ACCESS_LOCAL_WRITE, 1);
  780         if (IS_ERR(cq->buf.umem)) {
  781                 err = PTR_ERR(cq->buf.umem);
  782                 return err;
  783         }
  784 
  785         err = mlx5_ib_db_map_user(context, ucmd.db_addr,
  786                                   &cq->db);
  787         if (err)
  788                 goto err_umem;
  789 
  790         mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
  791                            &ncont, NULL);
  792         mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
  793                     (long long)ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
  794 
  795         *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
  796                  MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
  797         *cqb = mlx5_vzalloc(*inlen);
  798         if (!*cqb) {
  799                 err = -ENOMEM;
  800                 goto err_db;
  801         }
  802 
  803         pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
  804         mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
  805 
  806         cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
  807         MLX5_SET(cqc, cqc, log_page_size,
  808                  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
  809 
  810         if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
  811                 *index = ucmd.uar_page_index;
  812         } else if (context->bfregi.lib_uar_dyn) {
  813                 err = -EINVAL;
  814                 goto err_cqb;
  815         } else {
  816                 *index = context->bfregi.sys_pages[0];
  817         }
  818 
  819         MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
  820         return 0;
  821 
  822 err_cqb:
  823         kvfree(*cqb);
  824 
  825 err_db:
  826         mlx5_ib_db_unmap_user(context, &cq->db);
  827 
  828 err_umem:
  829         ib_umem_release(cq->buf.umem);
  830         return err;
  831 }
  832 
  833 static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
  834 {
  835         struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
  836                 udata, struct mlx5_ib_ucontext, ibucontext);
  837 
  838         mlx5_ib_db_unmap_user(context, &cq->db);
  839         ib_umem_release(cq->buf.umem);
  840 }
  841 
  842 static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
  843 {
  844         int i;
  845         void *cqe;
  846         struct mlx5_cqe64 *cqe64;
  847 
  848         for (i = 0; i < buf->nent; i++) {
  849                 cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
  850                 cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
  851                 cqe64->op_own = MLX5_CQE_INVALID << 4;
  852         }
  853 }
  854 
  855 static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
  856                             int entries, int cqe_size,
  857                             u32 **cqb, int *index, int *inlen)
  858 {
  859         __be64 *pas;
  860         void *cqc;
  861         int err;
  862 
  863         err = mlx5_db_alloc(dev->mdev, &cq->db);
  864         if (err)
  865                 return err;
  866 
  867         cq->mcq.set_ci_db  = cq->db.db;
  868         cq->mcq.arm_db     = cq->db.db + 1;
  869         cq->mcq.cqe_sz = cqe_size;
  870 
  871         err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
  872         if (err)
  873                 goto err_db;
  874 
  875         init_cq_buf(cq, &cq->buf);
  876 
  877         *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
  878                  MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
  879         *cqb = mlx5_vzalloc(*inlen);
  880         if (!*cqb) {
  881                 err = -ENOMEM;
  882                 goto err_buf;
  883         }
  884 
  885         pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
  886         mlx5_fill_page_array(&cq->buf.buf, pas);
  887 
  888         cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
  889         MLX5_SET(cqc, cqc, log_page_size,
  890                  cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
  891 
  892         *index = dev->mdev->priv.uar->index;
  893 
  894         return 0;
  895 
  896 err_buf:
  897         free_cq_buf(dev, &cq->buf);
  898 
  899 err_db:
  900         mlx5_db_free(dev->mdev, &cq->db);
  901         return err;
  902 }
  903 
  904 static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
  905 {
  906         free_cq_buf(dev, &cq->buf);
  907         mlx5_db_free(dev->mdev, &cq->db);
  908 }
  909 
  910 static void notify_soft_wc_handler(struct work_struct *work)
  911 {
  912         struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
  913                                              notify_work);
  914 
  915         cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
  916 }
  917 
  918 int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
  919                       struct ib_udata *udata)
  920 {
  921         struct ib_device *ibdev = ibcq->device;
  922         int entries = attr->cqe;
  923         int vector = attr->comp_vector;
  924         struct mlx5_ib_dev *dev = to_mdev(ibdev);
  925         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
  926         struct mlx5_ib_cq *cq = to_mcq(ibcq);
  927         int uninitialized_var(index);
  928         int uninitialized_var(inlen);
  929         u32 *cqb = NULL;
  930         void *cqc;
  931         int cqe_size;
  932         unsigned int irqn;
  933         int eqn;
  934         int err;
  935 
  936         if (entries < 0 ||
  937             (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
  938                 return -EINVAL;
  939 
  940         if (check_cq_create_flags(attr->flags))
  941                 return -EOPNOTSUPP;
  942 
  943         entries = roundup_pow_of_two(entries + 1);
  944         if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
  945                 return -EINVAL;
  946 
  947         cq->ibcq.cqe = entries - 1;
  948         mutex_init(&cq->resize_mutex);
  949         spin_lock_init(&cq->lock);
  950         cq->resize_buf = NULL;
  951         cq->resize_umem = NULL;
  952         cq->create_flags = attr->flags;
  953         INIT_LIST_HEAD(&cq->list_send_qp);
  954         INIT_LIST_HEAD(&cq->list_recv_qp);
  955 
  956         if (udata) {
  957                 err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
  958                                      &index, &inlen);
  959                 if (err)
  960                         return err;
  961         } else {
  962                 cqe_size = cache_line_size() == 128 ? 128 : 64;
  963                 err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
  964                                        &index, &inlen);
  965                 if (err)
  966                         return err;
  967 
  968                 INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
  969         }
  970 
  971         err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
  972         if (err)
  973                 goto err_cqb;
  974 
  975         cq->cqe_size = cqe_size;
  976 
  977         cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
  978         MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
  979         MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
  980         MLX5_SET(cqc, cqc, uar_page, index);
  981         MLX5_SET(cqc, cqc, c_eqn, eqn);
  982         MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
  983         if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
  984                 MLX5_SET(cqc, cqc, oi, 1);
  985 
  986         err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
  987         if (err)
  988                 goto err_cqb;
  989 
  990         mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
  991         cq->mcq.irqn = irqn;
  992         cq->mcq.comp  = mlx5_ib_cq_comp;
  993         cq->mcq.event = mlx5_ib_cq_event;
  994 
  995         INIT_LIST_HEAD(&cq->wc_list);
  996 
  997         if (udata)
  998                 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
  999                         err = -EFAULT;
 1000                         goto err_cmd;
 1001                 }
 1002 
 1003 
 1004         kvfree(cqb);
 1005         return 0;
 1006 
 1007 err_cmd:
 1008         mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
 1009 
 1010 err_cqb:
 1011         kvfree(cqb);
 1012         if (udata)
 1013                 destroy_cq_user(cq, udata);
 1014         else
 1015                 destroy_cq_kernel(dev, cq);
 1016         return err;
 1017 }
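/*
 * An illustrative sketch (kept under #if 0) of kernel-side CQ creation
 * through the classic ib_create_cq() verb, which reaches mlx5_ib_create_cq()
 * above for an mlx5 device.  The 256-entry request and completion vector 0
 * are illustrative; the driver rounds entries + 1 up to a power of two and
 * reports one less than that in ibcq->cqe.
 */
#if 0
static struct ib_cq *create_example_cq(struct ib_device *ibdev,
    ib_comp_handler comp, void *cq_context)
{
	struct ib_cq_init_attr attr = {
		.cqe = 256,
		.comp_vector = 0,
	};

	return ib_create_cq(ibdev, comp, NULL, cq_context, &attr);
}
#endif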
 1018 
 1019 void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
 1020 {
 1021         struct mlx5_ib_dev *dev = to_mdev(cq->device);
 1022         struct mlx5_ib_cq *mcq = to_mcq(cq);
 1023 
 1024         mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
 1025         if (udata)
 1026                 destroy_cq_user(mcq, udata);
 1027         else
 1028                 destroy_cq_kernel(dev, mcq);
 1029 }
 1030 
 1031 static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
 1032 {
 1033         return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
 1034 }
 1035 
 1036 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
 1037 {
 1038         struct mlx5_cqe64 *cqe64, *dest64;
 1039         void *cqe, *dest;
 1040         u32 prod_index;
 1041         int nfreed = 0;
 1042         u8 owner_bit;
 1043 
 1044         if (!cq)
 1045                 return;
 1046 
 1047         /* First we need to find the current producer index, so we
 1048          * know where to start cleaning from.  It doesn't matter if HW
 1049          * adds new entries after this loop -- the QP we're worried
 1050          * about is already in RESET, so the new entries won't come
 1051          * from our QP and therefore don't need to be checked.
 1052          */
 1053         for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
 1054                 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
 1055                         break;
 1056 
 1057         /* Now sweep backwards through the CQ, removing CQ entries
 1058          * that match our QP by copying older entries on top of them.
 1059          */
 1060         while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 1061                 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
 1062                 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
 1063                 if (is_equal_rsn(cqe64, rsn)) {
 1064                         if (srq && (ntohl(cqe64->srqn) & 0xffffff))
 1065                                 mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
 1066                         ++nfreed;
 1067                 } else if (nfreed) {
 1068                         dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
 1069                         dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
 1070                         owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
 1071                         memcpy(dest, cqe, cq->mcq.cqe_sz);
 1072                         dest64->op_own = owner_bit |
 1073                                 (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
 1074                 }
 1075         }
 1076 
 1077         if (nfreed) {
 1078                 cq->mcq.cons_index += nfreed;
 1079                 /* Make sure update of buffer contents is done before
 1080                  * updating consumer index.
 1081                  */
 1082                 wmb();
 1083                 mlx5_cq_set_ci(&cq->mcq);
 1084         }
 1085 }
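/*
 * Worked example for the sweep above (indices are free-running consumer
 * counts; the slot is the index masked with cq->ibcq.cqe): with
 * cons_index = 8, valid CQEs at 8..11, and entries 9 and 11 belonging to
 * the QP being cleaned, the backward pass counts 11 (nfreed = 1), copies
 * entry 10 onto slot 11, counts 9 (nfreed = 2), copies entry 8 onto slot 10,
 * and finally advances cons_index by 2, leaving the surviving completions
 * at slots 10 and 11 in their original order.
 */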
 1086 
 1087 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
 1088 {
 1089         if (!cq)
 1090                 return;
 1091 
 1092         spin_lock_irq(&cq->lock);
 1093         __mlx5_ib_cq_clean(cq, qpn, srq);
 1094         spin_unlock_irq(&cq->lock);
 1095 }
 1096 
 1097 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 1098 {
 1099         struct mlx5_ib_dev *dev = to_mdev(cq->device);
 1100         struct mlx5_ib_cq *mcq = to_mcq(cq);
 1101         int err;
 1102 
 1103         if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
 1104                 return -ENOSYS;
 1105 
 1106         err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
 1107                                              cq_period, cq_count);
 1108         if (err)
 1109                 mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
 1110 
 1111         return err;
 1112 }
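/*
 * An illustrative sketch (kept under #if 0) of enabling completion
 * moderation through the generic ib_modify_cq() entry point, which calls
 * mlx5_ib_modify_cq() above; the event-count and period values are
 * illustrative.
 */
#if 0
static int set_example_cq_moderation(struct ib_cq *ibcq)
{
	/*
	 * Coalesce interrupts: at most one event per 16 completions or per
	 * cq_period interval, whichever expires first.
	 */
	return ib_modify_cq(ibcq, 16 /* cq_count */, 64 /* cq_period */);
}
#endif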
 1113 
 1114 static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 1115                        int entries, struct ib_udata *udata, int *npas,
 1116                        int *page_shift, int *cqe_size)
 1117 {
 1118         struct mlx5_ib_resize_cq ucmd;
 1119         struct ib_umem *umem;
 1120         int err;
 1121         int npages;
 1122         struct ib_ucontext *context = cq->buf.umem->context;
 1123 
 1124         err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 1125         if (err)
 1126                 return err;
 1127 
 1128         if (ucmd.reserved0 || ucmd.reserved1)
 1129                 return -EINVAL;
 1130 
 1131         /* check multiplication overflow */
 1132         if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
 1133                 return -EINVAL;
 1134 
 1135         umem = ib_umem_get(context, ucmd.buf_addr,
 1136                            (size_t)ucmd.cqe_size * entries,
 1137                            IB_ACCESS_LOCAL_WRITE, 1);
 1138         if (IS_ERR(umem)) {
 1139                 err = PTR_ERR(umem);
 1140                 return err;
 1141         }
 1142 
 1143         mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
 1144                            npas, NULL);
 1145 
 1146         cq->resize_umem = umem;
 1147         *cqe_size = ucmd.cqe_size;
 1148 
 1149         return 0;
 1150 }
 1151 
 1152 static void un_resize_user(struct mlx5_ib_cq *cq)
 1153 {
 1154         ib_umem_release(cq->resize_umem);
 1155 }
 1156 
 1157 static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 1158                          int entries, int cqe_size)
 1159 {
 1160         int err;
 1161 
 1162         cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
 1163         if (!cq->resize_buf)
 1164                 return -ENOMEM;
 1165 
 1166         err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
 1167         if (err)
 1168                 goto ex;
 1169 
 1170         init_cq_buf(cq, cq->resize_buf);
 1171 
 1172         return 0;
 1173 
 1174 ex:
 1175         kfree(cq->resize_buf);
 1176         return err;
 1177 }
 1178 
 1179 static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
 1180 {
 1181         free_cq_buf(dev, cq->resize_buf);
 1182         cq->resize_buf = NULL;
 1183 }
 1184 
 1185 static int copy_resize_cqes(struct mlx5_ib_cq *cq)
 1186 {
 1187         struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
 1188         struct mlx5_cqe64 *scqe64;
 1189         struct mlx5_cqe64 *dcqe64;
 1190         void *start_cqe;
 1191         void *scqe;
 1192         void *dcqe;
 1193         int ssize;
 1194         int dsize;
 1195         int i;
 1196         u8 sw_own;
 1197 
 1198         ssize = cq->buf.cqe_size;
 1199         dsize = cq->resize_buf->cqe_size;
 1200         if (ssize != dsize) {
 1201                 mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
 1202                 return -EINVAL;
 1203         }
 1204 
 1205         i = cq->mcq.cons_index;
 1206         scqe = get_sw_cqe(cq, i);
 1207         scqe64 = ssize == 64 ? scqe : scqe + 64;
 1208         start_cqe = scqe;
 1209         if (!scqe) {
 1210                 mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
 1211                 return -EINVAL;
 1212         }
 1213 
 1214         while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
 1215                 dcqe = get_cqe_from_buf(cq->resize_buf,
 1216                                         (i + 1) & (cq->resize_buf->nent),
 1217                                         dsize);
 1218                 dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
 1219                 sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
 1220                 memcpy(dcqe, scqe, dsize);
 1221                 dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
 1222 
 1223                 ++i;
 1224                 scqe = get_sw_cqe(cq, i);
 1225                 scqe64 = ssize == 64 ? scqe : scqe + 64;
 1226                 if (!scqe) {
 1227                         mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
 1228                         return -EINVAL;
 1229                 }
 1230 
 1231                 if (scqe == start_cqe) {
 1232                         pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
 1233                                 cq->mcq.cqn);
 1234                         return -ENOMEM;
 1235                 }
 1236         }
 1237         ++cq->mcq.cons_index;
 1238         return 0;
 1239 }
 1240 
 1241 int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 1242 {
 1243         struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
 1244         struct mlx5_ib_cq *cq = to_mcq(ibcq);
 1245         void *cqc;
 1246         u32 *in;
 1247         int err;
 1248         int npas;
 1249         __be64 *pas;
 1250         int page_shift;
 1251         int inlen;
 1252         int uninitialized_var(cqe_size);
 1253         unsigned long flags;
 1254 
 1255         if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
 1256                 pr_info("Firmware does not support resize CQ\n");
 1257                 return -ENOSYS;
 1258         }
 1259 
 1260         if (entries < 1 ||
 1261             entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
 1262                 mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
 1263                              entries,
 1264                              1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
 1265                 return -EINVAL;
 1266         }
 1267 
 1268         entries = roundup_pow_of_two(entries + 1);
 1269         if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
 1270                 return -EINVAL;
 1271 
 1272         if (entries == ibcq->cqe + 1)
 1273                 return 0;
 1274 
 1275         mutex_lock(&cq->resize_mutex);
 1276         if (udata) {
 1277                 err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
 1278                                   &cqe_size);
 1279         } else {
 1280                 cqe_size = 64;
 1281                 err = resize_kernel(dev, cq, entries, cqe_size);
 1282                 if (!err) {
 1283                         npas = cq->resize_buf->buf.npages;
 1284                         page_shift = cq->resize_buf->buf.page_shift;
 1285                 }
 1286         }
 1287 
 1288         if (err)
 1289                 goto ex;
 1290 
 1291         inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
 1292                 MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
 1293 
 1294         in = mlx5_vzalloc(inlen);
 1295         if (!in) {
 1296                 err = -ENOMEM;
 1297                 goto ex_resize;
 1298         }
 1299 
 1300         pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
 1301         if (udata)
 1302                 mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
 1303                                      pas, 0);
 1304         else
 1305                 mlx5_fill_page_array(&cq->resize_buf->buf, pas);
 1306 
 1307         MLX5_SET(modify_cq_in, in,
 1308                  modify_field_select_resize_field_select.resize_field_select.resize_field_select,
 1309                  MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
 1310                  MLX5_MODIFY_CQ_MASK_PG_OFFSET |
 1311                  MLX5_MODIFY_CQ_MASK_PG_SIZE);
 1312 
 1313         cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
 1314 
 1315         MLX5_SET(cqc, cqc, log_page_size,
 1316                  page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 1317         MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
 1318         MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
 1319 
 1320         MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
 1321         MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
 1322 
 1323         err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
 1324         if (err)
 1325                 goto ex_alloc;
 1326 
 1327         if (udata) {
 1328                 cq->ibcq.cqe = entries - 1;
 1329                 ib_umem_release(cq->buf.umem);
 1330                 cq->buf.umem = cq->resize_umem;
 1331                 cq->resize_umem = NULL;
 1332         } else {
 1333                 struct mlx5_ib_cq_buf tbuf;
 1334                 int resized = 0;
 1335 
 1336                 spin_lock_irqsave(&cq->lock, flags);
 1337                 if (cq->resize_buf) {
 1338                         err = copy_resize_cqes(cq);
 1339                         if (!err) {
 1340                                 tbuf = cq->buf;
 1341                                 cq->buf = *cq->resize_buf;
 1342                                 kfree(cq->resize_buf);
 1343                                 cq->resize_buf = NULL;
 1344                                 resized = 1;
 1345                         }
 1346                 }
 1347                 cq->ibcq.cqe = entries - 1;
 1348                 spin_unlock_irqrestore(&cq->lock, flags);
 1349                 if (resized)
 1350                         free_cq_buf(dev, &tbuf);
 1351         }
 1352         mutex_unlock(&cq->resize_mutex);
 1353 
 1354         kvfree(in);
 1355         return 0;
 1356 
 1357 ex_alloc:
 1358         kvfree(in);
 1359 
 1360 ex_resize:
 1361         if (udata)
 1362                 un_resize_user(cq);
 1363         else
 1364                 un_resize_kernel(dev, cq);
 1365 ex:
 1366         mutex_unlock(&cq->resize_mutex);
 1367         return err;
 1368 }
 1369 
 1370 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
 1371 {
 1372         struct mlx5_ib_cq *cq;
 1373 
 1374         if (!ibcq)
 1375                 return 128;
 1376 
 1377         cq = to_mcq(ibcq);
 1378         return cq->cqe_size;
 1379 }
 1380 
 1381 /* Called from atomic context */
 1382 int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
 1383 {
 1384         struct mlx5_ib_wc *soft_wc;
 1385         struct mlx5_ib_cq *cq = to_mcq(ibcq);
 1386         unsigned long flags;
 1387 
 1388         soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
 1389         if (!soft_wc)
 1390                 return -ENOMEM;
 1391 
 1392         soft_wc->wc = *wc;
 1393         spin_lock_irqsave(&cq->lock, flags);
 1394         list_add_tail(&soft_wc->list, &cq->wc_list);
 1395         if (cq->notify_flags == IB_CQ_NEXT_COMP ||
 1396             wc->status != IB_WC_SUCCESS) {
 1397                 cq->notify_flags = 0;
 1398                 schedule_work(&cq->notify_work);
 1399         }
 1400         spin_unlock_irqrestore(&cq->lock, flags);
 1401 
 1402         return 0;
 1403 }
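/*
 * An illustrative sketch (kept under #if 0) of injecting a software
 * completion through mlx5_ib_generate_wc() above; the wr_id, opcode and
 * byte_len values are illustrative.  The entry is queued on cq->wc_list and
 * later returned to the consumer by poll_soft_wc() from mlx5_ib_poll_cq().
 */
#if 0
static int inject_soft_completion(struct ib_cq *ibcq, u64 wr_id, u32 len)
{
	struct ib_wc wc = {
		.wr_id = wr_id,
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.byte_len = len,
	};

	return mlx5_ib_generate_wc(ibcq, &wc);
}
#endif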

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.